ocaml/0002-robustify-intern.c.patch
Richard W.M. Jones e404b5e3e5 Fix remote code execution via buffer over-read in Marshal deserialization
CVE-2026-28364
Resolves: RHEL-152488
2026-03-02 12:36:44 +00:00

837 lines
30 KiB
Diff

From 5bc4360d8fc5660615bf08fc51b7456d062773a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= <n.oje.bar@gmail.com>
Date: Tue, 16 Dec 2025 16:34:35 +0100
Subject: [PATCH 2/2] robustify intern.c
Co-Authored-By: Xavier Leroy <xavier.leroy@college-de-france.fr>
(cherry picked from commit e3919fef436f89271bc30bbe8592851f7289fb68)
---
runtime/bigarray.c | 13 +-
runtime/caml/custom.h | 6 +
runtime/caml/intext.h | 16 +-
runtime/intern.c | 216 ++++++++++++++++++++-------
testsuite/tests/lib-marshal/fuzzy.ml | 141 +++++++++++++++++
5 files changed, 325 insertions(+), 67 deletions(-)
create mode 100644 testsuite/tests/lib-marshal/fuzzy.ml
diff --git a/runtime/bigarray.c b/runtime/bigarray.c
index 1e529ffeca..45de8589d2 100644
--- a/runtime/bigarray.c
+++ b/runtime/bigarray.c
@@ -592,12 +592,16 @@ CAMLexport uintnat caml_ba_deserialize(void * dst)
{
struct caml_ba_array * b = dst;
int i;
- uintnat num_elts, size;
+ uintnat num_elts, size, descriptor_size;
/* Read back header information */
- b->num_dims = caml_deserialize_uint_4();
- if (b->num_dims < 0 || b->num_dims > CAML_BA_MAX_NUM_DIMS)
+ int num_dims = caml_deserialize_uint_4();
+ if (num_dims < 0 || num_dims > CAML_BA_MAX_NUM_DIMS)
caml_deserialize_error("input_value: wrong number of bigarray dimensions");
+ descriptor_size = SIZEOF_BA_ARRAY + num_dims * sizeof(intnat);
+ if (descriptor_size > Bsize_custom_data(dst))
+ caml_deserialize_error("input_value: bigarray buffer overflow");
+ b->num_dims = num_dims;
b->flags = caml_deserialize_uint_4() | CAML_BA_MANAGED;
b->proxy = NULL;
for (i = 0; i < b->num_dims; i++) {
@@ -646,8 +650,7 @@ CAMLexport uintnat caml_ba_deserialize(void * dst)
case CAML_BA_NATIVE_INT:
caml_ba_deserialize_longarray(b->data, num_elts); break;
}
- /* PR#5516: use C99's flexible array types if possible */
- return SIZEOF_BA_ARRAY + b->num_dims * sizeof(intnat);
+ return descriptor_size;
}
/* Allocate a bigarray from OCaml */
diff --git a/runtime/caml/custom.h b/runtime/caml/custom.h
index 3731b900dc..e44ac78e20 100644
--- a/runtime/caml/custom.h
+++ b/runtime/caml/custom.h
@@ -47,6 +47,12 @@ struct custom_operations {
#define Custom_ops_val(v) (*((const struct custom_operations **) (v)))
+/* Given a pointer [p] to the data part of a custom block
+ (as returned by [Data_custom_val]), return the size of this data part
+ (in words or in bytes). */
+#define Wsize_custom_data(p) (Wosize_hp((header_t *)(p) - 2) - 1)
+#define Bsize_custom_data(p) (Bsize_wsize(Wsize_custom_data(p)))
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/runtime/caml/intext.h b/runtime/caml/intext.h
index 0ed2fc0b8b..727b4caedf 100644
--- a/runtime/caml/intext.h
+++ b/runtime/caml/intext.h
@@ -63,9 +63,11 @@
Each byte carries 7 bits of the number.
Bytes come in big-endian order: xxxxxxx are the 7 high-order bits,
zzzzzzzz the 7 low-order bits.
+
+ The header size is stored in a 6-bit field, so it's 63 at most.
*/
-#define MAX_INTEXT_HEADER_SIZE 55
+#define MAX_INTEXT_HEADER_SIZE 63
/* Codes for the compact format */
@@ -168,11 +170,13 @@ value caml_input_val (struct channel * chan);
CAMLextern value caml_input_val_from_string (value str, intnat ofs);
/* Read a structured value from the OCaml string [str], starting
at offset [ofs]. */
-CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs);
- /* Read a structured value from a malloced buffer. [data] points
- to the beginning of the buffer, and [ofs] is the offset of the
- beginning of the externed data in this buffer. The buffer is
- deallocated with [free] on return, or if an exception is raised. */
+CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs,
+ uintnat len);
+ /* Read a structured value from a malloced buffer. [data] points to the
+ beginning of the buffer, [ofs] is the offset of the beginning of the
+ externed data in this buffer, and [len] is the length in bytes of valid
+ data in this buffer. The buffer is deallocated with [free] on return,
+ or if an exception is raised. */
CAMLextern value caml_input_value_from_block(const char * data, intnat len);
/* Read a structured value from a user-provided buffer. [data] points
to the beginning of the externed data in this buffer,
diff --git a/runtime/intern.c b/runtime/intern.c
index f52ac51a6c..f753b7a32d 100644
--- a/runtime/intern.c
+++ b/runtime/intern.c
@@ -62,6 +62,9 @@ struct caml_intern_state {
const unsigned char * intern_src;
/* Reading pointer in block holding input data. */
+ const unsigned char * intern_src_end;
+ /* Pointer to the end of the readable data. */
+
unsigned char * intern_input;
/* Pointer to beginning of block holding input data,
if non-NULL this pointer will be freed by the cleanup function.
@@ -76,6 +79,9 @@ struct caml_intern_state {
value * intern_obj_table;
/* The pointers to objects already seen */
+ uintnat intern_num_objects;
+ /* How many objects are expected (from the header) */
+
struct intern_item intern_stack_init[INTERN_STACK_INIT_SIZE];
/* The initial intern stack */
@@ -88,6 +94,8 @@ struct caml_intern_state {
/* Writing pointer in destination block. Only used when the object fits in
the minor heap. */
+ header_t * intern_dest_end;
+
char compressed;
/* 1 if the compressed format is in use, 0 otherwise */
};
@@ -110,10 +118,13 @@ static struct caml_intern_state* init_intern_state (void)
s = caml_stat_alloc(sizeof(struct caml_intern_state));
s->intern_src = NULL;
+ s->intern_src_end = NULL;
s->intern_input = NULL;
s->obj_counter = 0;
+ s->intern_num_objects = 0;
s->intern_obj_table = NULL;
s->intern_dest = NULL;
+ s->intern_dest_end = NULL;
init_intern_stack(s);
Caml_state->intern_state = s;
@@ -141,19 +152,44 @@ void caml_free_intern_state (void)
}
}
+CAMLnoret static void intern_cleanup_failwith(struct caml_intern_state* s, const char * msg);
static char * intern_resolve_code_pointer(unsigned char digest[16],
asize_t offset);
CAMLnoret static void intern_bad_code_pointer(unsigned char digest[16]);
+Caml_inline void intern_check_read(struct caml_intern_state* s, uintnat len)
+{
+ if (CAMLunlikely(len > s->intern_src_end - s->intern_src)) {
+ intern_cleanup_failwith(s, "input_value: invalid read");
+ }
+}
+
+Caml_inline void intern_record_obj(struct caml_intern_state* s, value v)
+{
+ if (s->intern_obj_table != NULL) {
+ if (CAMLunlikely(s->obj_counter >= s->intern_num_objects)) {
+ intern_cleanup_failwith(s, "input_value: too many objects");
+ }
+ s->intern_obj_table[s->obj_counter++] = v;
+ }
+}
+
Caml_inline unsigned char read8u(struct caml_intern_state* s)
-{ return *s->intern_src++; }
+{
+ intern_check_read(s, 1);
+ return *s->intern_src++;
+}
Caml_inline signed char read8s(struct caml_intern_state* s)
-{ return *s->intern_src++; }
+{
+ intern_check_read(s, 1);
+ return *s->intern_src++;
+}
Caml_inline uint16_t read16u(struct caml_intern_state* s)
{
+ intern_check_read(s, 2);
uint16_t res = (s->intern_src[0] << 8) + s->intern_src[1];
s->intern_src += 2;
return res;
@@ -161,6 +197,7 @@ Caml_inline uint16_t read16u(struct caml_intern_state* s)
Caml_inline int16_t read16s(struct caml_intern_state* s)
{
+ intern_check_read(s, 2);
int16_t res = (s->intern_src[0] << 8) + s->intern_src[1];
s->intern_src += 2;
return res;
@@ -168,6 +205,7 @@ Caml_inline int16_t read16s(struct caml_intern_state* s)
Caml_inline uint32_t read32u(struct caml_intern_state* s)
{
+ intern_check_read(s, 4);
uint32_t res =
((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16)
+ (s->intern_src[2] << 8) + s->intern_src[3];
@@ -177,6 +215,7 @@ Caml_inline uint32_t read32u(struct caml_intern_state* s)
Caml_inline int32_t read32s(struct caml_intern_state* s)
{
+ intern_check_read(s, 4);
int32_t res =
((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16)
+ (s->intern_src[2] << 8) + s->intern_src[3];
@@ -187,6 +226,7 @@ Caml_inline int32_t read32s(struct caml_intern_state* s)
#ifdef ARCH_SIXTYFOUR
static uintnat read64u(struct caml_intern_state* s)
{
+ intern_check_read(s, 8);
uintnat res =
((uintnat) (s->intern_src[0]) << 56)
+ ((uintnat) (s->intern_src[1]) << 48)
@@ -217,14 +257,15 @@ static int readvlq(struct caml_intern_state* s, /*out*/ uintnat * res)
}
Caml_inline void readblock(struct caml_intern_state* s,
- void * dest, intnat len)
+ void * dest, uintnat len)
{
+ intern_check_read(s, len);
memcpy(dest, s->intern_src, len);
s->intern_src += len;
}
static void intern_init(struct caml_intern_state* s, const void * src,
- void * input)
+ uintnat len, void * input)
{
CAMLassert (s);
/* This is asserted at the beginning of demarshaling primitives.
@@ -232,6 +273,7 @@ static void intern_init(struct caml_intern_state* s, const void * src,
without calling intern_cleanup() during the previous demarshaling. */
CAMLassert (s->intern_input == NULL && s->intern_obj_table == NULL);
s->intern_src = src;
+ s->intern_src_end = s->intern_src + len;
s->intern_input = input;
}
@@ -253,8 +295,10 @@ static void intern_cleanup(struct caml_intern_state* s)
if (s->intern_obj_table != NULL) {
caml_stat_free(s->intern_obj_table);
s->intern_obj_table = NULL;
+ s->intern_num_objects = 0;
}
s->intern_dest = NULL;
+ s->intern_dest_end = NULL;
/* free the recursion stack */
intern_free_stack(s);
}
@@ -264,6 +308,21 @@ CAMLnoret static void intern_failwith2(const char * fun_name, const char * msg)
caml_failwith_value(caml_alloc_sprintf("%s: %s", fun_name, msg));
}
+CAMLnoret static void
+intern_cleanup_failwith(struct caml_intern_state* s, const char * msg)
+{
+ intern_cleanup(s);
+ caml_failwith(msg);
+}
+
+CAMLnoret static void
+intern_cleanup_failwith2(struct caml_intern_state* s, const char * fun_name,
+ const char * msg)
+{
+ intern_cleanup(s);
+ intern_failwith2(fun_name, msg);
+}
+
CAMLnoret static void
intern_cleanup_failwith3(struct caml_intern_state* s, const char * fun_name,
const char * msg, const char * arg)
@@ -398,13 +457,15 @@ static void intern_alloc_storage(struct caml_intern_state* s, mlsize_t whsize,
if (wosize <= Max_young_wosize && wosize != 0) {
v = caml_alloc_small (wosize, String_tag);
s->intern_dest = (header_t *) Hp_val(v);
+ s->intern_dest_end = s->intern_dest + whsize;
} else {
CAMLassert (s->intern_dest == NULL);
}
+ s->intern_num_objects = num_objects;
s->obj_counter = 0;
if (num_objects > 0) {
s->intern_obj_table =
- (value *) caml_stat_alloc_noexc(num_objects * sizeof(value));
+ (value *) caml_stat_calloc_noexc(num_objects, sizeof(value));
if (s->intern_obj_table == NULL) {
intern_cleanup(s);
caml_raise_out_of_memory();
@@ -421,9 +482,15 @@ static value intern_alloc_obj(struct caml_intern_state* s, caml_domain_state* d,
{
void* p;
+ if (CAMLunlikely(wosize > Max_wosize)) {
+ intern_cleanup_failwith(s, "input_value: block size too large");
+ }
if (s->intern_dest) {
CAMLassert ((value*)s->intern_dest >= d->young_start &&
(value*)s->intern_dest < d->young_end);
+ if (CAMLunlikely(wosize >= s->intern_dest_end - s->intern_dest)) {
+ intern_cleanup_failwith(s, "input_value: invalid allocation");
+ }
p = s->intern_dest;
*s->intern_dest = Make_header (wosize, tag, 0);
s->intern_dest += 1 + wosize;
@@ -475,9 +542,19 @@ static void intern_rec(struct caml_intern_state* s,
/* Pop item and iterate */
sp--;
break;
- case OShift:
+ case OShift: {
/* Shift value by an offset */
- *dest += sp->arg;
+ value v = *dest;
+ intnat ofs = sp->arg;
+ if (Is_block(v)
+ && (uintnat) ofs % sizeof(value) == 0
+ && ofs >= 0 && ofs < Bosize_val(v)
+ && Tag_val(v + ofs) == Infix_tag
+ && Infix_offset_val(v + ofs) == ofs)
+ *dest = v + ofs;
+ else
+ intern_cleanup_failwith2(s, fun_name, "bad infix offset");
+ }
/* Pop item and iterate */
sp--;
break;
@@ -497,11 +574,11 @@ static void intern_rec(struct caml_intern_state* s,
v = Atom(tag);
} else {
v = intern_alloc_obj (s, d, size, tag);
- if (s->intern_obj_table != NULL)
- s->intern_obj_table[s->obj_counter++] = v;
+ intern_record_obj(s, v);
/* For objects, we need to freshen the oid */
if (tag == Object_tag) {
- CAMLassert(size >= 2);
+ if (CAMLunlikely(size < 2))
+ intern_cleanup_failwith2(s, fun_name, "bad object block");
/* Request to read rest of the elements of the block */
ReadItems(s, &Field(v, 2), size - 2);
/* Request freshing OID */
@@ -524,10 +601,11 @@ static void intern_rec(struct caml_intern_state* s,
/* Small string */
len = (code & 0x1F);
read_string:
+ if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1))
+ intern_cleanup_failwith2(s, fun_name, "string too large");
size = (len + sizeof(value)) / sizeof(value);
v = intern_alloc_obj (s, d, size, String_tag);
- if (s->intern_obj_table != NULL)
- s->intern_obj_table[s->obj_counter++] = v;
+ intern_record_obj(s, v);
Field(v, size - 1) = 0;
ofs_ind = Bsize_wsize(size) - 1;
Byte(v, ofs_ind) = ofs_ind - len;
@@ -548,16 +626,17 @@ static void intern_rec(struct caml_intern_state* s,
v = Val_long((intnat) (read64u(s)));
break;
#else
- intern_cleanup(s);
- intern_failwith2(fun_name, "integer too large");
+ intern_cleanup_failwith2(s, fun_name, "integer too large");
break;
#endif
case CODE_SHARED8:
ofs = read8u(s);
read_shared:
if (!s->compressed) ofs = s->obj_counter - ofs;
- CAMLassert (ofs < s->obj_counter);
- CAMLassert (s->intern_obj_table != NULL);
+ /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */
+ if (CAMLunlikely(ofs >= s->obj_counter)) {
+ intern_cleanup_failwith2(s, fun_name, "invalid shared reference");
+ }
v = s->intern_obj_table[ofs];
break;
case CODE_SHARED16:
@@ -575,12 +654,18 @@ static void intern_rec(struct caml_intern_state* s,
header = (header_t) read32u(s);
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag
+ || tag == Infix_tag || tag == Cont_tag))
+ intern_cleanup_failwith2(s, fun_name, "invalid block32");
goto read_block;
#ifdef ARCH_SIXTYFOUR
case CODE_BLOCK64:
header = (header_t) read64u(s);
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag
+ || tag == Infix_tag || tag == Cont_tag))
+ intern_cleanup_failwith2(s, fun_name, "invalid block64");
goto read_block;
#endif
case CODE_STRING8:
@@ -597,18 +682,18 @@ static void intern_rec(struct caml_intern_state* s,
case CODE_DOUBLE_LITTLE:
case CODE_DOUBLE_BIG:
v = intern_alloc_obj (s, d, Double_wosize, Double_tag);
- if (s->intern_obj_table != NULL)
- s->intern_obj_table[s->obj_counter++] = v;
+ intern_record_obj(s, v);
readfloat(s, (double *) v, code);
break;
case CODE_DOUBLE_ARRAY8_LITTLE:
case CODE_DOUBLE_ARRAY8_BIG:
len = read8u(s);
read_double_array:
+ if (len == 0)
+ intern_cleanup_failwith2(s, fun_name, "invalid double_array");
size = len * Double_wosize;
v = intern_alloc_obj (s, d, size, Double_array_tag);
- if (s->intern_obj_table != NULL)
- s->intern_obj_table[s->obj_counter++] = v;
+ intern_record_obj(s, v);
readfloats(s, (double *) v, len, code);
break;
case CODE_DOUBLE_ARRAY32_LITTLE:
@@ -649,28 +734,33 @@ static void intern_rec(struct caml_intern_state* s,
ReadItems(s, dest, 1);
continue; /* with next iteration of main loop, skipping *dest = v */
case OLD_CODE_CUSTOM:
- intern_cleanup(s);
- intern_failwith2(fun_name, "custom blocks serialized with "
- "OCaml 4.08.0 (or prior) are no longer supported");
+ intern_cleanup_failwith2(s, fun_name, "custom blocks serialized with "
+ "OCaml 4.08.0 (or prior) are no longer supported");
break;
case CODE_CUSTOM_LEN:
case CODE_CUSTOM_FIXED: {
uintnat expected_size, temp_size;
const char * name = (const char *) s->intern_src;
+ const unsigned char * name_end =
+ memchr(name, 0, s->intern_src_end - s->intern_src);
+ if (name_end == NULL) {
+ intern_cleanup_failwith2(s, fun_name, "unterminated custom block identifier");
+ }
ops = caml_find_custom_operations(name);
if (ops == NULL) {
intern_cleanup_failwith3
(s, fun_name, "unknown custom block identifier", name);
}
- if (code == CODE_CUSTOM_FIXED && ops->fixed_length == NULL) {
+ if ((code == CODE_CUSTOM_FIXED) != (ops->fixed_length != NULL)) {
intern_cleanup_failwith3
- (s, fun_name, "expected a fixed-size custom block", name);
+ (s, fun_name, "wrong custom block kind", name);
}
- while (*s->intern_src++ != 0) /*nothing*/; /*skip identifier*/
+ s->intern_src = name_end + 1; /*skip identifier*/
#ifdef ARCH_SIXTYFOUR
if (code == CODE_CUSTOM_FIXED) {
expected_size = ops->fixed_length->bsize_64;
} else {
+ intern_check_read(s, 4);
s->intern_src += 4;
expected_size = read64u(s);
}
@@ -679,19 +769,20 @@ static void intern_rec(struct caml_intern_state* s,
expected_size = ops->fixed_length->bsize_32;
} else {
expected_size = read32u(s);
+ intern_check_read(s, 8);
s->intern_src += 8;
}
#endif
temp_size = 1 + (expected_size + sizeof(value) - 1) / sizeof(value);
- v = intern_alloc_obj(s, d, temp_size, Custom_tag);
+ v = intern_alloc_obj(s, d, temp_size, Abstract_tag);
Custom_ops_val(v) = ops;
size = ops->deserialize(Data_custom_val(v));
if (size != expected_size) {
intern_cleanup_failwith3
(s, fun_name, "error while deserializing custom block", name);
}
- if (s->intern_obj_table != NULL)
- s->intern_obj_table[s->obj_counter++] = v;
+ Unsafe_store_tag_val(v, Custom_tag);
+ intern_record_obj(s, v);
if (ops->finalize != NULL && Is_young(v)) {
/* Remember that the block has a finalizer. */
add_to_custom_table (&d->minor_tables->custom, v, 0, 1);
@@ -699,8 +790,7 @@ static void intern_rec(struct caml_intern_state* s,
break;
}
default:
- intern_cleanup(s);
- intern_failwith2(fun_name, "ill-formed message");
+ intern_cleanup_failwith2(s, fun_name, "ill-formed message");
}
}
}
@@ -836,15 +926,14 @@ static void intern_decompress_input(struct caml_intern_state * s,
h->data_len);
if (res != h->uncompressed_data_len) {
free(blk);
- intern_cleanup(s);
- intern_failwith2(fun_name, "decompression error");
+ intern_cleanup_failwith2(s, fun_name, "decompression error");
}
if (s->intern_input != NULL) free(s->intern_input);
s->intern_input = blk; /* to be freed at end of demarshaling */
s->intern_src = blk;
+ s->intern_src_end = s->intern_src + h->uncompressed_data_len;
} else {
- intern_cleanup(s);
- intern_failwith2(fun_name, "compressed object, cannot decompress");
+ intern_cleanup_failwith2(s, fun_name, "compressed object, cannot decompress");
}
}
@@ -867,6 +956,7 @@ value caml_input_val(struct channel *chan)
else if (r < 5)
caml_failwith("input_value: truncated object");
s->intern_src = (unsigned char *) header;
+ s->intern_src_end = s->intern_src + 5;
int hlen;
switch (read32u(s)) {
case Intext_magic_number_big:
@@ -877,11 +967,12 @@ value caml_input_val(struct channel *chan)
hlen = 20; break;
}
/* Read the remainder of the header */
- CAMLassert (hlen > 5);
- if (caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5)
+ if (hlen <= 5
+ || caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5)
caml_failwith("input_value: truncated object");
/* Parse the full header */
s->intern_src = (unsigned char *) header;
+ s->intern_src_end = s->intern_src + hlen;
caml_parse_header(s, "input_value", &h);
/* Read block from channel */
/* During channel I/O, concurrent [caml_input_val] operations
@@ -897,7 +988,7 @@ value caml_input_val(struct channel *chan)
caml_failwith("input_value: truncated object");
}
/* Initialize global state */
- intern_init(s, block, block);
+ intern_init(s, block, h.data_len, block);
intern_decompress_input(s, "input_value", &h);
intern_alloc_storage(s, h.whsize, h.num_objects);
/* Fill it in - obj must NOT be registered as a GC root */
@@ -933,13 +1024,14 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs)
struct caml_intern_state* s = init_intern_state ();
/* Initialize global state */
- intern_init(s, &Byte_u(str, ofs), NULL);
+ intern_init(s, &Byte_u(str, ofs), caml_string_length(str) - ofs, NULL);
caml_parse_header(s, "input_val_from_string", &h);
if (ofs + h.header_len + h.data_len > caml_string_length(str))
caml_failwith("input_val_from_string: bad length");
/* Allocate result */
intern_alloc_storage(s, h.whsize, h.num_objects);
s->intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */
+ s->intern_src_end = s->intern_src + h.data_len;
/* Decompress if needed */
intern_decompress_input(s, "input_val_from_string", &h);
/* Fill it in - obj must NOT be registered as a GC root */
@@ -966,30 +1058,37 @@ static value input_val_from_block(struct caml_intern_state* s,
return (intern_end(s, obj));
}
-CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs)
-{
- struct marshal_header h;
- struct caml_intern_state* s = init_intern_state ();
-
- intern_init(s, data + ofs, data);
- caml_parse_header(s, "input_value_from_malloc", &h);
- return input_val_from_block(s, &h);
-}
-
/* [len] is a number of bytes */
-CAMLexport value caml_input_value_from_block(const char * data, intnat len)
+static value caml_input_value_from_buffer(const char * fun_name,
+ const char * src,
+ uintnat len, void * input)
{
struct marshal_header h;
struct caml_intern_state* s = init_intern_state ();
- /* Initialize global state */
- intern_init(s, data, NULL);
- caml_parse_header(s, "input_value_from_block", &h);
+ intern_init(s, src, len, input);
+ caml_parse_header(s, fun_name, &h);
if (h.header_len + h.data_len > len)
- caml_failwith("input_val_from_block: bad length");
+ intern_failwith2(fun_name, "bad length");
+ s->intern_src_end = s->intern_src + h.data_len;
return input_val_from_block(s, &h);
}
+CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs,
+ uintnat len)
+{
+ return caml_input_value_from_buffer
+ ("input_value_from_malloc", data + ofs, len, data);
+}
+
+CAMLexport value caml_input_value_from_block(const char * data, intnat len)
+{
+ if (len < 0)
+ caml_failwith("input_value_from_block: negative length");
+ return caml_input_value_from_buffer
+ ("input_value_from_block", data, len, NULL);
+}
+
/* [ofs] is a [value] that represents a number of bytes
result is a [value] that represents a number of bytes
To handle all marshaling formats,
@@ -1013,6 +1112,7 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs)
struct caml_intern_state *s = init_intern_state ();
s->intern_src = &Byte_u(buff, Long_val(ofs));
+ s->intern_src_end = &Byte_u(buff, caml_string_length(buff));
magic = read32u(s);
switch(magic) {
case Intext_magic_number_small:
@@ -1136,6 +1236,7 @@ CAMLexport double caml_deserialize_float_8(void)
CAMLexport void caml_deserialize_block_1(void * data, intnat len)
{
struct caml_intern_state* s = get_intern_state ();
+ intern_check_read(s, len);
memcpy(data, s->intern_src, len);
s->intern_src += len;
}
@@ -1143,6 +1244,7 @@ CAMLexport void caml_deserialize_block_1(void * data, intnat len)
CAMLexport void caml_deserialize_block_2(void * data, intnat len)
{
struct caml_intern_state* s = get_intern_state ();
+ intern_check_read(s, len * 2);
#ifndef ARCH_BIG_ENDIAN
const unsigned char * p, * q;
for (p = s->intern_src, q = data; len > 0; len--, p += 2, q += 2)
@@ -1157,6 +1259,7 @@ CAMLexport void caml_deserialize_block_2(void * data, intnat len)
CAMLexport void caml_deserialize_block_4(void * data, intnat len)
{
struct caml_intern_state* s = get_intern_state ();
+ intern_check_read(s, len * 4);
#ifndef ARCH_BIG_ENDIAN
const unsigned char * p, * q;
for (p = s->intern_src, q = data; len > 0; len--, p += 4, q += 4)
@@ -1171,6 +1274,7 @@ CAMLexport void caml_deserialize_block_4(void * data, intnat len)
CAMLexport void caml_deserialize_block_8(void * data, intnat len)
{
struct caml_intern_state* s = get_intern_state ();
+ intern_check_read(s, len * 8);
#ifndef ARCH_BIG_ENDIAN
const unsigned char * p, * q;
for (p = s->intern_src, q = data; len > 0; len--, p += 8, q += 8)
@@ -1185,6 +1289,7 @@ CAMLexport void caml_deserialize_block_8(void * data, intnat len)
CAMLexport void caml_deserialize_block_float_8(void * data, intnat len)
{
struct caml_intern_state* s = get_intern_state ();
+ intern_check_read(s, len * 8);
#if ARCH_FLOAT_ENDIANNESS == 0x01234567
memcpy(data, s->intern_src, len * 8);
s->intern_src += len * 8;
@@ -1204,6 +1309,5 @@ CAMLexport void caml_deserialize_block_float_8(void * data, intnat len)
CAMLexport void caml_deserialize_error(char * msg)
{
struct caml_intern_state* s = get_intern_state ();
- intern_cleanup(s);
- caml_failwith(msg);
+ intern_cleanup_failwith(s, msg);
}
diff --git a/testsuite/tests/lib-marshal/fuzzy.ml b/testsuite/tests/lib-marshal/fuzzy.ml
new file mode 100644
index 0000000000..88b0d65c93
--- /dev/null
+++ b/testsuite/tests/lib-marshal/fuzzy.ml
@@ -0,0 +1,141 @@
+(* TEST
+ arguments = "-n 10000";
+*)
+
+(* Can also be used with an external fuzzer such as AFL:
+ ./fuzzy -o fuzzy.in/data
+ afl-fuzz -i fuzzy.in -o fuzzy.out -- ./fuzzy -r
+*)
+
+(* Some data to be marshaled *)
+
+type t = A | B of int | C of float | D of string | E of char
+ | F of t | G of t * t | H of int * t | I of t * float | J
+
+let longstring =
+"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+let bigint = Int64.to_int 0x123456789ABCDEF0L
+
+let closures () =
+ let t = true and
+ f = false in
+ let rec odd n =
+ if n = 0
+ then f
+ else even (n-1)
+ and even n =
+ if n = 0
+ then t
+ else odd (n-1)
+ in (even, odd)
+
+let data () = [|
+ Obj.repr 1;
+ Obj.repr (-1);
+ Obj.repr 20000;
+ Obj.repr 0x12345678;
+ Obj.repr bigint;
+ Obj.repr "foo";
+ Obj.repr longstring;
+ Obj.repr 3.141592654;
+ Obj.repr A;
+ Obj.repr (B 1);
+ Obj.repr (C 2.718);
+ Obj.repr (D "hello");
+ Obj.repr (E 'l');
+ Obj.repr (F(B 1));
+ Obj.repr (G(A, G(B 2, G(C 3.14, G(D "", E 'e')))));
+ Obj.repr (H(1, A));
+ Obj.repr (I(B 2, 1e-6));
+ (let x = D "sharing" in
+ let y = G(x, x) in
+ let z = G(y, G(x, y)) in
+ Obj.repr z);
+ Obj.repr [|1;2;3;4;5;6;7;8|];
+ Obj.repr [|3.14; 2.718|];
+ Obj.repr (closures());
+ Obj.repr 0l;
+ Obj.repr 123456l;
+ Obj.repr 0L;
+ (let i = Int64.of_string "123456789123456" in Obj.repr (i,i));
+ Obj.repr (Failure "fail");
+ Obj.repr Bigarray.(Array1.init int16_unsigned c_layout 5 (fun x -> 8*x))
+|]
+
+(* Generate file with marshaled data *)
+
+let generate filename =
+ Out_channel.with_open_bin filename
+ (fun oc -> Marshal.(to_channel oc (data()) [Closures]))
+
+(* Try to unmarshal possibly malicious data. Clean failure is success. *)
+
+let test ic =
+ In_channel.set_binary_mode ic true;
+ begin try
+ ignore (Marshal.from_channel ic)
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
+ end;
+ Gc.full_major()
+
+(* Internal fuzzing. Rather naive. *)
+
+let random_offset b =
+ (* Leave the header unchanged *)
+ 20 + Random.int (Bytes.length b - 20)
+
+let flip_one_byte b =
+ let p = random_offset b in
+ Bytes.set_uint8 b p (Random.int 0x100)
+
+let flip_one_bit b =
+ let p = random_offset b in
+ let m = 1 lsl (Random.int 8) in
+ Bytes.set_uint8 b p (Bytes.get_uint8 b p lxor m)
+
+let fuzz niter =
+ let d = Marshal.(to_string (data()) [Closures]) in
+ for i = 1 to niter do
+ let b = Bytes.of_string d in
+ begin match i land 4 with
+ | 0 -> flip_one_byte b
+ | 1 -> flip_one_bit b
+ | 2 -> flip_one_byte b; flip_one_byte b
+ | _ (*3*) -> flip_one_bit b; flip_one_bit b
+ end;
+ begin try
+ ignore (Marshal.from_bytes b 0)
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
+ end;
+ Gc.full_major()
+ done
+
+let fuzz1 () =
+ let d = Marshal.(to_string (data()) [Closures]) in
+ let b = Bytes.of_string d in
+ for i = 0 to String.length d - 1 do
+ for x = 0 to 255 do
+ Bytes.set_uint8 b i x;
+ begin try
+ ignore (Marshal.from_bytes b 0)
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
+ end;
+ Gc.full_major()
+ done;
+ Bytes.set_uint8 b i (Bytes.get_uint8 b i)
+ done
+
+let () =
+ Arg.parse [
+ "-o", Arg.String generate,
+ "<file> Save marshaled data to <file>";
+ "-n", Arg.Int fuzz,
+ "<num iter> Perform internal fuzzing test (random)";
+ "-x", Arg.Unit fuzz1,
+ "<num iter> Perform internal fuzzing test (exhaustive 1-byte)";
+ "-r", Arg.Unit (fun () -> test stdin),
+ " Read marshaled data from standard input"
+ ]
+ (fun s -> raise (Arg.Bad ("don't know what to do with " ^ s)))
+ "Usage: fuzzy [option].\nOptions are:"
--
2.52.0