diff --git a/0001-Fix-memory-corruption-when-an-exception-is-raised-du.patch b/0001-Fix-memory-corruption-when-an-exception-is-raised-du.patch new file mode 100644 index 0000000..4219ff2 --- /dev/null +++ b/0001-Fix-memory-corruption-when-an-exception-is-raised-du.patch @@ -0,0 +1,74 @@ +From 69c9446c1ea571577cf5e09d38bb9ba6abb10f79 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Beno=C3=AEt=20Vaugon?= +Date: Tue, 6 May 2025 11:27:24 +0200 +Subject: [PATCH 1/2] Fix memory corruption when an exception is raised during + demarshaling. (#14015) + +(cherry picked from commit e6321cf1e3f3bfb0795108ab4d6c66c559ea9b63) +(cherry picked from commit 4a3eb26c902ef1d3378c40c7299ef346862afa92) +--- + runtime/intern.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/runtime/intern.c b/runtime/intern.c +index 2be12e3479..f52ac51a6c 100644 +--- a/runtime/intern.c ++++ b/runtime/intern.c +@@ -856,7 +856,6 @@ value caml_input_val(struct channel *chan) + char header[MAX_INTEXT_HEADER_SIZE]; + struct marshal_header h; + char * block; +- value res; + struct caml_intern_state* s = init_intern_state (); + + if (! caml_channel_binary_mode(chan)) +@@ -901,9 +900,10 @@ value caml_input_val(struct channel *chan) + intern_init(s, block, block); + intern_decompress_input(s, "input_value", &h); + intern_alloc_storage(s, h.whsize, h.num_objects); +- /* Fill it in */ +- intern_rec(s, "input_value", &res); +- return intern_end(s, res); ++ /* Fill it in - obj must NOT be registered as a GC root */ ++ value obj; ++ intern_rec(s, "input_value", &obj); ++ return intern_end(s, obj); + } + + CAMLprim value caml_input_value(value vchan) +@@ -929,7 +929,6 @@ CAMLprim value caml_input_value_to_outside_heap(value vchan) + CAMLexport value caml_input_val_from_bytes(value str, intnat ofs) + { + CAMLparam1 (str); +- CAMLlocal1 (obj); + struct marshal_header h; + struct caml_intern_state* s = init_intern_state (); + +@@ -943,7 +942,8 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs) + s->intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */ + /* Decompress if needed */ + intern_decompress_input(s, "input_val_from_string", &h); +- /* Fill it in */ ++ /* Fill it in - obj must NOT be registered as a GC root */ ++ value obj; + intern_rec(s, "input_val_from_string", &obj); + CAMLreturn (intern_end(s, obj)); + } +@@ -956,12 +956,12 @@ CAMLprim value caml_input_value_from_bytes(value str, value ofs) + static value input_val_from_block(struct caml_intern_state* s, + struct marshal_header * h) + { +- value obj; + /* Decompress if needed */ + intern_decompress_input(s, "input_val_from_block", h); + /* Allocate result */ + intern_alloc_storage(s, h->whsize, h->num_objects); +- /* Fill it in */ ++ /* Fill it in - obj must NOT be registered as a GC root */ ++ value obj; + intern_rec(s, "input_val_from_block", &obj); + return (intern_end(s, obj)); + } +-- +2.52.0 + diff --git a/0002-robustify-intern.c.patch b/0002-robustify-intern.c.patch new file mode 100644 index 0000000..a1575d1 --- /dev/null +++ b/0002-robustify-intern.c.patch @@ -0,0 +1,836 @@ +From 5bc4360d8fc5660615bf08fc51b7456d062773a2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= +Date: Tue, 16 Dec 2025 16:34:35 +0100 +Subject: [PATCH 2/2] robustify intern.c + +Co-Authored-By: Xavier Leroy +(cherry picked from commit e3919fef436f89271bc30bbe8592851f7289fb68) +--- + runtime/bigarray.c | 13 +- + runtime/caml/custom.h | 6 + + runtime/caml/intext.h | 16 +- + runtime/intern.c | 216 ++++++++++++++++++++------- + testsuite/tests/lib-marshal/fuzzy.ml | 141 +++++++++++++++++ + 5 files changed, 325 insertions(+), 67 deletions(-) + create mode 100644 testsuite/tests/lib-marshal/fuzzy.ml + +diff --git a/runtime/bigarray.c b/runtime/bigarray.c +index 1e529ffeca..45de8589d2 100644 +--- a/runtime/bigarray.c ++++ b/runtime/bigarray.c +@@ -592,12 +592,16 @@ CAMLexport uintnat caml_ba_deserialize(void * dst) + { + struct caml_ba_array * b = dst; + int i; +- uintnat num_elts, size; ++ uintnat num_elts, size, descriptor_size; + + /* Read back header information */ +- b->num_dims = caml_deserialize_uint_4(); +- if (b->num_dims < 0 || b->num_dims > CAML_BA_MAX_NUM_DIMS) ++ int num_dims = caml_deserialize_uint_4(); ++ if (num_dims < 0 || num_dims > CAML_BA_MAX_NUM_DIMS) + caml_deserialize_error("input_value: wrong number of bigarray dimensions"); ++ descriptor_size = SIZEOF_BA_ARRAY + num_dims * sizeof(intnat); ++ if (descriptor_size > Bsize_custom_data(dst)) ++ caml_deserialize_error("input_value: bigarray buffer overflow"); ++ b->num_dims = num_dims; + b->flags = caml_deserialize_uint_4() | CAML_BA_MANAGED; + b->proxy = NULL; + for (i = 0; i < b->num_dims; i++) { +@@ -646,8 +650,7 @@ CAMLexport uintnat caml_ba_deserialize(void * dst) + case CAML_BA_NATIVE_INT: + caml_ba_deserialize_longarray(b->data, num_elts); break; + } +- /* PR#5516: use C99's flexible array types if possible */ +- return SIZEOF_BA_ARRAY + b->num_dims * sizeof(intnat); ++ return descriptor_size; + } + + /* Allocate a bigarray from OCaml */ +diff --git a/runtime/caml/custom.h b/runtime/caml/custom.h +index 3731b900dc..e44ac78e20 100644 +--- a/runtime/caml/custom.h ++++ b/runtime/caml/custom.h +@@ -47,6 +47,12 @@ struct custom_operations { + + #define Custom_ops_val(v) (*((const struct custom_operations **) (v))) + ++/* Given a pointer [p] to the data part of a custom block ++ (as returned by [Data_custom_val]), return the size of this data part ++ (in words or in bytes). */ ++#define Wsize_custom_data(p) (Wosize_hp((header_t *)(p) - 2) - 1) ++#define Bsize_custom_data(p) (Bsize_wsize(Wsize_custom_data(p))) ++ + #ifdef __cplusplus + extern "C" { + #endif +diff --git a/runtime/caml/intext.h b/runtime/caml/intext.h +index 0ed2fc0b8b..727b4caedf 100644 +--- a/runtime/caml/intext.h ++++ b/runtime/caml/intext.h +@@ -63,9 +63,11 @@ + Each byte carries 7 bits of the number. + Bytes come in big-endian order: xxxxxxx are the 7 high-order bits, + zzzzzzzz the 7 low-order bits. ++ ++ The header size is stored in a 6-bit field, so it's 63 at most. + */ + +-#define MAX_INTEXT_HEADER_SIZE 55 ++#define MAX_INTEXT_HEADER_SIZE 63 + + /* Codes for the compact format */ + +@@ -168,11 +170,13 @@ value caml_input_val (struct channel * chan); + CAMLextern value caml_input_val_from_string (value str, intnat ofs); + /* Read a structured value from the OCaml string [str], starting + at offset [ofs]. */ +-CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs); +- /* Read a structured value from a malloced buffer. [data] points +- to the beginning of the buffer, and [ofs] is the offset of the +- beginning of the externed data in this buffer. The buffer is +- deallocated with [free] on return, or if an exception is raised. */ ++CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs, ++ uintnat len); ++ /* Read a structured value from a malloced buffer. [data] points to the ++ beginning of the buffer, [ofs] is the offset of the beginning of the ++ externed data in this buffer, and [len] is the length in bytes of valid ++ data in this buffer. The buffer is deallocated with [free] on return, ++ or if an exception is raised. */ + CAMLextern value caml_input_value_from_block(const char * data, intnat len); + /* Read a structured value from a user-provided buffer. [data] points + to the beginning of the externed data in this buffer, +diff --git a/runtime/intern.c b/runtime/intern.c +index f52ac51a6c..f753b7a32d 100644 +--- a/runtime/intern.c ++++ b/runtime/intern.c +@@ -62,6 +62,9 @@ struct caml_intern_state { + const unsigned char * intern_src; + /* Reading pointer in block holding input data. */ + ++ const unsigned char * intern_src_end; ++ /* Pointer to the end of the readable data. */ ++ + unsigned char * intern_input; + /* Pointer to beginning of block holding input data, + if non-NULL this pointer will be freed by the cleanup function. +@@ -76,6 +79,9 @@ struct caml_intern_state { + value * intern_obj_table; + /* The pointers to objects already seen */ + ++ uintnat intern_num_objects; ++ /* How many objects are expected (from the header) */ ++ + struct intern_item intern_stack_init[INTERN_STACK_INIT_SIZE]; + /* The initial intern stack */ + +@@ -88,6 +94,8 @@ struct caml_intern_state { + /* Writing pointer in destination block. Only used when the object fits in + the minor heap. */ + ++ header_t * intern_dest_end; ++ + char compressed; + /* 1 if the compressed format is in use, 0 otherwise */ + }; +@@ -110,10 +118,13 @@ static struct caml_intern_state* init_intern_state (void) + s = caml_stat_alloc(sizeof(struct caml_intern_state)); + + s->intern_src = NULL; ++ s->intern_src_end = NULL; + s->intern_input = NULL; + s->obj_counter = 0; ++ s->intern_num_objects = 0; + s->intern_obj_table = NULL; + s->intern_dest = NULL; ++ s->intern_dest_end = NULL; + init_intern_stack(s); + + Caml_state->intern_state = s; +@@ -141,19 +152,44 @@ void caml_free_intern_state (void) + } + } + ++CAMLnoret static void intern_cleanup_failwith(struct caml_intern_state* s, const char * msg); + static char * intern_resolve_code_pointer(unsigned char digest[16], + asize_t offset); + + CAMLnoret static void intern_bad_code_pointer(unsigned char digest[16]); + ++Caml_inline void intern_check_read(struct caml_intern_state* s, uintnat len) ++{ ++ if (CAMLunlikely(len > s->intern_src_end - s->intern_src)) { ++ intern_cleanup_failwith(s, "input_value: invalid read"); ++ } ++} ++ ++Caml_inline void intern_record_obj(struct caml_intern_state* s, value v) ++{ ++ if (s->intern_obj_table != NULL) { ++ if (CAMLunlikely(s->obj_counter >= s->intern_num_objects)) { ++ intern_cleanup_failwith(s, "input_value: too many objects"); ++ } ++ s->intern_obj_table[s->obj_counter++] = v; ++ } ++} ++ + Caml_inline unsigned char read8u(struct caml_intern_state* s) +-{ return *s->intern_src++; } ++{ ++ intern_check_read(s, 1); ++ return *s->intern_src++; ++} + + Caml_inline signed char read8s(struct caml_intern_state* s) +-{ return *s->intern_src++; } ++{ ++ intern_check_read(s, 1); ++ return *s->intern_src++; ++} + + Caml_inline uint16_t read16u(struct caml_intern_state* s) + { ++ intern_check_read(s, 2); + uint16_t res = (s->intern_src[0] << 8) + s->intern_src[1]; + s->intern_src += 2; + return res; +@@ -161,6 +197,7 @@ Caml_inline uint16_t read16u(struct caml_intern_state* s) + + Caml_inline int16_t read16s(struct caml_intern_state* s) + { ++ intern_check_read(s, 2); + int16_t res = (s->intern_src[0] << 8) + s->intern_src[1]; + s->intern_src += 2; + return res; +@@ -168,6 +205,7 @@ Caml_inline int16_t read16s(struct caml_intern_state* s) + + Caml_inline uint32_t read32u(struct caml_intern_state* s) + { ++ intern_check_read(s, 4); + uint32_t res = + ((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16) + + (s->intern_src[2] << 8) + s->intern_src[3]; +@@ -177,6 +215,7 @@ Caml_inline uint32_t read32u(struct caml_intern_state* s) + + Caml_inline int32_t read32s(struct caml_intern_state* s) + { ++ intern_check_read(s, 4); + int32_t res = + ((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16) + + (s->intern_src[2] << 8) + s->intern_src[3]; +@@ -187,6 +226,7 @@ Caml_inline int32_t read32s(struct caml_intern_state* s) + #ifdef ARCH_SIXTYFOUR + static uintnat read64u(struct caml_intern_state* s) + { ++ intern_check_read(s, 8); + uintnat res = + ((uintnat) (s->intern_src[0]) << 56) + + ((uintnat) (s->intern_src[1]) << 48) +@@ -217,14 +257,15 @@ static int readvlq(struct caml_intern_state* s, /*out*/ uintnat * res) + } + + Caml_inline void readblock(struct caml_intern_state* s, +- void * dest, intnat len) ++ void * dest, uintnat len) + { ++ intern_check_read(s, len); + memcpy(dest, s->intern_src, len); + s->intern_src += len; + } + + static void intern_init(struct caml_intern_state* s, const void * src, +- void * input) ++ uintnat len, void * input) + { + CAMLassert (s); + /* This is asserted at the beginning of demarshaling primitives. +@@ -232,6 +273,7 @@ static void intern_init(struct caml_intern_state* s, const void * src, + without calling intern_cleanup() during the previous demarshaling. */ + CAMLassert (s->intern_input == NULL && s->intern_obj_table == NULL); + s->intern_src = src; ++ s->intern_src_end = s->intern_src + len; + s->intern_input = input; + } + +@@ -253,8 +295,10 @@ static void intern_cleanup(struct caml_intern_state* s) + if (s->intern_obj_table != NULL) { + caml_stat_free(s->intern_obj_table); + s->intern_obj_table = NULL; ++ s->intern_num_objects = 0; + } + s->intern_dest = NULL; ++ s->intern_dest_end = NULL; + /* free the recursion stack */ + intern_free_stack(s); + } +@@ -264,6 +308,21 @@ CAMLnoret static void intern_failwith2(const char * fun_name, const char * msg) + caml_failwith_value(caml_alloc_sprintf("%s: %s", fun_name, msg)); + } + ++CAMLnoret static void ++intern_cleanup_failwith(struct caml_intern_state* s, const char * msg) ++{ ++ intern_cleanup(s); ++ caml_failwith(msg); ++} ++ ++CAMLnoret static void ++intern_cleanup_failwith2(struct caml_intern_state* s, const char * fun_name, ++ const char * msg) ++{ ++ intern_cleanup(s); ++ intern_failwith2(fun_name, msg); ++} ++ + CAMLnoret static void + intern_cleanup_failwith3(struct caml_intern_state* s, const char * fun_name, + const char * msg, const char * arg) +@@ -398,13 +457,15 @@ static void intern_alloc_storage(struct caml_intern_state* s, mlsize_t whsize, + if (wosize <= Max_young_wosize && wosize != 0) { + v = caml_alloc_small (wosize, String_tag); + s->intern_dest = (header_t *) Hp_val(v); ++ s->intern_dest_end = s->intern_dest + whsize; + } else { + CAMLassert (s->intern_dest == NULL); + } ++ s->intern_num_objects = num_objects; + s->obj_counter = 0; + if (num_objects > 0) { + s->intern_obj_table = +- (value *) caml_stat_alloc_noexc(num_objects * sizeof(value)); ++ (value *) caml_stat_calloc_noexc(num_objects, sizeof(value)); + if (s->intern_obj_table == NULL) { + intern_cleanup(s); + caml_raise_out_of_memory(); +@@ -421,9 +482,15 @@ static value intern_alloc_obj(struct caml_intern_state* s, caml_domain_state* d, + { + void* p; + ++ if (CAMLunlikely(wosize > Max_wosize)) { ++ intern_cleanup_failwith(s, "input_value: block size too large"); ++ } + if (s->intern_dest) { + CAMLassert ((value*)s->intern_dest >= d->young_start && + (value*)s->intern_dest < d->young_end); ++ if (CAMLunlikely(wosize >= s->intern_dest_end - s->intern_dest)) { ++ intern_cleanup_failwith(s, "input_value: invalid allocation"); ++ } + p = s->intern_dest; + *s->intern_dest = Make_header (wosize, tag, 0); + s->intern_dest += 1 + wosize; +@@ -475,9 +542,19 @@ static void intern_rec(struct caml_intern_state* s, + /* Pop item and iterate */ + sp--; + break; +- case OShift: ++ case OShift: { + /* Shift value by an offset */ +- *dest += sp->arg; ++ value v = *dest; ++ intnat ofs = sp->arg; ++ if (Is_block(v) ++ && (uintnat) ofs % sizeof(value) == 0 ++ && ofs >= 0 && ofs < Bosize_val(v) ++ && Tag_val(v + ofs) == Infix_tag ++ && Infix_offset_val(v + ofs) == ofs) ++ *dest = v + ofs; ++ else ++ intern_cleanup_failwith2(s, fun_name, "bad infix offset"); ++ } + /* Pop item and iterate */ + sp--; + break; +@@ -497,11 +574,11 @@ static void intern_rec(struct caml_intern_state* s, + v = Atom(tag); + } else { + v = intern_alloc_obj (s, d, size, tag); +- if (s->intern_obj_table != NULL) +- s->intern_obj_table[s->obj_counter++] = v; ++ intern_record_obj(s, v); + /* For objects, we need to freshen the oid */ + if (tag == Object_tag) { +- CAMLassert(size >= 2); ++ if (CAMLunlikely(size < 2)) ++ intern_cleanup_failwith2(s, fun_name, "bad object block"); + /* Request to read rest of the elements of the block */ + ReadItems(s, &Field(v, 2), size - 2); + /* Request freshing OID */ +@@ -524,10 +601,11 @@ static void intern_rec(struct caml_intern_state* s, + /* Small string */ + len = (code & 0x1F); + read_string: ++ if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1)) ++ intern_cleanup_failwith2(s, fun_name, "string too large"); + size = (len + sizeof(value)) / sizeof(value); + v = intern_alloc_obj (s, d, size, String_tag); +- if (s->intern_obj_table != NULL) +- s->intern_obj_table[s->obj_counter++] = v; ++ intern_record_obj(s, v); + Field(v, size - 1) = 0; + ofs_ind = Bsize_wsize(size) - 1; + Byte(v, ofs_ind) = ofs_ind - len; +@@ -548,16 +626,17 @@ static void intern_rec(struct caml_intern_state* s, + v = Val_long((intnat) (read64u(s))); + break; + #else +- intern_cleanup(s); +- intern_failwith2(fun_name, "integer too large"); ++ intern_cleanup_failwith2(s, fun_name, "integer too large"); + break; + #endif + case CODE_SHARED8: + ofs = read8u(s); + read_shared: + if (!s->compressed) ofs = s->obj_counter - ofs; +- CAMLassert (ofs < s->obj_counter); +- CAMLassert (s->intern_obj_table != NULL); ++ /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */ ++ if (CAMLunlikely(ofs >= s->obj_counter)) { ++ intern_cleanup_failwith2(s, fun_name, "invalid shared reference"); ++ } + v = s->intern_obj_table[ofs]; + break; + case CODE_SHARED16: +@@ -575,12 +654,18 @@ static void intern_rec(struct caml_intern_state* s, + header = (header_t) read32u(s); + tag = Tag_hd(header); + size = Wosize_hd(header); ++ if (CAMLunlikely(tag >= No_scan_tag ++ || tag == Infix_tag || tag == Cont_tag)) ++ intern_cleanup_failwith2(s, fun_name, "invalid block32"); + goto read_block; + #ifdef ARCH_SIXTYFOUR + case CODE_BLOCK64: + header = (header_t) read64u(s); + tag = Tag_hd(header); + size = Wosize_hd(header); ++ if (CAMLunlikely(tag >= No_scan_tag ++ || tag == Infix_tag || tag == Cont_tag)) ++ intern_cleanup_failwith2(s, fun_name, "invalid block64"); + goto read_block; + #endif + case CODE_STRING8: +@@ -597,18 +682,18 @@ static void intern_rec(struct caml_intern_state* s, + case CODE_DOUBLE_LITTLE: + case CODE_DOUBLE_BIG: + v = intern_alloc_obj (s, d, Double_wosize, Double_tag); +- if (s->intern_obj_table != NULL) +- s->intern_obj_table[s->obj_counter++] = v; ++ intern_record_obj(s, v); + readfloat(s, (double *) v, code); + break; + case CODE_DOUBLE_ARRAY8_LITTLE: + case CODE_DOUBLE_ARRAY8_BIG: + len = read8u(s); + read_double_array: ++ if (len == 0) ++ intern_cleanup_failwith2(s, fun_name, "invalid double_array"); + size = len * Double_wosize; + v = intern_alloc_obj (s, d, size, Double_array_tag); +- if (s->intern_obj_table != NULL) +- s->intern_obj_table[s->obj_counter++] = v; ++ intern_record_obj(s, v); + readfloats(s, (double *) v, len, code); + break; + case CODE_DOUBLE_ARRAY32_LITTLE: +@@ -649,28 +734,33 @@ static void intern_rec(struct caml_intern_state* s, + ReadItems(s, dest, 1); + continue; /* with next iteration of main loop, skipping *dest = v */ + case OLD_CODE_CUSTOM: +- intern_cleanup(s); +- intern_failwith2(fun_name, "custom blocks serialized with " +- "OCaml 4.08.0 (or prior) are no longer supported"); ++ intern_cleanup_failwith2(s, fun_name, "custom blocks serialized with " ++ "OCaml 4.08.0 (or prior) are no longer supported"); + break; + case CODE_CUSTOM_LEN: + case CODE_CUSTOM_FIXED: { + uintnat expected_size, temp_size; + const char * name = (const char *) s->intern_src; ++ const unsigned char * name_end = ++ memchr(name, 0, s->intern_src_end - s->intern_src); ++ if (name_end == NULL) { ++ intern_cleanup_failwith2(s, fun_name, "unterminated custom block identifier"); ++ } + ops = caml_find_custom_operations(name); + if (ops == NULL) { + intern_cleanup_failwith3 + (s, fun_name, "unknown custom block identifier", name); + } +- if (code == CODE_CUSTOM_FIXED && ops->fixed_length == NULL) { ++ if ((code == CODE_CUSTOM_FIXED) != (ops->fixed_length != NULL)) { + intern_cleanup_failwith3 +- (s, fun_name, "expected a fixed-size custom block", name); ++ (s, fun_name, "wrong custom block kind", name); + } +- while (*s->intern_src++ != 0) /*nothing*/; /*skip identifier*/ ++ s->intern_src = name_end + 1; /*skip identifier*/ + #ifdef ARCH_SIXTYFOUR + if (code == CODE_CUSTOM_FIXED) { + expected_size = ops->fixed_length->bsize_64; + } else { ++ intern_check_read(s, 4); + s->intern_src += 4; + expected_size = read64u(s); + } +@@ -679,19 +769,20 @@ static void intern_rec(struct caml_intern_state* s, + expected_size = ops->fixed_length->bsize_32; + } else { + expected_size = read32u(s); ++ intern_check_read(s, 8); + s->intern_src += 8; + } + #endif + temp_size = 1 + (expected_size + sizeof(value) - 1) / sizeof(value); +- v = intern_alloc_obj(s, d, temp_size, Custom_tag); ++ v = intern_alloc_obj(s, d, temp_size, Abstract_tag); + Custom_ops_val(v) = ops; + size = ops->deserialize(Data_custom_val(v)); + if (size != expected_size) { + intern_cleanup_failwith3 + (s, fun_name, "error while deserializing custom block", name); + } +- if (s->intern_obj_table != NULL) +- s->intern_obj_table[s->obj_counter++] = v; ++ Unsafe_store_tag_val(v, Custom_tag); ++ intern_record_obj(s, v); + if (ops->finalize != NULL && Is_young(v)) { + /* Remember that the block has a finalizer. */ + add_to_custom_table (&d->minor_tables->custom, v, 0, 1); +@@ -699,8 +790,7 @@ static void intern_rec(struct caml_intern_state* s, + break; + } + default: +- intern_cleanup(s); +- intern_failwith2(fun_name, "ill-formed message"); ++ intern_cleanup_failwith2(s, fun_name, "ill-formed message"); + } + } + } +@@ -836,15 +926,14 @@ static void intern_decompress_input(struct caml_intern_state * s, + h->data_len); + if (res != h->uncompressed_data_len) { + free(blk); +- intern_cleanup(s); +- intern_failwith2(fun_name, "decompression error"); ++ intern_cleanup_failwith2(s, fun_name, "decompression error"); + } + if (s->intern_input != NULL) free(s->intern_input); + s->intern_input = blk; /* to be freed at end of demarshaling */ + s->intern_src = blk; ++ s->intern_src_end = s->intern_src + h->uncompressed_data_len; + } else { +- intern_cleanup(s); +- intern_failwith2(fun_name, "compressed object, cannot decompress"); ++ intern_cleanup_failwith2(s, fun_name, "compressed object, cannot decompress"); + } + } + +@@ -867,6 +956,7 @@ value caml_input_val(struct channel *chan) + else if (r < 5) + caml_failwith("input_value: truncated object"); + s->intern_src = (unsigned char *) header; ++ s->intern_src_end = s->intern_src + 5; + int hlen; + switch (read32u(s)) { + case Intext_magic_number_big: +@@ -877,11 +967,12 @@ value caml_input_val(struct channel *chan) + hlen = 20; break; + } + /* Read the remainder of the header */ +- CAMLassert (hlen > 5); +- if (caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5) ++ if (hlen <= 5 ++ || caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5) + caml_failwith("input_value: truncated object"); + /* Parse the full header */ + s->intern_src = (unsigned char *) header; ++ s->intern_src_end = s->intern_src + hlen; + caml_parse_header(s, "input_value", &h); + /* Read block from channel */ + /* During channel I/O, concurrent [caml_input_val] operations +@@ -897,7 +988,7 @@ value caml_input_val(struct channel *chan) + caml_failwith("input_value: truncated object"); + } + /* Initialize global state */ +- intern_init(s, block, block); ++ intern_init(s, block, h.data_len, block); + intern_decompress_input(s, "input_value", &h); + intern_alloc_storage(s, h.whsize, h.num_objects); + /* Fill it in - obj must NOT be registered as a GC root */ +@@ -933,13 +1024,14 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs) + struct caml_intern_state* s = init_intern_state (); + + /* Initialize global state */ +- intern_init(s, &Byte_u(str, ofs), NULL); ++ intern_init(s, &Byte_u(str, ofs), caml_string_length(str) - ofs, NULL); + caml_parse_header(s, "input_val_from_string", &h); + if (ofs + h.header_len + h.data_len > caml_string_length(str)) + caml_failwith("input_val_from_string: bad length"); + /* Allocate result */ + intern_alloc_storage(s, h.whsize, h.num_objects); + s->intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */ ++ s->intern_src_end = s->intern_src + h.data_len; + /* Decompress if needed */ + intern_decompress_input(s, "input_val_from_string", &h); + /* Fill it in - obj must NOT be registered as a GC root */ +@@ -966,30 +1058,37 @@ static value input_val_from_block(struct caml_intern_state* s, + return (intern_end(s, obj)); + } + +-CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs) +-{ +- struct marshal_header h; +- struct caml_intern_state* s = init_intern_state (); +- +- intern_init(s, data + ofs, data); +- caml_parse_header(s, "input_value_from_malloc", &h); +- return input_val_from_block(s, &h); +-} +- + /* [len] is a number of bytes */ +-CAMLexport value caml_input_value_from_block(const char * data, intnat len) ++static value caml_input_value_from_buffer(const char * fun_name, ++ const char * src, ++ uintnat len, void * input) + { + struct marshal_header h; + struct caml_intern_state* s = init_intern_state (); + +- /* Initialize global state */ +- intern_init(s, data, NULL); +- caml_parse_header(s, "input_value_from_block", &h); ++ intern_init(s, src, len, input); ++ caml_parse_header(s, fun_name, &h); + if (h.header_len + h.data_len > len) +- caml_failwith("input_val_from_block: bad length"); ++ intern_failwith2(fun_name, "bad length"); ++ s->intern_src_end = s->intern_src + h.data_len; + return input_val_from_block(s, &h); + } + ++CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs, ++ uintnat len) ++{ ++ return caml_input_value_from_buffer ++ ("input_value_from_malloc", data + ofs, len, data); ++} ++ ++CAMLexport value caml_input_value_from_block(const char * data, intnat len) ++{ ++ if (len < 0) ++ caml_failwith("input_value_from_block: negative length"); ++ return caml_input_value_from_buffer ++ ("input_value_from_block", data, len, NULL); ++} ++ + /* [ofs] is a [value] that represents a number of bytes + result is a [value] that represents a number of bytes + To handle all marshaling formats, +@@ -1013,6 +1112,7 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs) + struct caml_intern_state *s = init_intern_state (); + + s->intern_src = &Byte_u(buff, Long_val(ofs)); ++ s->intern_src_end = &Byte_u(buff, caml_string_length(buff)); + magic = read32u(s); + switch(magic) { + case Intext_magic_number_small: +@@ -1136,6 +1236,7 @@ CAMLexport double caml_deserialize_float_8(void) + CAMLexport void caml_deserialize_block_1(void * data, intnat len) + { + struct caml_intern_state* s = get_intern_state (); ++ intern_check_read(s, len); + memcpy(data, s->intern_src, len); + s->intern_src += len; + } +@@ -1143,6 +1244,7 @@ CAMLexport void caml_deserialize_block_1(void * data, intnat len) + CAMLexport void caml_deserialize_block_2(void * data, intnat len) + { + struct caml_intern_state* s = get_intern_state (); ++ intern_check_read(s, len * 2); + #ifndef ARCH_BIG_ENDIAN + const unsigned char * p, * q; + for (p = s->intern_src, q = data; len > 0; len--, p += 2, q += 2) +@@ -1157,6 +1259,7 @@ CAMLexport void caml_deserialize_block_2(void * data, intnat len) + CAMLexport void caml_deserialize_block_4(void * data, intnat len) + { + struct caml_intern_state* s = get_intern_state (); ++ intern_check_read(s, len * 4); + #ifndef ARCH_BIG_ENDIAN + const unsigned char * p, * q; + for (p = s->intern_src, q = data; len > 0; len--, p += 4, q += 4) +@@ -1171,6 +1274,7 @@ CAMLexport void caml_deserialize_block_4(void * data, intnat len) + CAMLexport void caml_deserialize_block_8(void * data, intnat len) + { + struct caml_intern_state* s = get_intern_state (); ++ intern_check_read(s, len * 8); + #ifndef ARCH_BIG_ENDIAN + const unsigned char * p, * q; + for (p = s->intern_src, q = data; len > 0; len--, p += 8, q += 8) +@@ -1185,6 +1289,7 @@ CAMLexport void caml_deserialize_block_8(void * data, intnat len) + CAMLexport void caml_deserialize_block_float_8(void * data, intnat len) + { + struct caml_intern_state* s = get_intern_state (); ++ intern_check_read(s, len * 8); + #if ARCH_FLOAT_ENDIANNESS == 0x01234567 + memcpy(data, s->intern_src, len * 8); + s->intern_src += len * 8; +@@ -1204,6 +1309,5 @@ CAMLexport void caml_deserialize_block_float_8(void * data, intnat len) + CAMLexport void caml_deserialize_error(char * msg) + { + struct caml_intern_state* s = get_intern_state (); +- intern_cleanup(s); +- caml_failwith(msg); ++ intern_cleanup_failwith(s, msg); + } +diff --git a/testsuite/tests/lib-marshal/fuzzy.ml b/testsuite/tests/lib-marshal/fuzzy.ml +new file mode 100644 +index 0000000000..88b0d65c93 +--- /dev/null ++++ b/testsuite/tests/lib-marshal/fuzzy.ml +@@ -0,0 +1,141 @@ ++(* TEST ++ arguments = "-n 10000"; ++*) ++ ++(* Can also be used with an external fuzzer such as AFL: ++ ./fuzzy -o fuzzy.in/data ++ afl-fuzz -i fuzzy.in -o fuzzy.out -- ./fuzzy -r ++*) ++ ++(* Some data to be marshaled *) ++ ++type t = A | B of int | C of float | D of string | E of char ++ | F of t | G of t * t | H of int * t | I of t * float | J ++ ++let longstring = ++"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ++ ++let bigint = Int64.to_int 0x123456789ABCDEF0L ++ ++let closures () = ++ let t = true and ++ f = false in ++ let rec odd n = ++ if n = 0 ++ then f ++ else even (n-1) ++ and even n = ++ if n = 0 ++ then t ++ else odd (n-1) ++ in (even, odd) ++ ++let data () = [| ++ Obj.repr 1; ++ Obj.repr (-1); ++ Obj.repr 20000; ++ Obj.repr 0x12345678; ++ Obj.repr bigint; ++ Obj.repr "foo"; ++ Obj.repr longstring; ++ Obj.repr 3.141592654; ++ Obj.repr A; ++ Obj.repr (B 1); ++ Obj.repr (C 2.718); ++ Obj.repr (D "hello"); ++ Obj.repr (E 'l'); ++ Obj.repr (F(B 1)); ++ Obj.repr (G(A, G(B 2, G(C 3.14, G(D "", E 'e'))))); ++ Obj.repr (H(1, A)); ++ Obj.repr (I(B 2, 1e-6)); ++ (let x = D "sharing" in ++ let y = G(x, x) in ++ let z = G(y, G(x, y)) in ++ Obj.repr z); ++ Obj.repr [|1;2;3;4;5;6;7;8|]; ++ Obj.repr [|3.14; 2.718|]; ++ Obj.repr (closures()); ++ Obj.repr 0l; ++ Obj.repr 123456l; ++ Obj.repr 0L; ++ (let i = Int64.of_string "123456789123456" in Obj.repr (i,i)); ++ Obj.repr (Failure "fail"); ++ Obj.repr Bigarray.(Array1.init int16_unsigned c_layout 5 (fun x -> 8*x)) ++|] ++ ++(* Generate file with marshaled data *) ++ ++let generate filename = ++ Out_channel.with_open_bin filename ++ (fun oc -> Marshal.(to_channel oc (data()) [Closures])) ++ ++(* Try to unmarshal possibly malicious data. Clean failure is success. *) ++ ++let test ic = ++ In_channel.set_binary_mode ic true; ++ begin try ++ ignore (Marshal.from_channel ic) ++ with Failure _ | Invalid_argument _ | Out_of_memory -> () ++ end; ++ Gc.full_major() ++ ++(* Internal fuzzing. Rather naive. *) ++ ++let random_offset b = ++ (* Leave the header unchanged *) ++ 20 + Random.int (Bytes.length b - 20) ++ ++let flip_one_byte b = ++ let p = random_offset b in ++ Bytes.set_uint8 b p (Random.int 0x100) ++ ++let flip_one_bit b = ++ let p = random_offset b in ++ let m = 1 lsl (Random.int 8) in ++ Bytes.set_uint8 b p (Bytes.get_uint8 b p lxor m) ++ ++let fuzz niter = ++ let d = Marshal.(to_string (data()) [Closures]) in ++ for i = 1 to niter do ++ let b = Bytes.of_string d in ++ begin match i land 4 with ++ | 0 -> flip_one_byte b ++ | 1 -> flip_one_bit b ++ | 2 -> flip_one_byte b; flip_one_byte b ++ | _ (*3*) -> flip_one_bit b; flip_one_bit b ++ end; ++ begin try ++ ignore (Marshal.from_bytes b 0) ++ with Failure _ | Invalid_argument _ | Out_of_memory -> () ++ end; ++ Gc.full_major() ++ done ++ ++let fuzz1 () = ++ let d = Marshal.(to_string (data()) [Closures]) in ++ let b = Bytes.of_string d in ++ for i = 0 to String.length d - 1 do ++ for x = 0 to 255 do ++ Bytes.set_uint8 b i x; ++ begin try ++ ignore (Marshal.from_bytes b 0) ++ with Failure _ | Invalid_argument _ | Out_of_memory -> () ++ end; ++ Gc.full_major() ++ done; ++ Bytes.set_uint8 b i (Bytes.get_uint8 b i) ++ done ++ ++let () = ++ Arg.parse [ ++ "-o", Arg.String generate, ++ " Save marshaled data to "; ++ "-n", Arg.Int fuzz, ++ " Perform internal fuzzing test (random)"; ++ "-x", Arg.Unit fuzz1, ++ " Perform internal fuzzing test (exhaustive 1-byte)"; ++ "-r", Arg.Unit (fun () -> test stdin), ++ " Read marshaled data from standard input" ++ ] ++ (fun s -> raise (Arg.Bad ("don't know what to do with " ^ s))) ++ "Usage: fuzzy [option].\nOptions are:" +-- +2.52.0 + diff --git a/ocaml.spec b/ocaml.spec index 6056f40..bcf3f33 100644 --- a/ocaml.spec +++ b/ocaml.spec @@ -44,7 +44,7 @@ ExcludeArch: %{ix86} Name: ocaml Version: 5.2.0 -Release: 4%{?dist} +Release: 5%{?dist} Summary: OCaml compiler and programming environment @@ -94,6 +94,10 @@ Patch: 0006-Reload-exception-pointer-register-in-caml_c_call.patch # https://github.com/ocaml/ocaml/commit/114ddae2d4c85391a4f939dc6623424ae35a07aa Patch: 0007-Compute-more-accurate-instruction-sizes-for-branch-r.patch +# Fix for CVE-2026-28364 +Patch: 0001-Fix-memory-corruption-when-an-exception-is-raised-du.patch +Patch: 0002-robustify-intern.c.patch + BuildRequires: make BuildRequires: git BuildRequires: gcc @@ -480,6 +484,11 @@ hardlink -t $RPM_BUILD_ROOT%{_libdir}/ocaml/stublibs %changelog +* Mon Mar 2 2026 Richard W.M. Jones - 5.2.0-5 +- Fix remote code execution via buffer over-read in Marshal deserialization + CVE-2026-28364 + Resolves: RHEL-152488 + * Tue Oct 29 2024 Troy Dawson - 5.2.0-4 - Bump release for October 2024 mass rebuild: Resolves: RHEL-64018