837 lines
30 KiB
Diff
837 lines
30 KiB
Diff
From 5bc4360d8fc5660615bf08fc51b7456d062773a2 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= <n.oje.bar@gmail.com>
|
|
Date: Tue, 16 Dec 2025 16:34:35 +0100
|
|
Subject: [PATCH 2/2] robustify intern.c
|
|
|
|
Co-Authored-By: Xavier Leroy <xavier.leroy@college-de-france.fr>
|
|
(cherry picked from commit e3919fef436f89271bc30bbe8592851f7289fb68)
|
|
---
|
|
runtime/bigarray.c | 13 +-
|
|
runtime/caml/custom.h | 6 +
|
|
runtime/caml/intext.h | 16 +-
|
|
runtime/intern.c | 216 ++++++++++++++++++++-------
|
|
testsuite/tests/lib-marshal/fuzzy.ml | 141 +++++++++++++++++
|
|
5 files changed, 325 insertions(+), 67 deletions(-)
|
|
create mode 100644 testsuite/tests/lib-marshal/fuzzy.ml
|
|
|
|
diff --git a/runtime/bigarray.c b/runtime/bigarray.c
|
|
index 1e529ffeca..45de8589d2 100644
|
|
--- a/runtime/bigarray.c
|
|
+++ b/runtime/bigarray.c
|
|
@@ -592,12 +592,16 @@ CAMLexport uintnat caml_ba_deserialize(void * dst)
|
|
{
|
|
struct caml_ba_array * b = dst;
|
|
int i;
|
|
- uintnat num_elts, size;
|
|
+ uintnat num_elts, size, descriptor_size;
|
|
|
|
/* Read back header information */
|
|
- b->num_dims = caml_deserialize_uint_4();
|
|
- if (b->num_dims < 0 || b->num_dims > CAML_BA_MAX_NUM_DIMS)
|
|
+ int num_dims = caml_deserialize_uint_4();
|
|
+ if (num_dims < 0 || num_dims > CAML_BA_MAX_NUM_DIMS)
|
|
caml_deserialize_error("input_value: wrong number of bigarray dimensions");
|
|
+ descriptor_size = SIZEOF_BA_ARRAY + num_dims * sizeof(intnat);
|
|
+ if (descriptor_size > Bsize_custom_data(dst))
|
|
+ caml_deserialize_error("input_value: bigarray buffer overflow");
|
|
+ b->num_dims = num_dims;
|
|
b->flags = caml_deserialize_uint_4() | CAML_BA_MANAGED;
|
|
b->proxy = NULL;
|
|
for (i = 0; i < b->num_dims; i++) {
|
|
@@ -646,8 +650,7 @@ CAMLexport uintnat caml_ba_deserialize(void * dst)
|
|
case CAML_BA_NATIVE_INT:
|
|
caml_ba_deserialize_longarray(b->data, num_elts); break;
|
|
}
|
|
- /* PR#5516: use C99's flexible array types if possible */
|
|
- return SIZEOF_BA_ARRAY + b->num_dims * sizeof(intnat);
|
|
+ return descriptor_size;
|
|
}
|
|
|
|
/* Allocate a bigarray from OCaml */
|
|
diff --git a/runtime/caml/custom.h b/runtime/caml/custom.h
|
|
index 3731b900dc..e44ac78e20 100644
|
|
--- a/runtime/caml/custom.h
|
|
+++ b/runtime/caml/custom.h
|
|
@@ -47,6 +47,12 @@ struct custom_operations {
|
|
|
|
#define Custom_ops_val(v) (*((const struct custom_operations **) (v)))
|
|
|
|
+/* Given a pointer [p] to the data part of a custom block
|
|
+ (as returned by [Data_custom_val]), return the size of this data part
|
|
+ (in words or in bytes). */
|
|
+#define Wsize_custom_data(p) (Wosize_hp((header_t *)(p) - 2) - 1)
|
|
+#define Bsize_custom_data(p) (Bsize_wsize(Wsize_custom_data(p)))
|
|
+
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
diff --git a/runtime/caml/intext.h b/runtime/caml/intext.h
|
|
index 0ed2fc0b8b..727b4caedf 100644
|
|
--- a/runtime/caml/intext.h
|
|
+++ b/runtime/caml/intext.h
|
|
@@ -63,9 +63,11 @@
|
|
Each byte carries 7 bits of the number.
|
|
Bytes come in big-endian order: xxxxxxx are the 7 high-order bits,
|
|
zzzzzzzz the 7 low-order bits.
|
|
+
|
|
+ The header size is stored in a 6-bit field, so it's 63 at most.
|
|
*/
|
|
|
|
-#define MAX_INTEXT_HEADER_SIZE 55
|
|
+#define MAX_INTEXT_HEADER_SIZE 63
|
|
|
|
/* Codes for the compact format */
|
|
|
|
@@ -168,11 +170,13 @@ value caml_input_val (struct channel * chan);
|
|
CAMLextern value caml_input_val_from_string (value str, intnat ofs);
|
|
/* Read a structured value from the OCaml string [str], starting
|
|
at offset [ofs]. */
|
|
-CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs);
|
|
- /* Read a structured value from a malloced buffer. [data] points
|
|
- to the beginning of the buffer, and [ofs] is the offset of the
|
|
- beginning of the externed data in this buffer. The buffer is
|
|
- deallocated with [free] on return, or if an exception is raised. */
|
|
+CAMLextern value caml_input_value_from_malloc(char * data, intnat ofs,
|
|
+ uintnat len);
|
|
+ /* Read a structured value from a malloced buffer. [data] points to the
|
|
+ beginning of the buffer, [ofs] is the offset of the beginning of the
|
|
+ externed data in this buffer, and [len] is the length in bytes of valid
|
|
+ data in this buffer. The buffer is deallocated with [free] on return,
|
|
+ or if an exception is raised. */
|
|
CAMLextern value caml_input_value_from_block(const char * data, intnat len);
|
|
/* Read a structured value from a user-provided buffer. [data] points
|
|
to the beginning of the externed data in this buffer,
|
|
diff --git a/runtime/intern.c b/runtime/intern.c
|
|
index f52ac51a6c..f753b7a32d 100644
|
|
--- a/runtime/intern.c
|
|
+++ b/runtime/intern.c
|
|
@@ -62,6 +62,9 @@ struct caml_intern_state {
|
|
const unsigned char * intern_src;
|
|
/* Reading pointer in block holding input data. */
|
|
|
|
+ const unsigned char * intern_src_end;
|
|
+ /* Pointer to the end of the readable data. */
|
|
+
|
|
unsigned char * intern_input;
|
|
/* Pointer to beginning of block holding input data,
|
|
if non-NULL this pointer will be freed by the cleanup function.
|
|
@@ -76,6 +79,9 @@ struct caml_intern_state {
|
|
value * intern_obj_table;
|
|
/* The pointers to objects already seen */
|
|
|
|
+ uintnat intern_num_objects;
|
|
+ /* How many objects are expected (from the header) */
|
|
+
|
|
struct intern_item intern_stack_init[INTERN_STACK_INIT_SIZE];
|
|
/* The initial intern stack */
|
|
|
|
@@ -88,6 +94,8 @@ struct caml_intern_state {
|
|
/* Writing pointer in destination block. Only used when the object fits in
|
|
the minor heap. */
|
|
|
|
+ header_t * intern_dest_end;
|
|
+
|
|
char compressed;
|
|
/* 1 if the compressed format is in use, 0 otherwise */
|
|
};
|
|
@@ -110,10 +118,13 @@ static struct caml_intern_state* init_intern_state (void)
|
|
s = caml_stat_alloc(sizeof(struct caml_intern_state));
|
|
|
|
s->intern_src = NULL;
|
|
+ s->intern_src_end = NULL;
|
|
s->intern_input = NULL;
|
|
s->obj_counter = 0;
|
|
+ s->intern_num_objects = 0;
|
|
s->intern_obj_table = NULL;
|
|
s->intern_dest = NULL;
|
|
+ s->intern_dest_end = NULL;
|
|
init_intern_stack(s);
|
|
|
|
Caml_state->intern_state = s;
|
|
@@ -141,19 +152,44 @@ void caml_free_intern_state (void)
|
|
}
|
|
}
|
|
|
|
+CAMLnoret static void intern_cleanup_failwith(struct caml_intern_state* s, const char * msg);
|
|
static char * intern_resolve_code_pointer(unsigned char digest[16],
|
|
asize_t offset);
|
|
|
|
CAMLnoret static void intern_bad_code_pointer(unsigned char digest[16]);
|
|
|
|
+Caml_inline void intern_check_read(struct caml_intern_state* s, uintnat len)
|
|
+{
|
|
+ if (CAMLunlikely(len > s->intern_src_end - s->intern_src)) {
|
|
+ intern_cleanup_failwith(s, "input_value: invalid read");
|
|
+ }
|
|
+}
|
|
+
|
|
+Caml_inline void intern_record_obj(struct caml_intern_state* s, value v)
|
|
+{
|
|
+ if (s->intern_obj_table != NULL) {
|
|
+ if (CAMLunlikely(s->obj_counter >= s->intern_num_objects)) {
|
|
+ intern_cleanup_failwith(s, "input_value: too many objects");
|
|
+ }
|
|
+ s->intern_obj_table[s->obj_counter++] = v;
|
|
+ }
|
|
+}
|
|
+
|
|
Caml_inline unsigned char read8u(struct caml_intern_state* s)
|
|
-{ return *s->intern_src++; }
|
|
+{
|
|
+ intern_check_read(s, 1);
|
|
+ return *s->intern_src++;
|
|
+}
|
|
|
|
Caml_inline signed char read8s(struct caml_intern_state* s)
|
|
-{ return *s->intern_src++; }
|
|
+{
|
|
+ intern_check_read(s, 1);
|
|
+ return *s->intern_src++;
|
|
+}
|
|
|
|
Caml_inline uint16_t read16u(struct caml_intern_state* s)
|
|
{
|
|
+ intern_check_read(s, 2);
|
|
uint16_t res = (s->intern_src[0] << 8) + s->intern_src[1];
|
|
s->intern_src += 2;
|
|
return res;
|
|
@@ -161,6 +197,7 @@ Caml_inline uint16_t read16u(struct caml_intern_state* s)
|
|
|
|
Caml_inline int16_t read16s(struct caml_intern_state* s)
|
|
{
|
|
+ intern_check_read(s, 2);
|
|
int16_t res = (s->intern_src[0] << 8) + s->intern_src[1];
|
|
s->intern_src += 2;
|
|
return res;
|
|
@@ -168,6 +205,7 @@ Caml_inline int16_t read16s(struct caml_intern_state* s)
|
|
|
|
Caml_inline uint32_t read32u(struct caml_intern_state* s)
|
|
{
|
|
+ intern_check_read(s, 4);
|
|
uint32_t res =
|
|
((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16)
|
|
+ (s->intern_src[2] << 8) + s->intern_src[3];
|
|
@@ -177,6 +215,7 @@ Caml_inline uint32_t read32u(struct caml_intern_state* s)
|
|
|
|
Caml_inline int32_t read32s(struct caml_intern_state* s)
|
|
{
|
|
+ intern_check_read(s, 4);
|
|
int32_t res =
|
|
((uint32_t)(s->intern_src[0]) << 24) + (s->intern_src[1] << 16)
|
|
+ (s->intern_src[2] << 8) + s->intern_src[3];
|
|
@@ -187,6 +226,7 @@ Caml_inline int32_t read32s(struct caml_intern_state* s)
|
|
#ifdef ARCH_SIXTYFOUR
|
|
static uintnat read64u(struct caml_intern_state* s)
|
|
{
|
|
+ intern_check_read(s, 8);
|
|
uintnat res =
|
|
((uintnat) (s->intern_src[0]) << 56)
|
|
+ ((uintnat) (s->intern_src[1]) << 48)
|
|
@@ -217,14 +257,15 @@ static int readvlq(struct caml_intern_state* s, /*out*/ uintnat * res)
|
|
}
|
|
|
|
Caml_inline void readblock(struct caml_intern_state* s,
|
|
- void * dest, intnat len)
|
|
+ void * dest, uintnat len)
|
|
{
|
|
+ intern_check_read(s, len);
|
|
memcpy(dest, s->intern_src, len);
|
|
s->intern_src += len;
|
|
}
|
|
|
|
static void intern_init(struct caml_intern_state* s, const void * src,
|
|
- void * input)
|
|
+ uintnat len, void * input)
|
|
{
|
|
CAMLassert (s);
|
|
/* This is asserted at the beginning of demarshaling primitives.
|
|
@@ -232,6 +273,7 @@ static void intern_init(struct caml_intern_state* s, const void * src,
|
|
without calling intern_cleanup() during the previous demarshaling. */
|
|
CAMLassert (s->intern_input == NULL && s->intern_obj_table == NULL);
|
|
s->intern_src = src;
|
|
+ s->intern_src_end = s->intern_src + len;
|
|
s->intern_input = input;
|
|
}
|
|
|
|
@@ -253,8 +295,10 @@ static void intern_cleanup(struct caml_intern_state* s)
|
|
if (s->intern_obj_table != NULL) {
|
|
caml_stat_free(s->intern_obj_table);
|
|
s->intern_obj_table = NULL;
|
|
+ s->intern_num_objects = 0;
|
|
}
|
|
s->intern_dest = NULL;
|
|
+ s->intern_dest_end = NULL;
|
|
/* free the recursion stack */
|
|
intern_free_stack(s);
|
|
}
|
|
@@ -264,6 +308,21 @@ CAMLnoret static void intern_failwith2(const char * fun_name, const char * msg)
|
|
caml_failwith_value(caml_alloc_sprintf("%s: %s", fun_name, msg));
|
|
}
|
|
|
|
+CAMLnoret static void
|
|
+intern_cleanup_failwith(struct caml_intern_state* s, const char * msg)
|
|
+{
|
|
+ intern_cleanup(s);
|
|
+ caml_failwith(msg);
|
|
+}
|
|
+
|
|
+CAMLnoret static void
|
|
+intern_cleanup_failwith2(struct caml_intern_state* s, const char * fun_name,
|
|
+ const char * msg)
|
|
+{
|
|
+ intern_cleanup(s);
|
|
+ intern_failwith2(fun_name, msg);
|
|
+}
|
|
+
|
|
CAMLnoret static void
|
|
intern_cleanup_failwith3(struct caml_intern_state* s, const char * fun_name,
|
|
const char * msg, const char * arg)
|
|
@@ -398,13 +457,15 @@ static void intern_alloc_storage(struct caml_intern_state* s, mlsize_t whsize,
|
|
if (wosize <= Max_young_wosize && wosize != 0) {
|
|
v = caml_alloc_small (wosize, String_tag);
|
|
s->intern_dest = (header_t *) Hp_val(v);
|
|
+ s->intern_dest_end = s->intern_dest + whsize;
|
|
} else {
|
|
CAMLassert (s->intern_dest == NULL);
|
|
}
|
|
+ s->intern_num_objects = num_objects;
|
|
s->obj_counter = 0;
|
|
if (num_objects > 0) {
|
|
s->intern_obj_table =
|
|
- (value *) caml_stat_alloc_noexc(num_objects * sizeof(value));
|
|
+ (value *) caml_stat_calloc_noexc(num_objects, sizeof(value));
|
|
if (s->intern_obj_table == NULL) {
|
|
intern_cleanup(s);
|
|
caml_raise_out_of_memory();
|
|
@@ -421,9 +482,15 @@ static value intern_alloc_obj(struct caml_intern_state* s, caml_domain_state* d,
|
|
{
|
|
void* p;
|
|
|
|
+ if (CAMLunlikely(wosize > Max_wosize)) {
|
|
+ intern_cleanup_failwith(s, "input_value: block size too large");
|
|
+ }
|
|
if (s->intern_dest) {
|
|
CAMLassert ((value*)s->intern_dest >= d->young_start &&
|
|
(value*)s->intern_dest < d->young_end);
|
|
+ if (CAMLunlikely(wosize >= s->intern_dest_end - s->intern_dest)) {
|
|
+ intern_cleanup_failwith(s, "input_value: invalid allocation");
|
|
+ }
|
|
p = s->intern_dest;
|
|
*s->intern_dest = Make_header (wosize, tag, 0);
|
|
s->intern_dest += 1 + wosize;
|
|
@@ -475,9 +542,19 @@ static void intern_rec(struct caml_intern_state* s,
|
|
/* Pop item and iterate */
|
|
sp--;
|
|
break;
|
|
- case OShift:
|
|
+ case OShift: {
|
|
/* Shift value by an offset */
|
|
- *dest += sp->arg;
|
|
+ value v = *dest;
|
|
+ intnat ofs = sp->arg;
|
|
+ if (Is_block(v)
|
|
+ && (uintnat) ofs % sizeof(value) == 0
|
|
+ && ofs >= 0 && ofs < Bosize_val(v)
|
|
+ && Tag_val(v + ofs) == Infix_tag
|
|
+ && Infix_offset_val(v + ofs) == ofs)
|
|
+ *dest = v + ofs;
|
|
+ else
|
|
+ intern_cleanup_failwith2(s, fun_name, "bad infix offset");
|
|
+ }
|
|
/* Pop item and iterate */
|
|
sp--;
|
|
break;
|
|
@@ -497,11 +574,11 @@ static void intern_rec(struct caml_intern_state* s,
|
|
v = Atom(tag);
|
|
} else {
|
|
v = intern_alloc_obj (s, d, size, tag);
|
|
- if (s->intern_obj_table != NULL)
|
|
- s->intern_obj_table[s->obj_counter++] = v;
|
|
+ intern_record_obj(s, v);
|
|
/* For objects, we need to freshen the oid */
|
|
if (tag == Object_tag) {
|
|
- CAMLassert(size >= 2);
|
|
+ if (CAMLunlikely(size < 2))
|
|
+ intern_cleanup_failwith2(s, fun_name, "bad object block");
|
|
/* Request to read rest of the elements of the block */
|
|
ReadItems(s, &Field(v, 2), size - 2);
|
|
/* Request freshing OID */
|
|
@@ -524,10 +601,11 @@ static void intern_rec(struct caml_intern_state* s,
|
|
/* Small string */
|
|
len = (code & 0x1F);
|
|
read_string:
|
|
+ if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1))
|
|
+ intern_cleanup_failwith2(s, fun_name, "string too large");
|
|
size = (len + sizeof(value)) / sizeof(value);
|
|
v = intern_alloc_obj (s, d, size, String_tag);
|
|
- if (s->intern_obj_table != NULL)
|
|
- s->intern_obj_table[s->obj_counter++] = v;
|
|
+ intern_record_obj(s, v);
|
|
Field(v, size - 1) = 0;
|
|
ofs_ind = Bsize_wsize(size) - 1;
|
|
Byte(v, ofs_ind) = ofs_ind - len;
|
|
@@ -548,16 +626,17 @@ static void intern_rec(struct caml_intern_state* s,
|
|
v = Val_long((intnat) (read64u(s)));
|
|
break;
|
|
#else
|
|
- intern_cleanup(s);
|
|
- intern_failwith2(fun_name, "integer too large");
|
|
+ intern_cleanup_failwith2(s, fun_name, "integer too large");
|
|
break;
|
|
#endif
|
|
case CODE_SHARED8:
|
|
ofs = read8u(s);
|
|
read_shared:
|
|
if (!s->compressed) ofs = s->obj_counter - ofs;
|
|
- CAMLassert (ofs < s->obj_counter);
|
|
- CAMLassert (s->intern_obj_table != NULL);
|
|
+ /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */
|
|
+ if (CAMLunlikely(ofs >= s->obj_counter)) {
|
|
+ intern_cleanup_failwith2(s, fun_name, "invalid shared reference");
|
|
+ }
|
|
v = s->intern_obj_table[ofs];
|
|
break;
|
|
case CODE_SHARED16:
|
|
@@ -575,12 +654,18 @@ static void intern_rec(struct caml_intern_state* s,
|
|
header = (header_t) read32u(s);
|
|
tag = Tag_hd(header);
|
|
size = Wosize_hd(header);
|
|
+ if (CAMLunlikely(tag >= No_scan_tag
|
|
+ || tag == Infix_tag || tag == Cont_tag))
|
|
+ intern_cleanup_failwith2(s, fun_name, "invalid block32");
|
|
goto read_block;
|
|
#ifdef ARCH_SIXTYFOUR
|
|
case CODE_BLOCK64:
|
|
header = (header_t) read64u(s);
|
|
tag = Tag_hd(header);
|
|
size = Wosize_hd(header);
|
|
+ if (CAMLunlikely(tag >= No_scan_tag
|
|
+ || tag == Infix_tag || tag == Cont_tag))
|
|
+ intern_cleanup_failwith2(s, fun_name, "invalid block64");
|
|
goto read_block;
|
|
#endif
|
|
case CODE_STRING8:
|
|
@@ -597,18 +682,18 @@ static void intern_rec(struct caml_intern_state* s,
|
|
case CODE_DOUBLE_LITTLE:
|
|
case CODE_DOUBLE_BIG:
|
|
v = intern_alloc_obj (s, d, Double_wosize, Double_tag);
|
|
- if (s->intern_obj_table != NULL)
|
|
- s->intern_obj_table[s->obj_counter++] = v;
|
|
+ intern_record_obj(s, v);
|
|
readfloat(s, (double *) v, code);
|
|
break;
|
|
case CODE_DOUBLE_ARRAY8_LITTLE:
|
|
case CODE_DOUBLE_ARRAY8_BIG:
|
|
len = read8u(s);
|
|
read_double_array:
|
|
+ if (len == 0)
|
|
+ intern_cleanup_failwith2(s, fun_name, "invalid double_array");
|
|
size = len * Double_wosize;
|
|
v = intern_alloc_obj (s, d, size, Double_array_tag);
|
|
- if (s->intern_obj_table != NULL)
|
|
- s->intern_obj_table[s->obj_counter++] = v;
|
|
+ intern_record_obj(s, v);
|
|
readfloats(s, (double *) v, len, code);
|
|
break;
|
|
case CODE_DOUBLE_ARRAY32_LITTLE:
|
|
@@ -649,28 +734,33 @@ static void intern_rec(struct caml_intern_state* s,
|
|
ReadItems(s, dest, 1);
|
|
continue; /* with next iteration of main loop, skipping *dest = v */
|
|
case OLD_CODE_CUSTOM:
|
|
- intern_cleanup(s);
|
|
- intern_failwith2(fun_name, "custom blocks serialized with "
|
|
- "OCaml 4.08.0 (or prior) are no longer supported");
|
|
+ intern_cleanup_failwith2(s, fun_name, "custom blocks serialized with "
|
|
+ "OCaml 4.08.0 (or prior) are no longer supported");
|
|
break;
|
|
case CODE_CUSTOM_LEN:
|
|
case CODE_CUSTOM_FIXED: {
|
|
uintnat expected_size, temp_size;
|
|
const char * name = (const char *) s->intern_src;
|
|
+ const unsigned char * name_end =
|
|
+ memchr(name, 0, s->intern_src_end - s->intern_src);
|
|
+ if (name_end == NULL) {
|
|
+ intern_cleanup_failwith2(s, fun_name, "unterminated custom block identifier");
|
|
+ }
|
|
ops = caml_find_custom_operations(name);
|
|
if (ops == NULL) {
|
|
intern_cleanup_failwith3
|
|
(s, fun_name, "unknown custom block identifier", name);
|
|
}
|
|
- if (code == CODE_CUSTOM_FIXED && ops->fixed_length == NULL) {
|
|
+ if ((code == CODE_CUSTOM_FIXED) != (ops->fixed_length != NULL)) {
|
|
intern_cleanup_failwith3
|
|
- (s, fun_name, "expected a fixed-size custom block", name);
|
|
+ (s, fun_name, "wrong custom block kind", name);
|
|
}
|
|
- while (*s->intern_src++ != 0) /*nothing*/; /*skip identifier*/
|
|
+ s->intern_src = name_end + 1; /*skip identifier*/
|
|
#ifdef ARCH_SIXTYFOUR
|
|
if (code == CODE_CUSTOM_FIXED) {
|
|
expected_size = ops->fixed_length->bsize_64;
|
|
} else {
|
|
+ intern_check_read(s, 4);
|
|
s->intern_src += 4;
|
|
expected_size = read64u(s);
|
|
}
|
|
@@ -679,19 +769,20 @@ static void intern_rec(struct caml_intern_state* s,
|
|
expected_size = ops->fixed_length->bsize_32;
|
|
} else {
|
|
expected_size = read32u(s);
|
|
+ intern_check_read(s, 8);
|
|
s->intern_src += 8;
|
|
}
|
|
#endif
|
|
temp_size = 1 + (expected_size + sizeof(value) - 1) / sizeof(value);
|
|
- v = intern_alloc_obj(s, d, temp_size, Custom_tag);
|
|
+ v = intern_alloc_obj(s, d, temp_size, Abstract_tag);
|
|
Custom_ops_val(v) = ops;
|
|
size = ops->deserialize(Data_custom_val(v));
|
|
if (size != expected_size) {
|
|
intern_cleanup_failwith3
|
|
(s, fun_name, "error while deserializing custom block", name);
|
|
}
|
|
- if (s->intern_obj_table != NULL)
|
|
- s->intern_obj_table[s->obj_counter++] = v;
|
|
+ Unsafe_store_tag_val(v, Custom_tag);
|
|
+ intern_record_obj(s, v);
|
|
if (ops->finalize != NULL && Is_young(v)) {
|
|
/* Remember that the block has a finalizer. */
|
|
add_to_custom_table (&d->minor_tables->custom, v, 0, 1);
|
|
@@ -699,8 +790,7 @@ static void intern_rec(struct caml_intern_state* s,
|
|
break;
|
|
}
|
|
default:
|
|
- intern_cleanup(s);
|
|
- intern_failwith2(fun_name, "ill-formed message");
|
|
+ intern_cleanup_failwith2(s, fun_name, "ill-formed message");
|
|
}
|
|
}
|
|
}
|
|
@@ -836,15 +926,14 @@ static void intern_decompress_input(struct caml_intern_state * s,
|
|
h->data_len);
|
|
if (res != h->uncompressed_data_len) {
|
|
free(blk);
|
|
- intern_cleanup(s);
|
|
- intern_failwith2(fun_name, "decompression error");
|
|
+ intern_cleanup_failwith2(s, fun_name, "decompression error");
|
|
}
|
|
if (s->intern_input != NULL) free(s->intern_input);
|
|
s->intern_input = blk; /* to be freed at end of demarshaling */
|
|
s->intern_src = blk;
|
|
+ s->intern_src_end = s->intern_src + h->uncompressed_data_len;
|
|
} else {
|
|
- intern_cleanup(s);
|
|
- intern_failwith2(fun_name, "compressed object, cannot decompress");
|
|
+ intern_cleanup_failwith2(s, fun_name, "compressed object, cannot decompress");
|
|
}
|
|
}
|
|
|
|
@@ -867,6 +956,7 @@ value caml_input_val(struct channel *chan)
|
|
else if (r < 5)
|
|
caml_failwith("input_value: truncated object");
|
|
s->intern_src = (unsigned char *) header;
|
|
+ s->intern_src_end = s->intern_src + 5;
|
|
int hlen;
|
|
switch (read32u(s)) {
|
|
case Intext_magic_number_big:
|
|
@@ -877,11 +967,12 @@ value caml_input_val(struct channel *chan)
|
|
hlen = 20; break;
|
|
}
|
|
/* Read the remainder of the header */
|
|
- CAMLassert (hlen > 5);
|
|
- if (caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5)
|
|
+ if (hlen <= 5
|
|
+ || caml_really_getblock(chan, header + 5, hlen - 5) < hlen - 5)
|
|
caml_failwith("input_value: truncated object");
|
|
/* Parse the full header */
|
|
s->intern_src = (unsigned char *) header;
|
|
+ s->intern_src_end = s->intern_src + hlen;
|
|
caml_parse_header(s, "input_value", &h);
|
|
/* Read block from channel */
|
|
/* During channel I/O, concurrent [caml_input_val] operations
|
|
@@ -897,7 +988,7 @@ value caml_input_val(struct channel *chan)
|
|
caml_failwith("input_value: truncated object");
|
|
}
|
|
/* Initialize global state */
|
|
- intern_init(s, block, block);
|
|
+ intern_init(s, block, h.data_len, block);
|
|
intern_decompress_input(s, "input_value", &h);
|
|
intern_alloc_storage(s, h.whsize, h.num_objects);
|
|
/* Fill it in - obj must NOT be registered as a GC root */
|
|
@@ -933,13 +1024,14 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs)
|
|
struct caml_intern_state* s = init_intern_state ();
|
|
|
|
/* Initialize global state */
|
|
- intern_init(s, &Byte_u(str, ofs), NULL);
|
|
+ intern_init(s, &Byte_u(str, ofs), caml_string_length(str) - ofs, NULL);
|
|
caml_parse_header(s, "input_val_from_string", &h);
|
|
if (ofs + h.header_len + h.data_len > caml_string_length(str))
|
|
caml_failwith("input_val_from_string: bad length");
|
|
/* Allocate result */
|
|
intern_alloc_storage(s, h.whsize, h.num_objects);
|
|
s->intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */
|
|
+ s->intern_src_end = s->intern_src + h.data_len;
|
|
/* Decompress if needed */
|
|
intern_decompress_input(s, "input_val_from_string", &h);
|
|
/* Fill it in - obj must NOT be registered as a GC root */
|
|
@@ -966,30 +1058,37 @@ static value input_val_from_block(struct caml_intern_state* s,
|
|
return (intern_end(s, obj));
|
|
}
|
|
|
|
-CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs)
|
|
-{
|
|
- struct marshal_header h;
|
|
- struct caml_intern_state* s = init_intern_state ();
|
|
-
|
|
- intern_init(s, data + ofs, data);
|
|
- caml_parse_header(s, "input_value_from_malloc", &h);
|
|
- return input_val_from_block(s, &h);
|
|
-}
|
|
-
|
|
/* [len] is a number of bytes */
|
|
-CAMLexport value caml_input_value_from_block(const char * data, intnat len)
|
|
+static value caml_input_value_from_buffer(const char * fun_name,
|
|
+ const char * src,
|
|
+ uintnat len, void * input)
|
|
{
|
|
struct marshal_header h;
|
|
struct caml_intern_state* s = init_intern_state ();
|
|
|
|
- /* Initialize global state */
|
|
- intern_init(s, data, NULL);
|
|
- caml_parse_header(s, "input_value_from_block", &h);
|
|
+ intern_init(s, src, len, input);
|
|
+ caml_parse_header(s, fun_name, &h);
|
|
if (h.header_len + h.data_len > len)
|
|
- caml_failwith("input_val_from_block: bad length");
|
|
+ intern_failwith2(fun_name, "bad length");
|
|
+ s->intern_src_end = s->intern_src + h.data_len;
|
|
return input_val_from_block(s, &h);
|
|
}
|
|
|
|
+CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs,
|
|
+ uintnat len)
|
|
+{
|
|
+ return caml_input_value_from_buffer
|
|
+ ("input_value_from_malloc", data + ofs, len, data);
|
|
+}
|
|
+
|
|
+CAMLexport value caml_input_value_from_block(const char * data, intnat len)
|
|
+{
|
|
+ if (len < 0)
|
|
+ caml_failwith("input_value_from_block: negative length");
|
|
+ return caml_input_value_from_buffer
|
|
+ ("input_value_from_block", data, len, NULL);
|
|
+}
|
|
+
|
|
/* [ofs] is a [value] that represents a number of bytes
|
|
result is a [value] that represents a number of bytes
|
|
To handle all marshaling formats,
|
|
@@ -1013,6 +1112,7 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs)
|
|
struct caml_intern_state *s = init_intern_state ();
|
|
|
|
s->intern_src = &Byte_u(buff, Long_val(ofs));
|
|
+ s->intern_src_end = &Byte_u(buff, caml_string_length(buff));
|
|
magic = read32u(s);
|
|
switch(magic) {
|
|
case Intext_magic_number_small:
|
|
@@ -1136,6 +1236,7 @@ CAMLexport double caml_deserialize_float_8(void)
|
|
CAMLexport void caml_deserialize_block_1(void * data, intnat len)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
+ intern_check_read(s, len);
|
|
memcpy(data, s->intern_src, len);
|
|
s->intern_src += len;
|
|
}
|
|
@@ -1143,6 +1244,7 @@ CAMLexport void caml_deserialize_block_1(void * data, intnat len)
|
|
CAMLexport void caml_deserialize_block_2(void * data, intnat len)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
+ intern_check_read(s, len * 2);
|
|
#ifndef ARCH_BIG_ENDIAN
|
|
const unsigned char * p, * q;
|
|
for (p = s->intern_src, q = data; len > 0; len--, p += 2, q += 2)
|
|
@@ -1157,6 +1259,7 @@ CAMLexport void caml_deserialize_block_2(void * data, intnat len)
|
|
CAMLexport void caml_deserialize_block_4(void * data, intnat len)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
+ intern_check_read(s, len * 4);
|
|
#ifndef ARCH_BIG_ENDIAN
|
|
const unsigned char * p, * q;
|
|
for (p = s->intern_src, q = data; len > 0; len--, p += 4, q += 4)
|
|
@@ -1171,6 +1274,7 @@ CAMLexport void caml_deserialize_block_4(void * data, intnat len)
|
|
CAMLexport void caml_deserialize_block_8(void * data, intnat len)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
+ intern_check_read(s, len * 8);
|
|
#ifndef ARCH_BIG_ENDIAN
|
|
const unsigned char * p, * q;
|
|
for (p = s->intern_src, q = data; len > 0; len--, p += 8, q += 8)
|
|
@@ -1185,6 +1289,7 @@ CAMLexport void caml_deserialize_block_8(void * data, intnat len)
|
|
CAMLexport void caml_deserialize_block_float_8(void * data, intnat len)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
+ intern_check_read(s, len * 8);
|
|
#if ARCH_FLOAT_ENDIANNESS == 0x01234567
|
|
memcpy(data, s->intern_src, len * 8);
|
|
s->intern_src += len * 8;
|
|
@@ -1204,6 +1309,5 @@ CAMLexport void caml_deserialize_block_float_8(void * data, intnat len)
|
|
CAMLexport void caml_deserialize_error(char * msg)
|
|
{
|
|
struct caml_intern_state* s = get_intern_state ();
|
|
- intern_cleanup(s);
|
|
- caml_failwith(msg);
|
|
+ intern_cleanup_failwith(s, msg);
|
|
}
|
|
diff --git a/testsuite/tests/lib-marshal/fuzzy.ml b/testsuite/tests/lib-marshal/fuzzy.ml
|
|
new file mode 100644
|
|
index 0000000000..88b0d65c93
|
|
--- /dev/null
|
|
+++ b/testsuite/tests/lib-marshal/fuzzy.ml
|
|
@@ -0,0 +1,141 @@
|
|
+(* TEST
|
|
+ arguments = "-n 10000";
|
|
+*)
|
|
+
|
|
+(* Can also be used with an external fuzzer such as AFL:
|
|
+ ./fuzzy -o fuzzy.in/data
|
|
+ afl-fuzz -i fuzzy.in -o fuzzy.out -- ./fuzzy -r
|
|
+*)
|
|
+
|
|
+(* Some data to be marshaled *)
|
|
+
|
|
+type t = A | B of int | C of float | D of string | E of char
|
|
+ | F of t | G of t * t | H of int * t | I of t * float | J
|
|
+
|
|
+let longstring =
|
|
+"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
|
+
|
|
+let bigint = Int64.to_int 0x123456789ABCDEF0L
|
|
+
|
|
+let closures () =
|
|
+ let t = true and
|
|
+ f = false in
|
|
+ let rec odd n =
|
|
+ if n = 0
|
|
+ then f
|
|
+ else even (n-1)
|
|
+ and even n =
|
|
+ if n = 0
|
|
+ then t
|
|
+ else odd (n-1)
|
|
+ in (even, odd)
|
|
+
|
|
+let data () = [|
|
|
+ Obj.repr 1;
|
|
+ Obj.repr (-1);
|
|
+ Obj.repr 20000;
|
|
+ Obj.repr 0x12345678;
|
|
+ Obj.repr bigint;
|
|
+ Obj.repr "foo";
|
|
+ Obj.repr longstring;
|
|
+ Obj.repr 3.141592654;
|
|
+ Obj.repr A;
|
|
+ Obj.repr (B 1);
|
|
+ Obj.repr (C 2.718);
|
|
+ Obj.repr (D "hello");
|
|
+ Obj.repr (E 'l');
|
|
+ Obj.repr (F(B 1));
|
|
+ Obj.repr (G(A, G(B 2, G(C 3.14, G(D "", E 'e')))));
|
|
+ Obj.repr (H(1, A));
|
|
+ Obj.repr (I(B 2, 1e-6));
|
|
+ (let x = D "sharing" in
|
|
+ let y = G(x, x) in
|
|
+ let z = G(y, G(x, y)) in
|
|
+ Obj.repr z);
|
|
+ Obj.repr [|1;2;3;4;5;6;7;8|];
|
|
+ Obj.repr [|3.14; 2.718|];
|
|
+ Obj.repr (closures());
|
|
+ Obj.repr 0l;
|
|
+ Obj.repr 123456l;
|
|
+ Obj.repr 0L;
|
|
+ (let i = Int64.of_string "123456789123456" in Obj.repr (i,i));
|
|
+ Obj.repr (Failure "fail");
|
|
+ Obj.repr Bigarray.(Array1.init int16_unsigned c_layout 5 (fun x -> 8*x))
|
|
+|]
|
|
+
|
|
+(* Generate file with marshaled data *)
|
|
+
|
|
+let generate filename =
|
|
+ Out_channel.with_open_bin filename
|
|
+ (fun oc -> Marshal.(to_channel oc (data()) [Closures]))
|
|
+
|
|
+(* Try to unmarshal possibly malicious data. Clean failure is success. *)
|
|
+
|
|
+let test ic =
|
|
+ In_channel.set_binary_mode ic true;
|
|
+ begin try
|
|
+ ignore (Marshal.from_channel ic)
|
|
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
|
|
+ end;
|
|
+ Gc.full_major()
|
|
+
|
|
+(* Internal fuzzing. Rather naive. *)
|
|
+
|
|
+let random_offset b =
|
|
+ (* Leave the header unchanged *)
|
|
+ 20 + Random.int (Bytes.length b - 20)
|
|
+
|
|
+let flip_one_byte b =
|
|
+ let p = random_offset b in
|
|
+ Bytes.set_uint8 b p (Random.int 0x100)
|
|
+
|
|
+let flip_one_bit b =
|
|
+ let p = random_offset b in
|
|
+ let m = 1 lsl (Random.int 8) in
|
|
+ Bytes.set_uint8 b p (Bytes.get_uint8 b p lxor m)
|
|
+
|
|
+let fuzz niter =
|
|
+ let d = Marshal.(to_string (data()) [Closures]) in
|
|
+ for i = 1 to niter do
|
|
+ let b = Bytes.of_string d in
|
|
+ begin match i land 4 with
|
|
+ | 0 -> flip_one_byte b
|
|
+ | 1 -> flip_one_bit b
|
|
+ | 2 -> flip_one_byte b; flip_one_byte b
|
|
+ | _ (*3*) -> flip_one_bit b; flip_one_bit b
|
|
+ end;
|
|
+ begin try
|
|
+ ignore (Marshal.from_bytes b 0)
|
|
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
|
|
+ end;
|
|
+ Gc.full_major()
|
|
+ done
|
|
+
|
|
+let fuzz1 () =
|
|
+ let d = Marshal.(to_string (data()) [Closures]) in
|
|
+ let b = Bytes.of_string d in
|
|
+ for i = 0 to String.length d - 1 do
|
|
+ for x = 0 to 255 do
|
|
+ Bytes.set_uint8 b i x;
|
|
+ begin try
|
|
+ ignore (Marshal.from_bytes b 0)
|
|
+ with Failure _ | Invalid_argument _ | Out_of_memory -> ()
|
|
+ end;
|
|
+ Gc.full_major()
|
|
+ done;
|
|
+ Bytes.set_uint8 b i (Bytes.get_uint8 b i)
|
|
+ done
|
|
+
|
|
+let () =
|
|
+ Arg.parse [
|
|
+ "-o", Arg.String generate,
|
|
+ "<file> Save marshaled data to <file>";
|
|
+ "-n", Arg.Int fuzz,
|
|
+ "<num iter> Perform internal fuzzing test (random)";
|
|
+ "-x", Arg.Unit fuzz1,
|
|
+ "<num iter> Perform internal fuzzing test (exhaustive 1-byte)";
|
|
+ "-r", Arg.Unit (fun () -> test stdin),
|
|
+ " Read marshaled data from standard input"
|
|
+ ]
|
|
+ (fun s -> raise (Arg.Bad ("don't know what to do with " ^ s)))
|
|
+ "Usage: fuzzy [option].\nOptions are:"
|
|
--
|
|
2.52.0
|
|
|