579 lines
17 KiB
Diff
579 lines
17 KiB
Diff
From 446d8077dad5ae0faad7b9cff9c61529e34d21bb Mon Sep 17 00:00:00 2001
|
||
From: Philip Withnall <pwithnall@gnome.org>
|
||
Date: Tue, 25 Nov 2025 19:27:51 +0000
|
||
Subject: [PATCH 1/4] gutf8: Add tests and clarify documentation for
|
||
g_unichar_to_utf8()
|
||
MIME-Version: 1.0
|
||
Content-Type: text/plain; charset=UTF-8
|
||
Content-Transfer-Encoding: 8bit
|
||
|
||
While using `g_unichar_to_utf8()`, I noticed the return type is `int`,
|
||
which suggests that it could return negative values. Wanting to make
|
||
sure I properly handled this in my code, I checked the implementation of
|
||
`g_unichar_to_utf8()` and found that it can actually only return [1, 6].
|
||
|
||
It looks like the `int` return type is a historical C-ism, and it should
|
||
really return an unsigned integer type.
|
||
|
||
Improve the docs and add some unit tests while I’m there.
|
||
|
||
Signed-off-by: Philip Withnall <pwithnall@gnome.org>
|
||
---
|
||
glib/gutf8.c | 8 ++++----
|
||
glib/tests/unicode.c | 36 ++++++++++++++++++++++++++++++++++++
|
||
2 files changed, 40 insertions(+), 4 deletions(-)
|
||
|
||
diff --git a/glib/gutf8.c b/glib/gutf8.c
|
||
index 06cb9e779..5deee968e 100644
|
||
--- a/glib/gutf8.c
|
||
+++ b/glib/gutf8.c
|
||
@@ -454,16 +454,16 @@ g_utf8_strncpy (gchar *dest,
|
||
*
|
||
* Converts a single character to UTF-8.
|
||
*
|
||
- * Returns: number of bytes written
|
||
+ * Returns: number of bytes written, guaranteed to be in the range [1, 6]
|
||
*/
|
||
int
|
||
g_unichar_to_utf8 (gunichar c,
|
||
gchar *outbuf)
|
||
{
|
||
/* If this gets modified, also update the copy in g_string_insert_unichar() */
|
||
- guint len = 0;
|
||
- int first;
|
||
- int i;
|
||
+ size_t len = 0;
|
||
+ char first;
|
||
+ size_t i;
|
||
|
||
if (c < 0x80)
|
||
{
|
||
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
|
||
index c81c9b8e8..8aacb8167 100644
|
||
--- a/glib/tests/unicode.c
|
||
+++ b/glib/tests/unicode.c
|
||
@@ -525,6 +525,41 @@ test_wide (void)
|
||
}
|
||
};
|
||
|
||
+static void
|
||
+test_unichar_to_utf8 (void)
|
||
+{
|
||
+ const struct
|
||
+ {
|
||
+ gunichar unichar;
|
||
+ int expected_length;
|
||
+ const char *expected_output; /* must be of length `expected_length` */
|
||
+ }
|
||
+ vectors[] =
|
||
+ {
|
||
+ { 0x21, 1, "!" },
|
||
+ { 0x00A1, 2, "\xc2\xa1" },
|
||
+ { 0x0800, 3, "\xe0\xa0\x80" },
|
||
+ { 0x10000, 4, "\xf0\x90\x80\x80" },
|
||
+ { 0x200000, 5, "\xf8\x88\x80\x80\x80" },
|
||
+ { 0x4000000, 6, "\xfc\x84\x80\x80\x80\x80" },
|
||
+ };
|
||
+
|
||
+ for (size_t i = 0; i < G_N_ELEMENTS (vectors); i++)
|
||
+ {
|
||
+ int length;
|
||
+ char output[6];
|
||
+
|
||
+ length = g_unichar_to_utf8 (vectors[i].unichar, NULL);
|
||
+ g_assert_cmpint (length, ==, vectors[i].expected_length);
|
||
+
|
||
+ length = g_unichar_to_utf8 (vectors[i].unichar, output);
|
||
+ g_assert_cmpint (length, ==, vectors[i].expected_length);
|
||
+ g_assert_cmpmem (output, length, vectors[i].expected_output, vectors[i].expected_length);
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Test that g_unichar_compose() returns the correct value for various
|
||
+ * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
|
||
static void
|
||
test_compose (void)
|
||
{
|
||
@@ -946,6 +981,7 @@ main (int argc,
|
||
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
|
||
g_test_add_func ("/unicode/iso15924", test_iso15924);
|
||
g_test_add_func ("/unicode/cases", test_cases);
|
||
+ g_test_add_func ("/unicode/unichar-to-utf8", test_unichar_to_utf8);
|
||
|
||
return g_test_run();
|
||
}
|
||
--
|
||
2.53.0
|
||
|
||
|
||
From 207d406271e3b5f89337ebdcefc0362c5b724d12 Mon Sep 17 00:00:00 2001
|
||
From: Philip Withnall <pwithnall@gnome.org>
|
||
Date: Tue, 25 Nov 2025 19:02:56 +0000
|
||
Subject: [PATCH 2/4] gvariant-parser: Fix potential integer overflow parsing
|
||
(byte)strings
|
||
|
||
The termination condition for parsing string and bytestring literals in
|
||
GVariant text format input was subject to an integer overflow for input
|
||
string (or bytestring) literals longer than `INT_MAX`.
|
||
|
||
Fix that by counting as a `size_t` rather than as an `int`. The counter
|
||
can never correctly be negative.
|
||
|
||
Spotted by treeplus. Thanks to the Sovereign Tech Resilience programme
|
||
from the Sovereign Tech Agency. ID: #YWH-PGM9867-145
|
||
|
||
Signed-off-by: Philip Withnall <pwithnall@gnome.org>
|
||
Fixes: #3834
|
||
---
|
||
glib/gvariant-parser.c | 14 +++++++-------
|
||
1 file changed, 7 insertions(+), 7 deletions(-)
|
||
|
||
diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
|
||
index 3261bc1af..4c1d2626b 100644
|
||
--- a/glib/gvariant-parser.c
|
||
+++ b/glib/gvariant-parser.c
|
||
@@ -585,7 +585,7 @@ ast_resolve (AST *ast,
|
||
{
|
||
GVariant *value;
|
||
gchar *pattern;
|
||
- gint i, j = 0;
|
||
+ size_t i, j = 0;
|
||
|
||
pattern = ast_get_pattern (ast, error);
|
||
|
||
@@ -1525,10 +1525,10 @@ string_free (AST *ast)
|
||
|
||
static gboolean
|
||
unicode_unescape (const gchar *src,
|
||
- gint *src_ofs,
|
||
+ size_t *src_ofs,
|
||
gchar *dest,
|
||
- gint *dest_ofs,
|
||
- gint length,
|
||
+ size_t *dest_ofs,
|
||
+ gsize length,
|
||
SourceRef *ref,
|
||
GError **error)
|
||
{
|
||
@@ -1548,7 +1548,7 @@ unicode_unescape (const gchar *src,
|
||
{
|
||
parser_set_error (error, ref, NULL,
|
||
G_VARIANT_PARSE_ERROR_INVALID_CHARACTER,
|
||
- "invalid %d-character unicode escape", length);
|
||
+ "invalid %zu-character unicode escape", length);
|
||
return FALSE;
|
||
}
|
||
|
||
@@ -1576,7 +1576,7 @@ string_parse (TokenStream *stream,
|
||
gsize length;
|
||
gchar quote;
|
||
gchar *str;
|
||
- gint i, j;
|
||
+ size_t i, j;
|
||
|
||
token_stream_start_ref (stream, &ref);
|
||
token = token_stream_get (stream);
|
||
@@ -1704,7 +1704,7 @@ bytestring_parse (TokenStream *stream,
|
||
gsize length;
|
||
gchar quote;
|
||
gchar *str;
|
||
- gint i, j;
|
||
+ size_t i, j;
|
||
|
||
token_stream_start_ref (stream, &ref);
|
||
token = token_stream_get (stream);
|
||
--
|
||
2.53.0
|
||
|
||
|
||
From abadf4e327d2936012089a7368b7e0ee7dcfabd3 Mon Sep 17 00:00:00 2001
|
||
From: Philip Withnall <pwithnall@gnome.org>
|
||
Date: Tue, 25 Nov 2025 19:19:16 +0000
|
||
Subject: [PATCH 3/4] gvariant-parser: Use size_t to count numbers of child
|
||
elements
|
||
MIME-Version: 1.0
|
||
Content-Type: text/plain; charset=UTF-8
|
||
Content-Transfer-Encoding: 8bit
|
||
|
||
Rather than using `gint`, which could overflow for arrays (or dicts, or
|
||
tuples) longer than `INT_MAX`. There may be other limits which prevent
|
||
parsed containers becoming that long, but we might as well make the type
|
||
system reflect the programmer’s intention as best it can anyway.
|
||
|
||
For arrays and tuples this is straightforward. For dictionaries, it’s
|
||
slightly complicated by the fact that the code used
|
||
`dict->n_children == -1` to indicate that the `Dictionary` struct in
|
||
question actually represented a single freestanding dict entry. In
|
||
GVariant text format, that would be `{1, "one"}`.
|
||
|
||
The implementation previously didn’t define the semantics of
|
||
`dict->n_children < -1`.
|
||
|
||
Now, instead, change `Dictionary.n_children` to `size_t`, and define a
|
||
magic value `DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY` to indicate that
|
||
the `Dictionary` represents a single freestanding dict entry.
|
||
|
||
This magic value is `SIZE_MAX`, and given that a dictionary entry takes
|
||
more than one byte to represent in GVariant text format, that means it’s
|
||
not possible to have that many entries in a parsed dictionary, so this
|
||
magic value won’t be hit by a normal dictionary. An assertion checks
|
||
this anyway.
|
||
|
||
Spotted while working on #3834.
|
||
|
||
Signed-off-by: Philip Withnall <pwithnall@gnome.org>
|
||
---
|
||
glib/gvariant-parser.c | 58 ++++++++++++++++++++++++------------------
|
||
1 file changed, 33 insertions(+), 25 deletions(-)
|
||
|
||
diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
|
||
index 4c1d2626b..adc559408 100644
|
||
--- a/glib/gvariant-parser.c
|
||
+++ b/glib/gvariant-parser.c
|
||
@@ -637,9 +637,9 @@ static AST *parse (TokenStream *stream,
|
||
GError **error);
|
||
|
||
static void
|
||
-ast_array_append (AST ***array,
|
||
- gint *n_items,
|
||
- AST *ast)
|
||
+ast_array_append (AST ***array,
|
||
+ size_t *n_items,
|
||
+ AST *ast)
|
||
{
|
||
if ((*n_items & (*n_items - 1)) == 0)
|
||
*array = g_renew (AST *, *array, *n_items ? 2 ** n_items : 1);
|
||
@@ -648,10 +648,10 @@ ast_array_append (AST ***array,
|
||
}
|
||
|
||
static void
|
||
-ast_array_free (AST **array,
|
||
- gint n_items)
|
||
+ast_array_free (AST **array,
|
||
+ size_t n_items)
|
||
{
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
for (i = 0; i < n_items; i++)
|
||
ast_free (array[i]);
|
||
@@ -660,11 +660,11 @@ ast_array_free (AST **array,
|
||
|
||
static gchar *
|
||
ast_array_get_pattern (AST **array,
|
||
- gint n_items,
|
||
+ size_t n_items,
|
||
GError **error)
|
||
{
|
||
gchar *pattern;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
pattern = ast_get_pattern (array[0], error);
|
||
|
||
@@ -693,7 +693,7 @@ ast_array_get_pattern (AST **array,
|
||
* pair of values.
|
||
*/
|
||
{
|
||
- int j = 0;
|
||
+ size_t j = 0;
|
||
|
||
while (TRUE)
|
||
{
|
||
@@ -867,7 +867,7 @@ typedef struct
|
||
AST ast;
|
||
|
||
AST **children;
|
||
- gint n_children;
|
||
+ size_t n_children;
|
||
} Array;
|
||
|
||
static gchar *
|
||
@@ -900,7 +900,7 @@ array_get_value (AST *ast,
|
||
Array *array = (Array *) ast;
|
||
const GVariantType *childtype;
|
||
GVariantBuilder builder;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
if (!g_variant_type_is_array (type))
|
||
return ast_type_error (ast, type, error);
|
||
@@ -985,7 +985,7 @@ typedef struct
|
||
AST ast;
|
||
|
||
AST **children;
|
||
- gint n_children;
|
||
+ size_t n_children;
|
||
} Tuple;
|
||
|
||
static gchar *
|
||
@@ -995,7 +995,7 @@ tuple_get_pattern (AST *ast,
|
||
Tuple *tuple = (Tuple *) ast;
|
||
gchar *result = NULL;
|
||
gchar **parts;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
parts = g_new (gchar *, tuple->n_children + 4);
|
||
parts[tuple->n_children + 1] = (gchar *) ")";
|
||
@@ -1025,7 +1025,7 @@ tuple_get_value (AST *ast,
|
||
Tuple *tuple = (Tuple *) ast;
|
||
const GVariantType *childtype;
|
||
GVariantBuilder builder;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
if (!g_variant_type_is_tuple (type))
|
||
return ast_type_error (ast, type, error);
|
||
@@ -1215,9 +1215,16 @@ typedef struct
|
||
|
||
AST **keys;
|
||
AST **values;
|
||
- gint n_children;
|
||
+
|
||
+ /* Iff this is DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY then this struct
|
||
+ * represents a single freestanding dict entry (`{1, "one"}`) rather than a
|
||
+ * full dict. In the freestanding case, @keys and @values have exactly one
|
||
+ * member each. */
|
||
+ size_t n_children;
|
||
} Dictionary;
|
||
|
||
+#define DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY ((size_t) -1)
|
||
+
|
||
static gchar *
|
||
dictionary_get_pattern (AST *ast,
|
||
GError **error)
|
||
@@ -1232,7 +1239,7 @@ dictionary_get_pattern (AST *ast,
|
||
return g_strdup ("Ma{**}");
|
||
|
||
key_pattern = ast_array_get_pattern (dict->keys,
|
||
- abs (dict->n_children),
|
||
+ (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY) ? 1 : dict->n_children,
|
||
error);
|
||
|
||
if (key_pattern == NULL)
|
||
@@ -1263,7 +1270,7 @@ dictionary_get_pattern (AST *ast,
|
||
return NULL;
|
||
|
||
result = g_strdup_printf ("M%s{%c%s}",
|
||
- dict->n_children > 0 ? "a" : "",
|
||
+ (dict->n_children > 0 && dict->n_children != DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY) ? "a" : "",
|
||
key_char, value_pattern);
|
||
g_free (value_pattern);
|
||
|
||
@@ -1277,7 +1284,7 @@ dictionary_get_value (AST *ast,
|
||
{
|
||
Dictionary *dict = (Dictionary *) ast;
|
||
|
||
- if (dict->n_children == -1)
|
||
+ if (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY)
|
||
{
|
||
const GVariantType *subtype;
|
||
GVariantBuilder builder;
|
||
@@ -1310,7 +1317,7 @@ dictionary_get_value (AST *ast,
|
||
{
|
||
const GVariantType *entry, *key, *val;
|
||
GVariantBuilder builder;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
if (!g_variant_type_is_subtype_of (type, G_VARIANT_TYPE_DICTIONARY))
|
||
return ast_type_error (ast, type, error);
|
||
@@ -1351,12 +1358,12 @@ static void
|
||
dictionary_free (AST *ast)
|
||
{
|
||
Dictionary *dict = (Dictionary *) ast;
|
||
- gint n_children;
|
||
+ size_t n_children;
|
||
|
||
- if (dict->n_children > -1)
|
||
- n_children = dict->n_children;
|
||
- else
|
||
+ if (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY)
|
||
n_children = 1;
|
||
+ else
|
||
+ n_children = dict->n_children;
|
||
|
||
ast_array_free (dict->keys, n_children);
|
||
ast_array_free (dict->values, n_children);
|
||
@@ -1373,7 +1380,7 @@ dictionary_parse (TokenStream *stream,
|
||
maybe_wrapper, dictionary_get_value,
|
||
dictionary_free
|
||
};
|
||
- gint n_keys, n_values;
|
||
+ size_t n_keys, n_values;
|
||
gboolean only_one;
|
||
Dictionary *dict;
|
||
AST *first;
|
||
@@ -1416,7 +1423,7 @@ dictionary_parse (TokenStream *stream,
|
||
goto error;
|
||
|
||
g_assert (n_keys == 1 && n_values == 1);
|
||
- dict->n_children = -1;
|
||
+ dict->n_children = DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY;
|
||
|
||
return (AST *) dict;
|
||
}
|
||
@@ -1449,6 +1456,7 @@ dictionary_parse (TokenStream *stream,
|
||
}
|
||
|
||
g_assert (n_keys == n_values);
|
||
+ g_assert (n_keys != DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY);
|
||
dict->n_children = n_keys;
|
||
|
||
return (AST *) dict;
|
||
--
|
||
2.53.0
|
||
|
||
|
||
From 4de574f856d5ed40fc6981f313a35454d7678109 Mon Sep 17 00:00:00 2001
|
||
From: Philip Withnall <pwithnall@gnome.org>
|
||
Date: Tue, 25 Nov 2025 19:25:58 +0000
|
||
Subject: [PATCH 4/4] gvariant-parser: Convert error handling code to use
|
||
size_t
|
||
|
||
The error handling code allows for printing out the range of input bytes
|
||
related to a parsing error. This was previously done using `gint`, but
|
||
the input could be longer than `INT_MAX`, so it should really be done
|
||
using `size_t`.
|
||
|
||
Spotted while working on #3834.
|
||
|
||
Signed-off-by: Philip Withnall <pwithnall@gnome.org>
|
||
---
|
||
glib/gvariant-parser.c | 36 +++++++++++++++++++++++-------------
|
||
1 file changed, 23 insertions(+), 13 deletions(-)
|
||
|
||
diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
|
||
index adc559408..199bd1f2c 100644
|
||
--- a/glib/gvariant-parser.c
|
||
+++ b/glib/gvariant-parser.c
|
||
@@ -86,7 +86,9 @@ g_variant_parser_get_error_quark (void)
|
||
|
||
typedef struct
|
||
{
|
||
- gint start, end;
|
||
+ /* Offsets from the start of the input, in bytes. Can be equal when referring
|
||
+ * to a point rather than a range. The invariant `end >= start` always holds. */
|
||
+ size_t start, end;
|
||
} SourceRef;
|
||
|
||
G_GNUC_PRINTF(5, 0)
|
||
@@ -101,14 +103,16 @@ parser_set_error_va (GError **error,
|
||
GString *msg = g_string_new (NULL);
|
||
|
||
if (location->start == location->end)
|
||
- g_string_append_printf (msg, "%d", location->start);
|
||
+ g_string_append_printf (msg, "%" G_GSIZE_FORMAT, location->start);
|
||
else
|
||
- g_string_append_printf (msg, "%d-%d", location->start, location->end);
|
||
+ g_string_append_printf (msg, "%" G_GSIZE_FORMAT "-%" G_GSIZE_FORMAT,
|
||
+ location->start, location->end);
|
||
|
||
if (other != NULL)
|
||
{
|
||
g_assert (other->start != other->end);
|
||
- g_string_append_printf (msg, ",%d-%d", other->start, other->end);
|
||
+ g_string_append_printf (msg, ",%" G_GSIZE_FORMAT "-%" G_GSIZE_FORMAT,
|
||
+ other->start, other->end);
|
||
}
|
||
g_string_append_c (msg, ':');
|
||
|
||
@@ -135,11 +139,15 @@ parser_set_error (GError **error,
|
||
|
||
typedef struct
|
||
{
|
||
+ /* We should always have the following ordering constraint:
|
||
+ * start <= this <= stream <= end
|
||
+ * Additionally, unless in an error or EOF state, `this < stream`.
|
||
+ */
|
||
const gchar *start;
|
||
const gchar *stream;
|
||
const gchar *end;
|
||
|
||
- const gchar *this;
|
||
+ const gchar *this; /* (nullable) */
|
||
} TokenStream;
|
||
|
||
|
||
@@ -170,7 +178,7 @@ token_stream_set_error (TokenStream *stream,
|
||
static gboolean
|
||
token_stream_prepare (TokenStream *stream)
|
||
{
|
||
- gint brackets = 0;
|
||
+ gssize brackets = 0;
|
||
const gchar *end;
|
||
|
||
if (stream->this != NULL)
|
||
@@ -395,7 +403,7 @@ static void
|
||
pattern_copy (gchar **out,
|
||
const gchar **in)
|
||
{
|
||
- gint brackets = 0;
|
||
+ gssize brackets = 0;
|
||
|
||
while (**in == 'a' || **in == 'm' || **in == 'M')
|
||
*(*out)++ = *(*in)++;
|
||
@@ -2592,7 +2600,7 @@ g_variant_builder_add_parsed (GVariantBuilder *builder,
|
||
static gboolean
|
||
parse_num (const gchar *num,
|
||
const gchar *limit,
|
||
- gint *result)
|
||
+ size_t *result)
|
||
{
|
||
gchar *endptr;
|
||
gint64 bignum;
|
||
@@ -2602,10 +2610,12 @@ parse_num (const gchar *num,
|
||
if (endptr != limit)
|
||
return FALSE;
|
||
|
||
+ /* The upper bound here is more restrictive than it technically needs to be,
|
||
+ * but should be enough for any practical situation: */
|
||
if (bignum < 0 || bignum > G_MAXINT)
|
||
return FALSE;
|
||
|
||
- *result = bignum;
|
||
+ *result = (size_t) bignum;
|
||
|
||
return TRUE;
|
||
}
|
||
@@ -2616,7 +2626,7 @@ add_last_line (GString *err,
|
||
{
|
||
const gchar *last_nl;
|
||
gchar *chomped;
|
||
- gint i;
|
||
+ size_t i;
|
||
|
||
/* This is an error at the end of input. If we have a file
|
||
* with newlines, that's probably the empty string after the
|
||
@@ -2761,7 +2771,7 @@ g_variant_parse_error_print_context (GError *error,
|
||
|
||
if (dash == NULL || colon < dash)
|
||
{
|
||
- gint point;
|
||
+ size_t point;
|
||
|
||
/* we have a single point */
|
||
if (!parse_num (error->message, colon, &point))
|
||
@@ -2779,7 +2789,7 @@ g_variant_parse_error_print_context (GError *error,
|
||
/* We have one or two ranges... */
|
||
if (comma && comma < colon)
|
||
{
|
||
- gint start1, end1, start2, end2;
|
||
+ size_t start1, end1, start2, end2;
|
||
const gchar *dash2;
|
||
|
||
/* Two ranges */
|
||
@@ -2795,7 +2805,7 @@ g_variant_parse_error_print_context (GError *error,
|
||
}
|
||
else
|
||
{
|
||
- gint start, end;
|
||
+ size_t start, end;
|
||
|
||
/* One range */
|
||
if (!parse_num (error->message, dash, &start) || !parse_num (dash + 1, colon, &end))
|
||
--
|
||
2.53.0
|
||
|