From 446d8077dad5ae0faad7b9cff9c61529e34d21bb Mon Sep 17 00:00:00 2001
From: Philip Withnall <pwithnall@gnome.org>
Date: Tue, 25 Nov 2025 19:27:51 +0000
Subject: [PATCH 1/4] gutf8: Add tests and clarify documentation for
 g_unichar_to_utf8()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While using `g_unichar_to_utf8()`, I noticed the return type is `int`,
which suggests that it could return negative values. Wanting to make
sure I properly handled this in my code, I checked the implementation of
`g_unichar_to_utf8()` and found that it can actually only return [1, 6].

It looks like the `int` return type is a historical C-ism, and it should
really return an unsigned integer type.

Improve the docs and add some unit tests while I’m there.

Signed-off-by: Philip Withnall <pwithnall@gnome.org>
---
 glib/gutf8.c         |  8 ++++----
 glib/tests/unicode.c | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/glib/gutf8.c b/glib/gutf8.c
index 06cb9e779..5deee968e 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -454,16 +454,16 @@ g_utf8_strncpy (gchar       *dest,
  * 
  * Converts a single character to UTF-8.
  * 
- * Returns: number of bytes written
+ * Returns: number of bytes written, guaranteed to be in the range [1, 6]
  */
 int
 g_unichar_to_utf8 (gunichar c,
 		   gchar   *outbuf)
 {
   /* If this gets modified, also update the copy in g_string_insert_unichar() */
-  guint len = 0;    
-  int first;
-  int i;
+  size_t len = 0;
+  char first;
+  size_t i;
 
   if (c < 0x80)
     {
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index c81c9b8e8..8aacb8167 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -525,6 +525,41 @@ test_wide (void)
     }
 };
 
+static void
+test_unichar_to_utf8 (void)
+{
+  const struct
+    {
+      gunichar unichar;
+      int expected_length;
+      const char *expected_output;  /* must be of length `expected_length` */
+    }
+  vectors[] =
+    {
+      { 0x21, 1, "!" },
+      { 0x00A1, 2, "\xc2\xa1" },
+      { 0x0800, 3, "\xe0\xa0\x80" },
+      { 0x10000, 4, "\xf0\x90\x80\x80" },
+      { 0x200000, 5, "\xf8\x88\x80\x80\x80" },
+      { 0x4000000, 6, "\xfc\x84\x80\x80\x80\x80" },
+    };
+
+  for (size_t i = 0; i < G_N_ELEMENTS (vectors); i++)
+    {
+      int length;
+      char output[6];
+
+      length = g_unichar_to_utf8 (vectors[i].unichar, NULL);
+      g_assert_cmpint (length, ==, vectors[i].expected_length);
+
+      length = g_unichar_to_utf8 (vectors[i].unichar, output);
+      g_assert_cmpint (length, ==, vectors[i].expected_length);
+      g_assert_cmpmem (output, length, vectors[i].expected_output, vectors[i].expected_length);
+    }
+}
+
+/* Test that g_unichar_compose() returns the correct value for various
+ * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
 static void
 test_compose (void)
 {
@@ -946,6 +981,7 @@ main (int   argc,
   g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
   g_test_add_func ("/unicode/iso15924", test_iso15924);
   g_test_add_func ("/unicode/cases", test_cases);
+  g_test_add_func ("/unicode/unichar-to-utf8", test_unichar_to_utf8);
 
   return g_test_run();
 }
-- 
2.53.0


From 207d406271e3b5f89337ebdcefc0362c5b724d12 Mon Sep 17 00:00:00 2001
From: Philip Withnall <pwithnall@gnome.org>
Date: Tue, 25 Nov 2025 19:02:56 +0000
Subject: [PATCH 2/4] gvariant-parser: Fix potential integer overflow parsing
 (byte)strings

The termination condition for parsing string and bytestring literals in
GVariant text format input was subject to an integer overflow for input
string (or bytestring) literals longer than `INT_MAX`.

Fix that by counting as a `size_t` rather than as an `int`. The counter
can never correctly be negative.

Spotted by treeplus. Thanks to the Sovereign Tech Resilience programme
from the Sovereign Tech Agency. ID: #YWH-PGM9867-145

Signed-off-by: Philip Withnall <pwithnall@gnome.org>
Fixes: #3834
---
 glib/gvariant-parser.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
index 3261bc1af..4c1d2626b 100644
--- a/glib/gvariant-parser.c
+++ b/glib/gvariant-parser.c
@@ -585,7 +585,7 @@ ast_resolve (AST     *ast,
 {
   GVariant *value;
   gchar *pattern;
-  gint i, j = 0;
+  size_t i, j = 0;
 
   pattern = ast_get_pattern (ast, error);
 
@@ -1525,10 +1525,10 @@ string_free (AST *ast)
 
 static gboolean
 unicode_unescape (const gchar  *src,
-                  gint         *src_ofs,
+                  size_t       *src_ofs,
                   gchar        *dest,
-                  gint         *dest_ofs,
-                  gint          length,
+                  size_t       *dest_ofs,
+                  gsize         length,
                   SourceRef    *ref,
                   GError      **error)
 {
@@ -1548,7 +1548,7 @@ unicode_unescape (const gchar  *src,
     {
       parser_set_error (error, ref, NULL,
                         G_VARIANT_PARSE_ERROR_INVALID_CHARACTER,
-                        "invalid %d-character unicode escape", length);
+                        "invalid %zu-character unicode escape", length);
       return FALSE;
     }
 
@@ -1576,7 +1576,7 @@ string_parse (TokenStream  *stream,
   gsize length;
   gchar quote;
   gchar *str;
-  gint i, j;
+  size_t i, j;
 
   token_stream_start_ref (stream, &ref);
   token = token_stream_get (stream);
@@ -1704,7 +1704,7 @@ bytestring_parse (TokenStream  *stream,
   gsize length;
   gchar quote;
   gchar *str;
-  gint i, j;
+  size_t i, j;
 
   token_stream_start_ref (stream, &ref);
   token = token_stream_get (stream);
-- 
2.53.0


From abadf4e327d2936012089a7368b7e0ee7dcfabd3 Mon Sep 17 00:00:00 2001
From: Philip Withnall <pwithnall@gnome.org>
Date: Tue, 25 Nov 2025 19:19:16 +0000
Subject: [PATCH 3/4] gvariant-parser: Use size_t to count numbers of child
 elements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than using `gint`, which could overflow for arrays (or dicts, or
tuples) longer than `INT_MAX`. There may be other limits which prevent
parsed containers becoming that long, but we might as well make the type
system reflect the programmer’s intention as best it can anyway.

For arrays and tuples this is straightforward. For dictionaries, it’s
slightly complicated by the fact that the code used
`dict->n_children == -1` to indicate that the `Dictionary` struct in
question actually represented a single freestanding dict entry. In
GVariant text format, that would be `{1, "one"}`.

The implementation previously didn’t define the semantics of
`dict->n_children < -1`.

Now, instead, change `Dictionary.n_children` to `size_t`, and define a
magic value `DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY` to indicate that
the `Dictionary` represents a single freestanding dict entry.

This magic value is `SIZE_MAX`, and given that a dictionary entry takes
more than one byte to represent in GVariant text format, that means it’s
not possible to have that many entries in a parsed dictionary, so this
magic value won’t be hit by a normal dictionary. An assertion checks
this anyway.

Spotted while working on #3834.

Signed-off-by: Philip Withnall <pwithnall@gnome.org>
---
 glib/gvariant-parser.c | 58 ++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
index 4c1d2626b..adc559408 100644
--- a/glib/gvariant-parser.c
+++ b/glib/gvariant-parser.c
@@ -637,9 +637,9 @@ static AST *parse (TokenStream  *stream,
                    GError      **error);
 
 static void
-ast_array_append (AST  ***array,
-                  gint   *n_items,
-                  AST    *ast)
+ast_array_append (AST    ***array,
+                  size_t   *n_items,
+                  AST      *ast)
 {
   if ((*n_items & (*n_items - 1)) == 0)
     *array = g_renew (AST *, *array, *n_items ? 2 ** n_items : 1);
@@ -648,10 +648,10 @@ ast_array_append (AST  ***array,
 }
 
 static void
-ast_array_free (AST  **array,
-                gint   n_items)
+ast_array_free (AST    **array,
+                size_t   n_items)
 {
-  gint i;
+  size_t i;
 
   for (i = 0; i < n_items; i++)
     ast_free (array[i]);
@@ -660,11 +660,11 @@ ast_array_free (AST  **array,
 
 static gchar *
 ast_array_get_pattern (AST    **array,
-                       gint     n_items,
+                       size_t   n_items,
                        GError **error)
 {
   gchar *pattern;
-  gint i;
+  size_t i;
 
   pattern = ast_get_pattern (array[0], error);
 
@@ -693,7 +693,7 @@ ast_array_get_pattern (AST    **array,
          * pair of values.
          */
         {
-          int j = 0;
+          size_t j = 0;
 
           while (TRUE)
             {
@@ -867,7 +867,7 @@ typedef struct
   AST ast;
 
   AST **children;
-  gint n_children;
+  size_t n_children;
 } Array;
 
 static gchar *
@@ -900,7 +900,7 @@ array_get_value (AST                 *ast,
   Array *array = (Array *) ast;
   const GVariantType *childtype;
   GVariantBuilder builder;
-  gint i;
+  size_t i;
 
   if (!g_variant_type_is_array (type))
     return ast_type_error (ast, type, error);
@@ -985,7 +985,7 @@ typedef struct
   AST ast;
 
   AST **children;
-  gint n_children;
+  size_t n_children;
 } Tuple;
 
 static gchar *
@@ -995,7 +995,7 @@ tuple_get_pattern (AST     *ast,
   Tuple *tuple = (Tuple *) ast;
   gchar *result = NULL;
   gchar **parts;
-  gint i;
+  size_t i;
 
   parts = g_new (gchar *, tuple->n_children + 4);
   parts[tuple->n_children + 1] = (gchar *) ")";
@@ -1025,7 +1025,7 @@ tuple_get_value (AST                 *ast,
   Tuple *tuple = (Tuple *) ast;
   const GVariantType *childtype;
   GVariantBuilder builder;
-  gint i;
+  size_t i;
 
   if (!g_variant_type_is_tuple (type))
     return ast_type_error (ast, type, error);
@@ -1215,9 +1215,16 @@ typedef struct
 
   AST **keys;
   AST **values;
-  gint n_children;
+
+  /* Iff this is DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY then this struct
+   * represents a single freestanding dict entry (`{1, "one"}`) rather than a
+   * full dict. In the freestanding case, @keys and @values have exactly one
+   * member each. */
+  size_t n_children;
 } Dictionary;
 
+#define DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY ((size_t) -1)
+
 static gchar *
 dictionary_get_pattern (AST     *ast,
                         GError **error)
@@ -1232,7 +1239,7 @@ dictionary_get_pattern (AST     *ast,
     return g_strdup ("Ma{**}");
 
   key_pattern = ast_array_get_pattern (dict->keys,
-                                       abs (dict->n_children),
+                                       (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY) ? 1 : dict->n_children,
                                        error);
 
   if (key_pattern == NULL)
@@ -1263,7 +1270,7 @@ dictionary_get_pattern (AST     *ast,
     return NULL;
 
   result = g_strdup_printf ("M%s{%c%s}",
-                            dict->n_children > 0 ? "a" : "",
+                            (dict->n_children > 0 && dict->n_children != DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY) ? "a" : "",
                             key_char, value_pattern);
   g_free (value_pattern);
 
@@ -1277,7 +1284,7 @@ dictionary_get_value (AST                 *ast,
 {
   Dictionary *dict = (Dictionary *) ast;
 
-  if (dict->n_children == -1)
+  if (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY)
     {
       const GVariantType *subtype;
       GVariantBuilder builder;
@@ -1310,7 +1317,7 @@ dictionary_get_value (AST                 *ast,
     {
       const GVariantType *entry, *key, *val;
       GVariantBuilder builder;
-      gint i;
+      size_t i;
 
       if (!g_variant_type_is_subtype_of (type, G_VARIANT_TYPE_DICTIONARY))
         return ast_type_error (ast, type, error);
@@ -1351,12 +1358,12 @@ static void
 dictionary_free (AST *ast)
 {
   Dictionary *dict = (Dictionary *) ast;
-  gint n_children;
+  size_t n_children;
 
-  if (dict->n_children > -1)
-    n_children = dict->n_children;
-  else
+  if (dict->n_children == DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY)
     n_children = 1;
+  else
+    n_children = dict->n_children;
 
   ast_array_free (dict->keys, n_children);
   ast_array_free (dict->values, n_children);
@@ -1373,7 +1380,7 @@ dictionary_parse (TokenStream  *stream,
     maybe_wrapper, dictionary_get_value,
     dictionary_free
   };
-  gint n_keys, n_values;
+  size_t n_keys, n_values;
   gboolean only_one;
   Dictionary *dict;
   AST *first;
@@ -1416,7 +1423,7 @@ dictionary_parse (TokenStream  *stream,
         goto error;
 
       g_assert (n_keys == 1 && n_values == 1);
-      dict->n_children = -1;
+      dict->n_children = DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY;
 
       return (AST *) dict;
     }
@@ -1449,6 +1456,7 @@ dictionary_parse (TokenStream  *stream,
     }
 
   g_assert (n_keys == n_values);
+  g_assert (n_keys != DICTIONARY_N_CHILDREN_FREESTANDING_ENTRY);
   dict->n_children = n_keys;
 
   return (AST *) dict;
-- 
2.53.0


From 4de574f856d5ed40fc6981f313a35454d7678109 Mon Sep 17 00:00:00 2001
From: Philip Withnall <pwithnall@gnome.org>
Date: Tue, 25 Nov 2025 19:25:58 +0000
Subject: [PATCH 4/4] gvariant-parser: Convert error handling code to use
 size_t

The error handling code allows for printing out the range of input bytes
related to a parsing error. This was previously done using `gint`, but
the input could be longer than `INT_MAX`, so it should really be done
using `size_t`.

Spotted while working on #3834.

Signed-off-by: Philip Withnall <pwithnall@gnome.org>
---
 glib/gvariant-parser.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
index adc559408..199bd1f2c 100644
--- a/glib/gvariant-parser.c
+++ b/glib/gvariant-parser.c
@@ -86,7 +86,9 @@ g_variant_parser_get_error_quark (void)
 
 typedef struct
 {
-  gint start, end;
+  /* Offsets from the start of the input, in bytes. Can be equal when referring
+   * to a point rather than a range. The invariant `end >= start` always holds. */
+  size_t start, end;
 } SourceRef;
 
 G_GNUC_PRINTF(5, 0)
@@ -101,14 +103,16 @@ parser_set_error_va (GError      **error,
   GString *msg = g_string_new (NULL);
 
   if (location->start == location->end)
-    g_string_append_printf (msg, "%d", location->start);
+    g_string_append_printf (msg, "%" G_GSIZE_FORMAT, location->start);
   else
-    g_string_append_printf (msg, "%d-%d", location->start, location->end);
+    g_string_append_printf (msg, "%" G_GSIZE_FORMAT "-%" G_GSIZE_FORMAT,
+                            location->start, location->end);
 
   if (other != NULL)
     {
       g_assert (other->start != other->end);
-      g_string_append_printf (msg, ",%d-%d", other->start, other->end);
+      g_string_append_printf (msg, ",%" G_GSIZE_FORMAT "-%" G_GSIZE_FORMAT,
+                              other->start, other->end);
     }
   g_string_append_c (msg, ':');
 
@@ -135,11 +139,15 @@ parser_set_error (GError      **error,
 
 typedef struct
 {
+  /* We should always have the following ordering constraint:
+   *   start <= this <= stream <= end
+   * Additionally, unless in an error or EOF state, `this < stream`.
+   */
   const gchar *start;
   const gchar *stream;
   const gchar *end;
 
-  const gchar *this;
+  const gchar *this;  /* (nullable) */
 } TokenStream;
 
 
@@ -170,7 +178,7 @@ token_stream_set_error (TokenStream  *stream,
 static gboolean
 token_stream_prepare (TokenStream *stream)
 {
-  gint brackets = 0;
+  gssize brackets = 0;
   const gchar *end;
 
   if (stream->this != NULL)
@@ -395,7 +403,7 @@ static void
 pattern_copy (gchar       **out,
               const gchar **in)
 {
-  gint brackets = 0;
+  gssize brackets = 0;
 
   while (**in == 'a' || **in == 'm' || **in == 'M')
     *(*out)++ = *(*in)++;
@@ -2592,7 +2600,7 @@ g_variant_builder_add_parsed (GVariantBuilder *builder,
 static gboolean
 parse_num (const gchar *num,
            const gchar *limit,
-           gint        *result)
+           size_t      *result)
 {
   gchar *endptr;
   gint64 bignum;
@@ -2602,10 +2610,12 @@ parse_num (const gchar *num,
   if (endptr != limit)
     return FALSE;
 
+  /* The upper bound here is more restrictive than it technically needs to be,
+   * but should be enough for any practical situation: */
   if (bignum < 0 || bignum > G_MAXINT)
     return FALSE;
 
-  *result = bignum;
+  *result = (size_t) bignum;
 
   return TRUE;
 }
@@ -2616,7 +2626,7 @@ add_last_line (GString     *err,
 {
   const gchar *last_nl;
   gchar *chomped;
-  gint i;
+  size_t i;
 
   /* This is an error at the end of input.  If we have a file
    * with newlines, that's probably the empty string after the
@@ -2761,7 +2771,7 @@ g_variant_parse_error_print_context (GError      *error,
 
   if (dash == NULL || colon < dash)
     {
-      gint point;
+      size_t point;
 
       /* we have a single point */
       if (!parse_num (error->message, colon, &point))
@@ -2779,7 +2789,7 @@ g_variant_parse_error_print_context (GError      *error,
       /* We have one or two ranges... */
       if (comma && comma < colon)
         {
-          gint start1, end1, start2, end2;
+          size_t start1, end1, start2, end2;
           const gchar *dash2;
 
           /* Two ranges */
@@ -2795,7 +2805,7 @@ g_variant_parse_error_print_context (GError      *error,
         }
       else
         {
-          gint start, end;
+          size_t start, end;
 
           /* One range */
           if (!parse_num (error->message, dash, &start) || !parse_num (dash + 1, colon, &end))
-- 
2.53.0