ModemManager/rh862341-1-UCS2-conversions.patch

commit e07c2162d66ab1a2e938db48744fc00afd29c272
Author: Dan Williams <dcbw@redhat.com>
Date:   Tue Sep 11 16:36:16 2012 -0500

    core: better handling of non-UCS2 conversions that should be UCS2 (bgo #683817)

    Some modems return the +COPS operator name in hex-encoded current
    character set (as set with +CSCS).  Others return the operator name
    in ASCII when set to UCS2, while yet others return the ASCII name
    with trash at the end (*cough* Huawei *cough*).  Handle that better
    by not crashing.

diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 68bf2d1..65fc33c 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -710,8 +710,7 @@ gsm_pack (const guint8 *src,
  * the hex representation of the charset-encoded string, so we need to cope with
  * that case. */
 gchar *
-mm_charset_take_and_convert_to_utf8 (gchar *str,
-                                     MMModemCharset charset)
+mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
 {
     gchar *utf8 = NULL;

@@ -752,6 +751,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
     case MM_MODEM_CHARSET_UCS2: {
         gsize len;
         gboolean possibly_hex = TRUE;
+        gsize bread = 0, bwritten = 0;

         /* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
         len = strlen (str);
@@ -765,19 +765,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
                 possibly_hex = isxdigit (*p++);
         }

-        /* If we get UCS-2, we expect the HEX representation of the string */
+        /* If hex, then we expect hex-encoded UCS-2 */
         if (possibly_hex) {
             utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
-            if (!utf8) {
-                /* If we couldn't convert the string as HEX-UCS-2, try to see if
-                 * the string is valid UTF-8 itself. */
-                utf8 = str;
-            } else
+            if (utf8) {
                 g_free (str);
-        } else
-            /* If we already know it's not hex, try to use the string as it is */
-            utf8 = str;
+                break;
+            }
+        }
+
+        /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
+         * (much more likely).  Try to convert to UTF-8 and if that fails, use
+         * the partial conversion length to re-convert the part of the string
+         * that is UTF-8, if any.
+         */
+        utf8 = g_convert (str, strlen (str),
+                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          &bread, &bwritten, NULL);

+        /* Valid conversion, or we didn't get enough valid UTF-8 */
+        if (utf8 || (bwritten <= 2)) {
+            g_free (str);
+            break;
+        }
+
+        /* Last try; chop off the original string at the conversion failure
+         * location and get what we can.  Free str as the other exits do. */
+        str[bread] = '\0';
+        utf8 = g_convert (str, strlen (str),
+                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          NULL, NULL, NULL);
+        g_free (str);
         break;
     }

@@ -791,7 +809,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,

     /* Validate UTF-8 always before returning. This result will be exposed in DBus
      * very likely... */
-    if (!g_utf8_validate (utf8, -1, NULL)) {
+    if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
         /* Better return NULL than an invalid UTF-8 string */
         g_free (utf8);
         utf8 = NULL;
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index 855387a..96435cc 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -72,7 +72,6 @@ guint8 *gsm_pack (const guint8 *src,
                   guint8 start_offset,  /* in bits */
                   guint32 *out_packed_len);

-gchar *mm_charset_take_and_convert_to_utf8 (gchar *str,
-                                            MMModemCharset charset);
+gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset);

 #endif /* MM_CHARSETS_H */
diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c
index f954d93..ced38a6 100644
--- a/src/tests/test-charsets.c
+++ b/src/tests/test-charsets.c
@@ -326,6 +326,41 @@ test_pack_gsm7_7_chars_offset (void *f, gpointer d)
     g_free (packed);
 }

+static void
+test_take_convert_ucs2_hex_utf8 (void *f, gpointer d)
+{
+    gchar *src, *converted;
+
+    /* Hex-encoded UCS-2 must decode to the UTF-8 text "T-Mobile"; the converter frees src on this path */
+    src = g_strdup ("0054002d004d006f00620069006c0065");
+    converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
+    g_assert_cmpstr (converted, ==, "T-Mobile");
+    g_free (converted);
+}
+
+static void
+test_take_convert_ucs2_bad_ascii (void *f, gpointer d)
+{
+    gchar *src, *converted;
+
+    /* ASCII text followed by one trailing non-ASCII byte must come back as just the ASCII part ("Orange") */
+    src = g_strdup ("Orange\241");
+    converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
+    g_assert_cmpstr (converted, ==, "Orange");
+    g_free (converted);
+}
+
+static void
+test_take_convert_ucs2_bad_ascii2 (void *f, gpointer d)
+{
+    gchar *src, *converted;
+
+    /* Input made up only of non-ASCII bytes must not crash; NULL is the expected result, so nothing is freed */
+    src = g_strdup ("\241\255\254\250\244\234");
+    converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
+    g_assert (converted == NULL);
+}
+

 #if GLIB_CHECK_VERSION(2,25,12)
 typedef GTestFixtureFunc TCFunc;
@@ -360,6 +395,10 @@ int main (int argc, char **argv)

     g_test_suite_add (suite, TESTCASE (test_pack_gsm7_7_chars_offset, NULL));

+    g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_hex_utf8, NULL));
+    g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii, NULL));
+    g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii2, NULL));
+
     result = g_test_run ();

     return result;