commit e07c2162d66ab1a2e938db48744fc00afd29c272 Author: Dan Williams Date: Tue Sep 11 16:36:16 2012 -0500 core: better handling of non-UCS2 conversions that should be UCS2 (bgo #683817) Some modems return the +COPS operator name in hex-encoded current character set (as set with +CSCS). Others return the operator name in ASCII when set to UCS2, while yet others return the ASCII name with trash at the end (*cough* Huawei *cough*). Handle that better by not crashing. diff --git a/src/mm-charsets.c b/src/mm-charsets.c index 68bf2d1..65fc33c 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -710,8 +710,7 @@ gsm_pack (const guint8 *src, * the hex representation of the charset-encoded string, so we need to cope with * that case. */ gchar * -mm_charset_take_and_convert_to_utf8 (gchar *str, - MMModemCharset charset) +mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset) { gchar *utf8 = NULL; @@ -752,6 +751,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str, case MM_MODEM_CHARSET_UCS2: { gsize len; gboolean possibly_hex = TRUE; + gsize bread = 0, bwritten = 0; /* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */ len = strlen (str); @@ -765,19 +765,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str, possibly_hex = isxdigit (*p++); } - /* If we get UCS-2, we expect the HEX representation of the string */ + /* If hex, then we expect hex-encoded UCS-2 */ if (possibly_hex) { utf8 = mm_modem_charset_hex_to_utf8 (str, charset); - if (!utf8) { - /* If we couldn't convert the string as HEX-UCS-2, try to see if - * the string is valid UTF-8 itself. */ - utf8 = str; - } else + if (utf8) { g_free (str); - } else - /* If we already know it's not hex, try to use the string as it is */ - utf8 = str; + break; + } + } + + /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8 + * (much more likely). Try to convert to UTF-8 and if that fails, use + * the partial conversion length to re-convert the part of the string + * that is UTF-8, if any. + */ + utf8 = g_convert (str, strlen (str), + "UTF-8//TRANSLIT", "UTF-8//TRANSLIT", + &bread, &bwritten, NULL); + /* Valid conversion, or we didn't get enough valid UTF-8 */ + if (utf8 || (bwritten <= 2)) { + g_free (str); + break; + } + + /* Last try; chop off the original string at the conversion failure + * location and get what we can. + */ + str[bread] = '\0'; + utf8 = g_convert (str, strlen (str), + "UTF-8//TRANSLIT", "UTF-8//TRANSLIT", + NULL, NULL, NULL); break; } @@ -791,7 +809,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str, /* Validate UTF-8 always before returning. This result will be exposed in DBus * very likely... */ - if (!g_utf8_validate (utf8, -1, NULL)) { + if (utf8 && !g_utf8_validate (utf8, -1, NULL)) { /* Better return NULL than an invalid UTF-8 string */ g_free (utf8); utf8 = NULL; diff --git a/src/mm-charsets.h b/src/mm-charsets.h index 855387a..96435cc 100644 --- a/src/mm-charsets.h +++ b/src/mm-charsets.h @@ -72,7 +72,6 @@ guint8 *gsm_pack (const guint8 *src, guint8 start_offset, /* in bits */ guint32 *out_packed_len); -gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, - MMModemCharset charset); +gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset); #endif /* MM_CHARSETS_H */ diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c index f954d93..ced38a6 100644 --- a/src/tests/test-charsets.c +++ b/src/tests/test-charsets.c @@ -326,6 +326,41 @@ test_pack_gsm7_7_chars_offset (void *f, gpointer d) g_free (packed); } +static void +test_take_convert_ucs2_hex_utf8 (void *f, gpointer d) +{ + gchar *src, *converted; + + /* Ensure hex-encoded UCS-2 works */ + src = g_strdup ("0054002d004d006f00620069006c0065"); + converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2); + g_assert_cmpstr (converted, ==, "T-Mobile"); + g_free (converted); +} + +static void +test_take_convert_ucs2_bad_ascii (void *f, gpointer d) +{ + gchar *src, *converted; + + /* Test that something mostly ASCII returns most of the original string */ + src = g_strdup ("Orange\241"); + converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2); + g_assert_cmpstr (converted, ==, "Orange"); + g_free (converted); +} + +static void +test_take_convert_ucs2_bad_ascii2 (void *f, gpointer d) +{ + gchar *src, *converted; + + /* Ensure something completely screwed up doesn't crash */ + src = g_strdup ("\241\255\254\250\244\234"); + converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2); + g_assert (converted == NULL); +} + #if GLIB_CHECK_VERSION(2,25,12) typedef GTestFixtureFunc TCFunc; @@ -360,6 +395,10 @@ int main (int argc, char **argv) g_test_suite_add (suite, TESTCASE (test_pack_gsm7_7_chars_offset, NULL)); + g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_hex_utf8, NULL)); + g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii, NULL)); + g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii2, NULL)); + result = g_test_run (); return result;