161 lines
5.7 KiB
Diff
161 lines
5.7 KiB
Diff
commit e07c2162d66ab1a2e938db48744fc00afd29c272
|
|
Author: Dan Williams <dcbw@redhat.com>
|
|
Date: Tue Sep 11 16:36:16 2012 -0500
|
|
|
|
core: better handling of non-UCS2 conversions that should be UCS2 (bgo #683817)
|
|
|
|
Some modems return the +COPS operator name hex-encoded in the current
|
|
character set (as set with +CSCS). Others return the operator name
|
|
in ASCII when set to UCS2, while yet others return the ASCII name
|
|
with trash at the end (*cough* Huawei *cough*). Handle that better
|
|
by not crashing.
|
|
|
|
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
|
|
index 68bf2d1..65fc33c 100644
|
|
--- a/src/mm-charsets.c
|
|
+++ b/src/mm-charsets.c
|
|
@@ -710,8 +710,7 @@ gsm_pack (const guint8 *src,
|
|
* the hex representation of the charset-encoded string, so we need to cope with
|
|
* that case. */
|
|
gchar *
|
|
-mm_charset_take_and_convert_to_utf8 (gchar *str,
|
|
- MMModemCharset charset)
|
|
+mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
|
|
{
|
|
gchar *utf8 = NULL;
|
|
|
|
@@ -752,6 +751,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
|
|
case MM_MODEM_CHARSET_UCS2: {
|
|
gsize len;
|
|
gboolean possibly_hex = TRUE;
|
|
+ gsize bread = 0, bwritten = 0;
|
|
|
|
/* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
|
|
len = strlen (str);
|
|
@@ -765,19 +765,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
|
|
possibly_hex = isxdigit (*p++);
|
|
}
|
|
|
|
- /* If we get UCS-2, we expect the HEX representation of the string */
|
|
+ /* If hex, then we expect hex-encoded UCS-2 */
|
|
if (possibly_hex) {
|
|
utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
|
|
- if (!utf8) {
|
|
- /* If we couldn't convert the string as HEX-UCS-2, try to see if
|
|
- * the string is valid UTF-8 itself. */
|
|
- utf8 = str;
|
|
- } else
|
|
+ if (utf8) {
|
|
g_free (str);
|
|
- } else
|
|
- /* If we already know it's not hex, try to use the string as it is */
|
|
- utf8 = str;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
|
|
+ * (much more likely). Try to convert to UTF-8 and if that fails, use
|
|
+ * the partial conversion length to re-convert the part of the string
|
|
+ * that is UTF-8, if any.
|
|
+ */
|
|
+ utf8 = g_convert (str, strlen (str),
|
|
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
|
|
+ &bread, &bwritten, NULL);
|
|
|
|
+ /* Valid conversion, or we didn't get enough valid UTF-8 */
|
|
+ if (utf8 || (bwritten <= 2)) {
|
|
+ g_free (str);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Last try; chop off the original string at the conversion failure
|
|
+ * location and get what we can.
|
|
+ */
|
|
+ str[bread] = '\0';
|
|
+ utf8 = g_convert (str, strlen (str),
|
|
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
|
|
+ NULL, NULL, NULL);
|
|
break;
|
|
}
|
|
|
|
@@ -791,7 +809,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
|
|
|
|
/* Validate UTF-8 always before returning. This result will be exposed in DBus
|
|
* very likely... */
|
|
- if (!g_utf8_validate (utf8, -1, NULL)) {
|
|
+ if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
|
|
/* Better return NULL than an invalid UTF-8 string */
|
|
g_free (utf8);
|
|
utf8 = NULL;
|
|
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
|
|
index 855387a..96435cc 100644
|
|
--- a/src/mm-charsets.h
|
|
+++ b/src/mm-charsets.h
|
|
@@ -72,7 +72,6 @@ guint8 *gsm_pack (const guint8 *src,
|
|
guint8 start_offset, /* in bits */
|
|
guint32 *out_packed_len);
|
|
|
|
-gchar *mm_charset_take_and_convert_to_utf8 (gchar *str,
|
|
- MMModemCharset charset);
|
|
+gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset);
|
|
|
|
#endif /* MM_CHARSETS_H */
|
|
diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c
|
|
index f954d93..ced38a6 100644
|
|
--- a/src/tests/test-charsets.c
|
|
+++ b/src/tests/test-charsets.c
|
|
@@ -326,6 +326,41 @@ test_pack_gsm7_7_chars_offset (void *f, gpointer d)
|
|
g_free (packed);
|
|
}
|
|
|
|
+static void
|
|
+test_take_convert_ucs2_hex_utf8 (void *f, gpointer d)
|
|
+{
|
|
+ gchar *src, *converted;
|
|
+
|
|
+ /* Ensure hex-encoded UCS-2 works */
|
|
+ src = g_strdup ("0054002d004d006f00620069006c0065");
|
|
+ converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
|
|
+ g_assert_cmpstr (converted, ==, "T-Mobile");
|
|
+ g_free (converted);
|
|
+}
|
|
+
|
|
+static void
|
|
+test_take_convert_ucs2_bad_ascii (void *f, gpointer d)
|
|
+{
|
|
+ gchar *src, *converted;
|
|
+
|
|
+ /* Test that something mostly ASCII returns most of the original string */
|
|
+ src = g_strdup ("Orange\241");
|
|
+ converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
|
|
+ g_assert_cmpstr (converted, ==, "Orange");
|
|
+ g_free (converted);
|
|
+}
|
|
+
|
|
+static void
|
|
+test_take_convert_ucs2_bad_ascii2 (void *f, gpointer d)
|
|
+{
|
|
+ gchar *src, *converted;
|
|
+
|
|
+ /* Ensure something completely screwed up doesn't crash */
|
|
+ src = g_strdup ("\241\255\254\250\244\234");
|
|
+ converted = mm_charset_take_and_convert_to_utf8 (src, MM_MODEM_CHARSET_UCS2);
|
|
+ g_assert (converted == NULL);
|
|
+}
|
|
+
|
|
|
|
#if GLIB_CHECK_VERSION(2,25,12)
|
|
typedef GTestFixtureFunc TCFunc;
|
|
@@ -360,6 +395,10 @@ int main (int argc, char **argv)
|
|
|
|
g_test_suite_add (suite, TESTCASE (test_pack_gsm7_7_chars_offset, NULL));
|
|
|
|
+ g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_hex_utf8, NULL));
|
|
+ g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii, NULL));
|
|
+ g_test_suite_add (suite, TESTCASE (test_take_convert_ucs2_bad_ascii2, NULL));
|
|
+
|
|
result = g_test_run ();
|
|
|
|
return result;
|