Fix for handling guest filenames with invalid or incomplete
multibyte or wide characters resolves: rhbz#1301593
This commit is contained in:
parent
2cd5f58b39
commit
8b9aad09c8
172
0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
Normal file
172
0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
Normal file
@ -0,0 +1,172 @@
|
||||
From d9c61dd60ec484909f70b7a916ada3a93af94b60 Mon Sep 17 00:00:00 2001
|
||||
From: Erik Larsson <mechie@users.sourceforge.net>
|
||||
Date: Fri, 8 Apr 2016 05:39:48 +0200
|
||||
Subject: [PATCH 1/2] unistr.c: Enable encoding broken UTF-16 into broken
|
||||
UTF-8, A.K.A. WTF-8.
|
||||
|
||||
Windows filenames may contain invalid UTF-16 sequences (specifically
|
||||
broken surrogate pairs), which cannot be converted to UTF-8 if we do
|
||||
strict conversion.
|
||||
|
||||
This patch enables encoding broken UTF-16 into similarly broken UTF-8 by
|
||||
encoding any surrogate character that doesn't have a match into a separate
|
||||
3-byte UTF-8 sequence.
|
||||
|
||||
This is "sort of" valid UTF-8, but not valid Unicode since the code
|
||||
points used for surrogate pair encoding are not supposed to occur in a
|
||||
valid Unicode string... but on the other hand the source UTF-16 data is
|
||||
also broken, so we aren't really making things any worse.
|
||||
|
||||
This format is sometimes referred to as WTF-8 (Wobbly Translation
|
||||
Format, 8-bit encoding) and is a common solution to represent broken
|
||||
UTF-16 as UTF-8.
|
||||
|
||||
It is a lossless round-trip conversion, i.e. converting from broken
|
||||
UTF-16 to "WTF-8" and back to UTF-16 yields the same broken UTF-16
|
||||
sequence. Because of this property it enables accessing these files
|
||||
by filename through ntfs-3g and the ntfsprogs (e.g. ls -la works as
|
||||
expected).
|
||||
|
||||
To disable this behaviour you can pass the preprocessor/compiler flag
|
||||
'-DALLOW_BROKEN_SURROGATES=0' when building ntfs-3g.
|
||||
---
|
||||
libntfs-3g/unistr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 65 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
|
||||
index 7f278cd..71802aa 100644
|
||||
--- a/libntfs-3g/unistr.c
|
||||
+++ b/libntfs-3g/unistr.c
|
||||
@@ -61,6 +61,11 @@
|
||||
|
||||
#define NOREVBOM 0 /* JPA rejecting U+FFFE and U+FFFF, open to debate */
|
||||
|
||||
+#ifndef ALLOW_BROKEN_SURROGATES
|
||||
+/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
|
||||
+#define ALLOW_BROKEN_SURROGATES 1
|
||||
+#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
|
||||
+
|
||||
/*
|
||||
* IMPORTANT
|
||||
* =========
|
||||
@@ -462,8 +467,22 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
if ((c >= 0xdc00) && (c < 0xe000)) {
|
||||
surrog = FALSE;
|
||||
count += 4;
|
||||
- } else
|
||||
+ } else {
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ /* The first UTF-16 unit of a surrogate pair has
|
||||
+ * a value between 0xd800 and 0xdc00. It can be
|
||||
+ * encoded as an individual UTF-8 sequence if we
|
||||
+ * cannot combine it with the next UTF-16 unit
|
||||
+ * unit as a surrogate pair. */
|
||||
+ surrog = FALSE;
|
||||
+ count += 3;
|
||||
+
|
||||
+ --i;
|
||||
+ continue;
|
||||
+#else
|
||||
goto fail;
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+ }
|
||||
} else
|
||||
if (c < 0x80)
|
||||
count++;
|
||||
@@ -473,6 +492,10 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
count += 3;
|
||||
else if (c < 0xdc00)
|
||||
surrog = TRUE;
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ else if (c < 0xe000)
|
||||
+ count += 3;
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
#if NOREVBOM
|
||||
else if ((c >= 0xe000) && (c < 0xfffe))
|
||||
#else
|
||||
@@ -487,7 +510,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
}
|
||||
}
|
||||
if (surrog)
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ count += 3; /* ending with a single surrogate */
|
||||
+#else
|
||||
goto fail;
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
|
||||
ret = count;
|
||||
out:
|
||||
@@ -548,8 +575,24 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
|
||||
*t++ = 0x80 + (c & 63);
|
||||
halfpair = 0;
|
||||
- } else
|
||||
+ } else {
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ /* The first UTF-16 unit of a surrogate pair has
|
||||
+ * a value between 0xd800 and 0xdc00. It can be
|
||||
+ * encoded as an individual UTF-8 sequence if we
|
||||
+ * cannot combine it with the next UTF-16 unit
|
||||
+ * unit as a surrogate pair. */
|
||||
+ *t++ = 0xe0 | (halfpair >> 12);
|
||||
+ *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
|
||||
+ *t++ = 0x80 | (halfpair & 0x3f);
|
||||
+ halfpair = 0;
|
||||
+
|
||||
+ --i;
|
||||
+ continue;
|
||||
+#else
|
||||
goto fail;
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+ }
|
||||
} else if (c < 0x80) {
|
||||
*t++ = c;
|
||||
} else {
|
||||
@@ -562,6 +605,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
*t++ = 0x80 | (c & 0x3f);
|
||||
} else if (c < 0xdc00)
|
||||
halfpair = c;
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ else if (c < 0xe000) {
|
||||
+ *t++ = 0xe0 | (c >> 12);
|
||||
+ *t++ = 0x80 | ((c >> 6) & 0x3f);
|
||||
+ *t++ = 0x80 | (c & 0x3f);
|
||||
+ }
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
else if (c >= 0xe000) {
|
||||
*t++ = 0xe0 | (c >> 12);
|
||||
*t++ = 0x80 | ((c >> 6) & 0x3f);
|
||||
@@ -570,6 +620,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ if (halfpair) { /* ending with a single surrogate */
|
||||
+ *t++ = 0xe0 | (halfpair >> 12);
|
||||
+ *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
|
||||
+ *t++ = 0x80 | (halfpair & 0x3f);
|
||||
+ }
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
*t = '\0';
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
@@ -693,10 +750,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
|
||||
/* Check valid ranges */
|
||||
#if NOREVBOM
|
||||
if (((*wc >= 0x800) && (*wc <= 0xD7FF))
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
|| ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
|
||||
return 3;
|
||||
#else
|
||||
if (((*wc >= 0x800) && (*wc <= 0xD7FF))
|
||||
+#if ALLOW_BROKEN_SURROGATES
|
||||
+ || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
|
||||
+#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
|| ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
|
||||
return 3;
|
||||
#endif
|
||||
--
|
||||
2.10.2
|
||||
|
170
0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
Normal file
170
0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
Normal file
@ -0,0 +1,170 @@
|
||||
From f0370bfa9c47575d4e47c94e443aa91983683a43 Mon Sep 17 00:00:00 2001
|
||||
From: Erik Larsson <mechie@users.sourceforge.net>
|
||||
Date: Tue, 12 Apr 2016 17:02:40 +0200
|
||||
Subject: [PATCH 2/2] unistr.c: Unify the two defines NOREVBOM and
|
||||
ALLOW_BROKEN_SURROGATES.
|
||||
|
||||
In the mailing list discussion we came to the conclusion that there
|
||||
doesn't seem to be any reason to keep these declarations separate since
|
||||
they address the same issue, namely libntfs-3g's tolerance for bad
|
||||
Unicode data in filenames and other UTF-16 strings in the file system,
|
||||
so merge the two defines into the new define ALLOW_BROKEN_UNICODE.
|
||||
---
|
||||
libntfs-3g/unistr.c | 58 +++++++++++++++++++++++------------------------------
|
||||
1 file changed, 25 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c
|
||||
index 71802aa..753acc0 100644
|
||||
--- a/libntfs-3g/unistr.c
|
||||
+++ b/libntfs-3g/unistr.c
|
||||
@@ -59,12 +59,11 @@
|
||||
#include "logging.h"
|
||||
#include "misc.h"
|
||||
|
||||
-#define NOREVBOM 0 /* JPA rejecting U+FFFE and U+FFFF, open to debate */
|
||||
-
|
||||
-#ifndef ALLOW_BROKEN_SURROGATES
|
||||
-/* Erik allowing broken UTF-16 surrogate pairs by default, open to debate. */
|
||||
-#define ALLOW_BROKEN_SURROGATES 1
|
||||
-#endif /* !defined(ALLOW_BROKEN_SURROGATES) */
|
||||
+#ifndef ALLOW_BROKEN_UNICODE
|
||||
+/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
|
||||
+ * open to debate. */
|
||||
+#define ALLOW_BROKEN_UNICODE 1
|
||||
+#endif /* !defined(ALLOW_BROKEN_UNICODE) */
|
||||
|
||||
/*
|
||||
* IMPORTANT
|
||||
@@ -468,7 +467,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
surrog = FALSE;
|
||||
count += 4;
|
||||
} else {
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
/* The first UTF-16 unit of a surrogate pair has
|
||||
* a value between 0xd800 and 0xdc00. It can be
|
||||
* encoded as an individual UTF-8 sequence if we
|
||||
@@ -481,7 +480,7 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
continue;
|
||||
#else
|
||||
goto fail;
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
}
|
||||
} else
|
||||
if (c < 0x80)
|
||||
@@ -492,15 +491,13 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
count += 3;
|
||||
else if (c < 0xdc00)
|
||||
surrog = TRUE;
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
else if (c < 0xe000)
|
||||
count += 3;
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
-#if NOREVBOM
|
||||
- else if ((c >= 0xe000) && (c < 0xfffe))
|
||||
-#else
|
||||
else if (c >= 0xe000)
|
||||
-#endif
|
||||
+#else
|
||||
+ else if ((c >= 0xe000) && (c < 0xfffe))
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
count += 3;
|
||||
else
|
||||
goto fail;
|
||||
@@ -510,11 +507,11 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
|
||||
}
|
||||
}
|
||||
if (surrog)
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
count += 3; /* ending with a single surrogate */
|
||||
#else
|
||||
goto fail;
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
|
||||
ret = count;
|
||||
out:
|
||||
@@ -576,7 +573,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
*t++ = 0x80 + (c & 63);
|
||||
halfpair = 0;
|
||||
} else {
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
/* The first UTF-16 unit of a surrogate pair has
|
||||
* a value between 0xd800 and 0xdc00. It can be
|
||||
* encoded as an individual UTF-8 sequence if we
|
||||
@@ -591,7 +588,7 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
continue;
|
||||
#else
|
||||
goto fail;
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
}
|
||||
} else if (c < 0x80) {
|
||||
*t++ = c;
|
||||
@@ -605,13 +602,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
*t++ = 0x80 | (c & 0x3f);
|
||||
} else if (c < 0xdc00)
|
||||
halfpair = c;
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
else if (c < 0xe000) {
|
||||
*t++ = 0xe0 | (c >> 12);
|
||||
*t++ = 0x80 | ((c >> 6) & 0x3f);
|
||||
*t++ = 0x80 | (c & 0x3f);
|
||||
}
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
else if (c >= 0xe000) {
|
||||
*t++ = 0xe0 | (c >> 12);
|
||||
*t++ = 0x80 | ((c >> 6) & 0x3f);
|
||||
@@ -620,13 +617,13 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
if (halfpair) { /* ending with a single surrogate */
|
||||
*t++ = 0xe0 | (halfpair >> 12);
|
||||
*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
|
||||
*t++ = 0x80 | (halfpair & 0x3f);
|
||||
}
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
*t = '\0';
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
@@ -748,21 +745,16 @@ static int utf8_to_unicode(u32 *wc, const char *s)
|
||||
| ((u32)(s[1] & 0x3F) << 6)
|
||||
| ((u32)(s[2] & 0x3F));
|
||||
/* Check valid ranges */
|
||||
-#if NOREVBOM
|
||||
+#if ALLOW_BROKEN_UNICODE
|
||||
if (((*wc >= 0x800) && (*wc <= 0xD7FF))
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
|| ((*wc >= 0xD800) && (*wc <= 0xDFFF))
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
- || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
|
||||
- return 3;
|
||||
-#else
|
||||
- if (((*wc >= 0x800) && (*wc <= 0xD7FF))
|
||||
-#if ALLOW_BROKEN_SURROGATES
|
||||
- || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
|
||||
-#endif /* ALLOW_BROKEN_SURROGATES */
|
||||
|| ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
|
||||
return 3;
|
||||
-#endif
|
||||
+#else
|
||||
+ if (((*wc >= 0x800) && (*wc <= 0xD7FF))
|
||||
+ || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
|
||||
+ return 3;
|
||||
+#endif /* ALLOW_BROKEN_UNICODE */
|
||||
}
|
||||
goto fail;
|
||||
/* four-byte */
|
||||
--
|
||||
2.10.2
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
Name: libguestfs-winsupport
|
||||
Version: 7.2
|
||||
Release: 1%{?dist}
|
||||
Release: 2%{?dist}
|
||||
Summary: Add support for Windows guests to virt-v2v and virt-p2v
|
||||
|
||||
URL: http://www.ntfs-3g.org/
|
||||
@ -21,6 +21,9 @@ Source0: http://tuxera.com/opensource/ntfs-3g_ntfsprogs-%{ntfs_version}.t
|
||||
|
||||
Patch0: ntfs-3g_ntfsprogs-2011.10.9-RC-ntfsck-unsupported-return-0.patch
|
||||
Patch1: CVE-2015-3202.patch
|
||||
# https://bugzilla.redhat.com/show_bug.cgi?id=1301593#c8
|
||||
Patch2: 0001-unistr.c-Enable-encoding-broken-UTF-16-into-broken-U.patch
|
||||
Patch3: 0002-unistr.c-Unify-the-two-defines-NOREVBOM-and-ALLOW_BR.patch
|
||||
|
||||
BuildRequires: libtool, libattr-devel
|
||||
BuildRequires: libconfig-devel, libgcrypt-devel, gnutls-devel, libuuid-devel
|
||||
@ -35,6 +38,8 @@ virt-v2v and virt-p2v programs.
|
||||
%setup -q -n ntfs-3g_ntfsprogs-%{ntfs_version}
|
||||
%patch0 -p1 -b .unsupported
|
||||
%patch1 -p1 -b .cve
|
||||
%patch2 -p1
|
||||
%patch3 -p1
|
||||
|
||||
|
||||
%build
|
||||
@ -96,6 +101,11 @@ popd
|
||||
|
||||
|
||||
%changelog
|
||||
* Wed Feb 22 2017 Richard W.M. Jones <rjones@redhat.com> - 7.2-2
|
||||
- Fix for handling guest filenames with invalid or incomplete
|
||||
multibyte or wide characters
|
||||
resolves: rhbz#1301593
|
||||
|
||||
* Tue Jul 07 2015 Richard W.M. Jones <rjones@redhat.com> - 7.2-1
|
||||
- Rebase and rebuild for RHEL 7.2
|
||||
resolves: rhbz#1240278
|
||||
|
Loading…
Reference in New Issue
Block a user