99 lines
3.1 KiB
Diff
99 lines
3.1 KiB
Diff
|
From 72085b30bf30867360c4aa77bd43de5e1788d875 Mon Sep 17 00:00:00 2001
|
||
|
From: Ondrej Dubaj <odubaj@redhat.com>
|
||
|
Date: Tue, 24 Mar 2020 09:22:47 +0100
|
||
|
Subject: [PATCH] Bugfix and optimize archive_wstring_append_from_mbs()
|
||
|
|
||
|
The call to mbrtowc() or mbtowc() should read up to mbs_length
|
||
|
bytes and not wcs_length. This avoids out-of-bounds reads.
|
||
|
|
||
|
mbrtowc() and mbtowc() return (size_t)-1 with errno EILSEQ when
|
||
|
they encounter an invalid multibyte character and (size_t)-2 when
|
||
|
they encounter an incomplete multibyte character. As we return
|
||
|
failure and all our callers error out it makes no sense to continue
|
||
|
parsing mbs.
|
||
|
|
||
|
As we allocate `len` wchars at the beginning and each wchar has
|
||
|
at least one byte, there will never be need to grow the buffer,
|
||
|
so the code can be left out. On the other hand, we are always
|
||
|
allocating more memory than we need.
|
||
|
|
||
|
As long as wcs_length == mbs_length == len we can omit wcs_length.
|
||
|
We keep the old code commented if we decide to save memory and
|
||
|
use autoexpanding wcs_length in the future.
|
||
|
---
|
||
|
libarchive/archive_string.c | 28 +++++++++++++++++-----------
|
||
|
1 file changed, 17 insertions(+), 11 deletions(-)
|
||
|
|
||
|
diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
|
||
|
index 5ae09b6..d7541dc 100644
|
||
|
--- a/libarchive/archive_string.c
|
||
|
+++ b/libarchive/archive_string.c
|
||
|
@@ -590,7 +590,7 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
||
|
* No single byte will be more than one wide character,
|
||
|
* so this length estimate will always be big enough.
|
||
|
*/
|
||
|
- size_t wcs_length = len;
|
||
|
+ //size_t wcs_length = len;
|
||
|
size_t mbs_length = len;
|
||
|
const char *mbs = p;
|
||
|
wchar_t *wcs;
|
||
|
@@ -599,7 +599,11 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
||
|
|
||
|
memset(&shift_state, 0, sizeof(shift_state));
|
||
|
#endif
|
||
|
- if (NULL == archive_wstring_ensure(dest, dest->length + wcs_length + 1))
|
||
|
+ /*
|
||
|
+ * As we decided to have wcs_length == mbs_length == len
|
||
|
+ * we can use len here instead of wcs_length
|
||
|
+ */
|
||
|
+ if (NULL == archive_wstring_ensure(dest, dest->length + len + 1))
|
||
|
return (-1);
|
||
|
wcs = dest->s + dest->length;
|
||
|
/*
|
||
|
@@ -608,6 +612,12 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
||
|
* multi bytes.
|
||
|
*/
|
||
|
while (*mbs && mbs_length > 0) {
|
||
|
+ /*
|
||
|
+ * The buffer we allocated is always big enough.
|
||
|
+ * Keep this code path in a comment if we decide to choose
|
||
|
+ * smaller wcs_length in the future
|
||
|
+ */
|
||
|
+/*
|
||
|
if (wcs_length == 0) {
|
||
|
dest->length = wcs - dest->s;
|
||
|
dest->s[dest->length] = L'\0';
|
||
|
@@ -617,24 +627,20 @@ archive_wstring_append_from_mbs(struct archive_wstring *dest,
|
||
|
return (-1);
|
||
|
wcs = dest->s + dest->length;
|
||
|
}
|
||
|
+*/
|
||
|
#if HAVE_MBRTOWC
|
||
|
- r = mbrtowc(wcs, mbs, wcs_length, &shift_state);
|
||
|
+ r = mbrtowc(wcs, mbs, mbs_length, &shift_state);
|
||
|
#else
|
||
|
- r = mbtowc(wcs, mbs, wcs_length);
|
||
|
+ r = mbtowc(wcs, mbs, mbs_length);
|
||
|
#endif
|
||
|
if (r == (size_t)-1 || r == (size_t)-2) {
|
||
|
ret_val = -1;
|
||
|
- if (errno == EILSEQ) {
|
||
|
- ++mbs;
|
||
|
- --mbs_length;
|
||
|
- continue;
|
||
|
- } else
|
||
|
- break;
|
||
|
+ break;
|
||
|
}
|
||
|
if (r == 0 || r > mbs_length)
|
||
|
break;
|
||
|
wcs++;
|
||
|
- wcs_length--;
|
||
|
+ //wcs_length--;
|
||
|
mbs += r;
|
||
|
mbs_length -= r;
|
||
|
}
|
||
|
--
|
||
|
2.24.1
|
||
|
|