From 848c7d24bfdefcf46fea6a3ebb4497f2c8da8869 Mon Sep 17 00:00:00 2001 From: Petr Stodulka Date: Wed, 25 Nov 2015 12:08:53 +0100 Subject: [PATCH] fix print of non-ascii filenames (#225576) --- unzip-6.0-alt-iconv-utf8-print.patch | 381 +++++++++++++++++++++++++++ unzip.spec | 10 +- 2 files changed, 389 insertions(+), 2 deletions(-) create mode 100644 unzip-6.0-alt-iconv-utf8-print.patch diff --git a/unzip-6.0-alt-iconv-utf8-print.patch b/unzip-6.0-alt-iconv-utf8-print.patch new file mode 100644 index 0000000..f45cd76 --- /dev/null +++ b/unzip-6.0-alt-iconv-utf8-print.patch @@ -0,0 +1,381 @@ +From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001 +From: Petr Stodulka +Date: Tue, 24 Nov 2015 17:56:11 +0100 +Subject: [PATCH] print correctly non-ascii filenames + +--- + extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- + unzpriv.h | 7 ++ + 2 files changed, 233 insertions(+), 63 deletions(-) + +diff --git a/extract.c b/extract.c +index 0ee4e93..741b7e0 100644 +--- a/extract.c ++++ b/extract.c +@@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry) + } /* end function set_deferred_symlink() */ + #endif /* SYMLINKS */ + ++/* ++ * If Unicode is supported, assume we have what we need to do this ++ * check using wide characters, avoiding MBCS issues. ++ */ + +- ++#ifndef UZ_FNFILTER_REPLACECHAR ++ /* A convenient choice for the replacement of unprintable char codes is ++ * the "single char wildcard", as this character is quite unlikely to ++ * appear in filenames by itself. The following default definition ++ * sets the replacement char to a question mark as the most common ++ * "single char wildcard"; this setting should be overridden in the ++ * appropiate system-specific configuration header when needed. ++ */ ++# define UZ_FNFILTER_REPLACECHAR '?' ++#endif + + /*************************/ + /* Function fnfilter() */ /* here instead of in list.c for SFX */ +@@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ + extent size; + { + #ifndef NATIVE /* ASCII: filter ANSI escape codes, etc. */ +- ZCONST uch *r=(ZCONST uch *)raw; ++ ZCONST uch *r; // =(ZCONST uch *)raw; + uch *s=space; + uch *slim=NULL; + uch *se=NULL; + int have_overflow = FALSE; + +- if (size > 0) { +- slim = space + size +-#ifdef _MBCS +- - (MB_CUR_MAX - 1) +-#endif +- - 4; ++# if defined( UNICODE_SUPPORT) && defined( _MBCS) ++/* If Unicode support is enabled, and we have multi-byte characters, ++ * then do the isprint() checks by first converting to wide characters ++ * and checking those. This avoids our having to parse multi-byte ++ * characters for ourselves. After the wide-char replacements have been ++ * made, the wide string is converted back to the local character set. ++ */ ++ wchar_t *wstring; /* wchar_t version of raw */ ++ size_t wslen; /* length of wstring */ ++ wchar_t *wostring; /* wchar_t version of output string */ ++ size_t woslen; /* length of wostring */ ++ char *newraw; /* new raw */ ++ ++ /* 2012-11-06 SMS. ++ * Changed to check the value returned by mbstowcs(), and bypass the ++ * Unicode processing if it fails. This seems to fix a problem ++ * reported in the SourceForge forum, but it's not clear that we ++ * should be doing any Unicode processing without some evidence that ++ * the name actually is Unicode. (Check bit 11 in the flags before ++ * coming here?) ++ * http://sourceforge.net/p/infozip/bugs/40/ ++ */ ++ ++ if (MB_CUR_MAX <= 1) ++ { ++ /* There's no point to converting multi-byte chars if there are ++ * no multi-byte chars. ++ */ ++ wslen = (size_t)-1; + } +- while (*r) { +- if (size > 0 && s >= slim && se == NULL) { +- se = s; ++ else ++ { ++ /* Get Unicode wide character count (for storage allocation). */ ++ wslen = mbstowcs( NULL, raw, 0); ++ } ++ ++ if (wslen != (size_t)-1) ++ { ++ /* Apparently valid Unicode. Allocate wide-char storage. */ ++ wstring = (wchar_t *)izu_malloc((wslen + 1) * sizeof(wchar_t)); ++ if (wstring == NULL) { ++ strcpy( (char *)space, raw); ++ return (char *)space; + } +-#ifdef QDOS +- if (qlflag & 2) { +- if (*r == '/' || *r == '.') { ++ wostring = (wchar_t *)izu_malloc(2 * (wslen + 1) * sizeof(wchar_t)); ++ if (wostring == NULL) { ++ izu_free(wstring); ++ strcpy( (char *)space, raw); ++ return (char *)space; ++ } ++ ++ /* Convert the multi-byte Unicode to wide chars. */ ++ wslen = mbstowcs(wstring, raw, wslen + 1); ++ ++ /* Filter the wide-character string. */ ++ fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t))); ++ ++ /* Convert filtered wide chars back to multi-byte. */ ++ woslen = wcstombs( NULL, wostring, 0); ++ if ((newraw = izu_malloc(woslen + 1)) == NULL) { ++ izu_free(wstring); ++ izu_free(wostring); ++ strcpy( (char *)space, raw); ++ return (char *)space; ++ } ++ woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1); ++ ++ if (size > 0) { ++ slim = space + size - 4; ++ } ++ r = (ZCONST uch *)newraw; ++ while (*r) { ++ if (size > 0 && s >= slim && se == NULL) { ++ se = s; ++ } ++# ifdef QDOS ++ if (qlflag & 2) { ++ if (*r == '/' || *r == '.') { ++ if (se != NULL && (s > (space + (size-3)))) { ++ have_overflow = TRUE; ++ break; ++ } ++ ++r; ++ *s++ = '_'; ++ continue; ++ } ++ } else ++# endif ++ { + if (se != NULL && (s > (space + (size-3)))) { + have_overflow = TRUE; + break; + } +- ++r; +- *s++ = '_'; +- continue; ++ *s++ = *r++; + } +- } else ++ } ++ if (have_overflow) { ++ strcpy((char *)se, "..."); ++ } else { ++ *s = '\0'; ++ } ++ ++ izu_free(wstring); ++ izu_free(wostring); ++ izu_free(newraw); ++ } ++ else ++# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */ ++ { ++ /* No Unicode support, or apparently invalid Unicode. */ ++ r = (ZCONST uch *)raw; ++ ++ if (size > 0) { ++ slim = space + size ++#ifdef _MBCS ++ - (MB_CUR_MAX - 1) ++#endif ++ - 4; ++ } ++ while (*r) { ++ if (size > 0 && s >= slim && se == NULL) { ++ se = s; ++ } ++#ifdef QDOS ++ if (qlflag & 2) { ++ if (*r == '/' || *r == '.') { ++ if (se != NULL && (s > (space + (size-3)))) { ++ have_overflow = TRUE; ++ break; ++ } ++ ++r; ++ *s++ = '_'; ++ continue; ++ } ++ } else + #endif + #ifdef HAVE_WORKING_ISPRINT +-# ifndef UZ_FNFILTER_REPLACECHAR +- /* A convenient choice for the replacement of unprintable char codes is +- * the "single char wildcard", as this character is quite unlikely to +- * appear in filenames by itself. The following default definition +- * sets the replacement char to a question mark as the most common +- * "single char wildcard"; this setting should be overridden in the +- * appropiate system-specific configuration header when needed. +- */ +-# define UZ_FNFILTER_REPLACECHAR '?' +-# endif +- if (!isprint(*r)) { ++ if (!isprint(*r)) { ++ if (*r < 32) { ++ /* ASCII control codes are escaped as "^{letter}". */ ++ if (se != NULL && (s > (space + (size-4)))) { ++ have_overflow = TRUE; ++ break; ++ } ++ *s++ = '^', *s++ = (uch)(64 + *r++); ++ } else { ++ /* Other unprintable codes are replaced by the ++ * placeholder character. */ ++ if (se != NULL && (s > (space + (size-3)))) { ++ have_overflow = TRUE; ++ break; ++ } ++ *s++ = UZ_FNFILTER_REPLACECHAR; ++ INCSTR(r); ++ } ++#else /* !HAVE_WORKING_ISPRINT */ + if (*r < 32) { + /* ASCII control codes are escaped as "^{letter}". */ + if (se != NULL && (s > (space + (size-4)))) { +@@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ + break; + } + *s++ = '^', *s++ = (uch)(64 + *r++); ++#endif /* ?HAVE_WORKING_ISPRINT */ + } else { +- /* Other unprintable codes are replaced by the +- * placeholder character. */ ++#ifdef _MBCS ++ unsigned i = CLEN(r); ++ if (se != NULL && (s > (space + (size-i-2)))) { ++ have_overflow = TRUE; ++ break; ++ } ++ for (; i > 0; i--) ++ *s++ = *r++; ++#else + if (se != NULL && (s > (space + (size-3)))) { + have_overflow = TRUE; + break; + } +- *s++ = UZ_FNFILTER_REPLACECHAR; +- INCSTR(r); +- } +-#else /* !HAVE_WORKING_ISPRINT */ +- if (*r < 32) { +- /* ASCII control codes are escaped as "^{letter}". */ +- if (se != NULL && (s > (space + (size-4)))) { +- have_overflow = TRUE; +- break; +- } +- *s++ = '^', *s++ = (uch)(64 + *r++); +-#endif /* ?HAVE_WORKING_ISPRINT */ +- } else { +-#ifdef _MBCS +- unsigned i = CLEN(r); +- if (se != NULL && (s > (space + (size-i-2)))) { +- have_overflow = TRUE; +- break; +- } +- for (; i > 0; i--) + *s++ = *r++; +-#else +- if (se != NULL && (s > (space + (size-3)))) { +- have_overflow = TRUE; +- break; +- } +- *s++ = *r++; + #endif +- } +- } +- if (have_overflow) { +- strcpy((char *)se, "..."); +- } else { +- *s = '\0'; ++ } ++ } ++ if (have_overflow) { ++ strcpy((char *)se, "..."); ++ } else { ++ *s = '\0'; ++ } + } + + #ifdef WINDLL +@@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ + } /* end function fnfilter() */ + + ++#if defined( UNICODE_SUPPORT) && defined( _MBCS) ++ ++/****************************/ ++/* Function fnfilter[w]() */ /* (Here instead of in list.c for SFX.) */ ++/****************************/ ++ ++/* fnfilterw() - Convert wide name to safely printable form. */ ++ ++/* fnfilterw() - Convert wide-character name to safely printable form. */ ++ ++wchar_t *fnfilterw( src, dst, siz) ++ ZCONST wchar_t *src; /* Pointer to source char (string). */ ++ wchar_t *dst; /* Pointer to destination char (string). */ ++ extent siz; /* Not used (!). */ ++{ ++ wchar_t *dsx = dst; ++ ++ /* Filter the wide chars. */ ++ while (*src) ++ { ++ if (iswprint( *src)) ++ { ++ /* Printable code. Copy it. */ ++ *dst++ = *src; ++ } ++ else ++ { ++ /* Unprintable code. Substitute something printable for it. */ ++ if (*src < 32) ++ { ++ /* Replace ASCII control code with "^{letter}". */ ++ *dst++ = (wchar_t)'^'; ++ *dst++ = (wchar_t)(64 + *src); ++ } ++ else ++ { ++ /* Replace other unprintable code with the placeholder. */ ++ *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR; ++ } ++ } ++ src++; ++ } ++ *dst = (wchar_t)0; /* NUL-terminate the destination string. */ ++ return dsx; ++} /* fnfilterw(). */ ++ ++#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */ + + + #ifdef SET_DIR_ATTRIB +diff --git a/unzpriv.h b/unzpriv.h +index 22d3923..e48a652 100644 +--- a/unzpriv.h ++++ b/unzpriv.h +@@ -1212,6 +1212,7 @@ + # ifdef UNICODE_WCHAR + # if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP)) + # include ++# include + # endif + # endif + # ifndef _MBCS /* no need to include twice, see below */ +@@ -2410,6 +2411,12 @@ int memflush OF((__GPRO__ ZCONST uch *rawbuf, ulg size)); + char *fnfilter OF((ZCONST char *raw, uch *space, + extent size)); + ++# if defined( UNICODE_SUPPORT) && defined( _MBCS) ++wchar_t *fnfilterw OF((ZCONST wchar_t *src, wchar_t *dst, ++ extent siz)); ++#endif ++ ++ + /*--------------------------------------------------------------------------- + Decompression functions: + ---------------------------------------------------------------------------*/ +-- +2.4.3 + diff --git a/unzip.spec b/unzip.spec index e293d8b..eff434a 100644 --- a/unzip.spec +++ b/unzip.spec @@ -1,7 +1,7 @@ Summary: A utility for unpacking zip files Name: unzip Version: 6.0 -Release: 26%{?dist} +Release: 27%{?dist} License: BSD Group: Applications/Archiving Source: http://downloads.sourceforge.net/infozip/unzip60.tar.gz @@ -41,6 +41,7 @@ Patch18: unzip-6.0-heap-overflow-infloop.patch # support non-{latin,unicode} encoding Patch19: unzip-6.0-alt-iconv-utf8.patch +Patch20: unzip-6.0-alt-iconv-utf8-print.patch URL: http://www.info-zip.org/UnZip.html BuildRequires: bzip2-devel @@ -76,7 +77,9 @@ a zip archive. %patch16 -p1 -b .cve-2014-8141 %patch17 -p1 -b .overflow-long-fsize %patch18 -p1 -b .heap-overflow-infloop -%patch19 -p1 -b .iconv +%patch19 -p1 -b .utf +%patch20 -p1 -b .utf-print + %build # IZ_HAVE_UXUIDGID is needed for right functionality of unzip -X @@ -95,6 +98,9 @@ make -f unix/Makefile prefix=$RPM_BUILD_ROOT%{_prefix} MANDIR=$RPM_BUILD_ROOT/%{ %{_mandir}/*/* %changelog +* Wed Nov 25 2015 Petr Stodulka - 6.0-27 +- fix print of non-ascii filenames (#225576) + * Fri Nov 13 2015 Petr Stodulka - 6.0-26 - fix unsigned overflow patch for #1260944 (#1281804)