From 88302401fe2595d804bdbb3b4d3ec339e58415e6 Mon Sep 17 00:00:00 2001 From: Jakub Martisko Date: Tue, 26 Nov 2024 10:44:12 +0100 Subject: [PATCH] Zipinfo+another zipbomb fix zipinfo: remove the extra %c that caused invalid reads zipinfo: fix the whitespaces in the output Zipbombs: Port Another patch, originally made by Mark Adler: https://github.com/madler/unzip/commit/af0d07f95809653b669d88aa0f424c6d5aa48ba0 Resolves: RHEL-59972 Resolves: RHEL-6286 --- unzip-6.0-alt-iconv-utf8.patch | 12 +-- unzip-zipbomb-part7.patch | 172 +++++++++++++++++++++++++++++++++ unzip.spec | 12 ++- 3 files changed, 189 insertions(+), 7 deletions(-) create mode 100644 unzip-zipbomb-part7.patch diff --git a/unzip-6.0-alt-iconv-utf8.patch b/unzip-6.0-alt-iconv-utf8.patch index b9e3777..1db3164 100644 --- a/unzip-6.0-alt-iconv-utf8.patch +++ b/unzip-6.0-alt-iconv-utf8.patch @@ -174,11 +174,11 @@ Index: unzip-6.0/unzip.c +#else /* UNIX */ +static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ + -h print header line -t print totals for listed files or for all\n\ -+ -z print zipfile comment %c-T%c print file times in sortable decimal format\ -+\n %c-C%c be case-insensitive %s\ ++ -z print zipfile comment -T print file times in sortable decimal format\ ++\n -C be case-insensitive %s\ + -x exclude filenames that follow from listing\n\ -+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ -+ -I CHARSET specify a character encoding for UNIX and other archives\n"; ++ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ ++ -I CHARSET specify a character encoding for UNIX and other archives\n"; +#endif /* !UNIX */ #ifdef MORE static ZCONST char Far ZipInfoUsageLine4[] = @@ -196,8 +196,8 @@ Index: unzip-6.0/unzip.c + -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\ + -C match filenames case-insensitively -L make (some) names \ +lowercase\n %-42s -V retain VMS version numbers\n%s\ -+ -O CHARSET specify a 
character encoding for DOS, Windows and OS/2 archives\n\ -+ -I CHARSET specify a character encoding for UNIX and other archives\n\n"; ++ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ ++ -I CHARSET specify a character encoding for UNIX and other archives\n\n"; #else /* !VMS */ static ZCONST char Far UnzipUsageLine4[] = "\ modifiers:\n\ diff --git a/unzip-zipbomb-part7.patch b/unzip-zipbomb-part7.patch new file mode 100644 index 0000000..744a752 --- /dev/null +++ b/unzip-zipbomb-part7.patch @@ -0,0 +1,172 @@ +From af0d07f95809653b669d88aa0f424c6d5aa48ba0 Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Sat, 2 Jul 2022 14:35:04 -0700 +Subject: [PATCH] Be more liberal in the acceptance of data descriptors. + +Previously the zip64 flag determined the size of the lengths in the +data descriptor. This is compliant with the zip format. However, a +bug in the Java zip library results in an incorrect setting of that +flag. This commit permits either 32-bit or 64-bit lengths, auto- +detecting which it is, which works around the Java bug. +--- + extract.c | 146 +++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 123 insertions(+), 23 deletions(-) + +diff --git a/extract.c b/extract.c +index 878817d..b1c74df 100644 +--- a/extract.c ++++ b/extract.c +@@ -2173,30 +2173,130 @@ static int extract_or_test_member(__G) /* return PK-type error code */ + undefer_input(__G); + if (uO.zipbomb == TRUE) { + if ((G.lrec.general_purpose_bit_flag & 8) != 0) { +- /* skip over data descriptor (harder than it sounds, due to signature +- * ambiguity) +- */ +-# define SIG 0x08074b50 +-# define LOW 0xffffffff +- uch buf[12]; +- unsigned shy = 12 - readbuf((char *)buf, 12); +- ulg crc = shy ? 0 : makelong(buf); +- ulg clen = shy ? 0 : makelong(buf + 4); +- ulg ulen = shy ? 
0 : makelong(buf + 8); /* or high clen if ZIP64 */ +- if (crc == SIG && /* if not SIG, no signature */ +- (G.lrec.crc32 != SIG || /* if not SIG, have signature */ +- (clen == SIG && /* if not SIG, no signature */ +- ((G.lrec.csize & LOW) != SIG || /* if not SIG, have signature */ +- (ulen == SIG && /* if not SIG, no signature */ +- (G.pInfo->zip64 ? G.lrec.csize >> 32 : G.lrec.ucsize) != SIG +- /* if not SIG, have signature */ +- ))))) +- /* skip four more bytes to account for signature */ +- shy += 4 - readbuf((char *)buf, 4); +- if (G.pInfo->zip64) +- shy += 8 - readbuf((char *)buf, 8); /* skip eight more for ZIP64 */ +- if (shy) ++ // Skip over the data descriptor. We need to correctly position the ++ // read pointer after the data descriptor for the proper detection of ++ // overlapped zip file components. ++ // ++ // We need to resolve an ambiguity over four possible data descriptor ++ // formats. We check for all four, and pick the longest match. The data ++ // descriptor can have a signature or not, and it can use four or ++ // eight-byte lengths. The zip format requires resolving the ambiguity ++ // of a signature or not, but it uses the zip64 flag to determine ++ // whether the lengths are four or eight bytes. However there is a bug ++ // in the Java zip library that applies the wrong value of that flag. ++ // This works around that bug by always trying both length formats. ++ // ++ // So why the longest match? And does this resolve the ambiguity? No, ++ // it doesn't definitively resolve the ambiguity. However choosing the ++ // longest match at least resolves it for a normal zip file, where the ++ // bytes following the data descriptor must be another zip signature ++ // that is not a data descriptor signature. There are a few specific ++ // cases for which more than one of the formats will match the given ++ // CRC and lengths. The most plausible is between four and eight-byte ++ // lengths, either with or without a signature. 
That only occurs for an ++ // entry with an uncompressed size of zero. We consider the data ++ // descriptor to be a vector of four-byte values. Then the possible ++ // data descriptors are [(s) 0 c 0] and [(s) 0 c 0 0 0], where (s) is ++ // the optional signature, and c is the compressed length. c would be ++ // two for the Deflate compressed data format. These look the same, so ++ // if the file contains [(s) 0 c 0 0 0], then we cannot discriminate ++ // them. However if the data descriptor was intended to be [(s) 0 c 0], ++ // then it has been followed by eight zero bytes in the zip file for ++ // some reason. For a normal zip file this cannot be the case. The data ++ // descriptor would always be immediately followed by another zip file ++ // signature, which is four bytes that are not zeros. The other cases ++ // where more than one format matches are vanishingly unlikely, but the ++ // longest match strategy resolves those as well in a normal zip file. ++ // Those pairs are [s s s] vs. [s s s s], [s s s] vs. [s s s 0 s 0], ++ // and [s s s s s] vs. [s s s s s s]. For all, s is the signature for a ++ // data descriptor. For the first two we have an entry whose CRC, ++ // compressed length, and uncompressed length are all equal (!), and ++ // are all equal to the signature (!!). If this occurs, clearly someone ++ // is messing with us. However the strategy works nonetheless. We see ++ // that if the shorter descriptor, [s s s] were what was intended, then ++ // it has been followed by either four zero bytes or a data descriptor ++ // signature. Neither can occur for a normal zip file, where it must be ++ // followed by a signature that is not a data descriptor signature. So ++ // the longest match is the correct choice. The final case is outright ++ // insane, since the compressed and uncompressed lengths are the data ++ // descriptor signature repeated twice to make a 64-bit length, which ++ // is about 6e17. 
The largest drive available as I write this is 100TB, ++ // which is one six thousandth of that length. If I apply Moore's law ++ // to drive capacity, we might get to 6e17 about 25 years from now. If ++ // this code is still in use then (I've seen other code I've written in ++ // use for over 30 years), then we're still in luck. A data descriptor ++ // cannot be followed by a data descriptor signature in a normal zip ++ // file. The longest match strategy continues to work. ++ // ++ // So what is a not normal zip file, where these assumptions might fall ++ // apart? zip files have been used in a non-standard way as a poor ++ // substitute for a file system, with entries deleted and perhaps ++ // others replacing them partially, with fragmented zip files being the ++ // result. Then all bets are off as to what might or might not follow a ++ // data descriptor. Though if this sort of data descriptor ambiguity ++ // falls in one of those gaps, then there should be no adverse ++ // consequences for picking the unintended one. ++ int len = 0; ++# define SIG 0x08074b50 // optional data descriptor signature ++#ifdef LARGE_FILE_SUPPORT ++ uch buf[24]; ++ int got = readbuf((char *)buf, sizeof(buf)); ++ if (got >= 24 && makelong(buf) == SIG && ++ makelong(buf + 4) == G.lrec.crc32 && ++ makeint64(buf + 8) == G.lrec.csize && ++ makeint64(buf + 16) == G.lrec.ucsize) ++ // Have a data descriptor with a signature and 64-bit lengths. ++ len = 24; ++ else if (got >= 20 && makelong(buf) == G.lrec.crc32 && ++ makeint64(buf + 4) == G.lrec.csize && ++ makeint64(buf + 12) == G.lrec.ucsize) ++ // Have a data descriptor with no signature and 64-bit lengths. ++ len = 20; ++ else if ((G.lrec.csize >> 32) == 0 && (G.lrec.ucsize >> 32) == 0) ++ // Both lengths are short enough to fit in 32 bits. 
++#else ++ uch buf[16]; ++ int got = readbuf((char *)buf, sizeof(buf)); ++#endif ++ { ++ if (got >= 16 && makelong(buf) == SIG && ++ makelong(buf + 4) == G.lrec.crc32 && ++ makelong(buf + 8) == G.lrec.csize && ++ makelong(buf + 12) == G.lrec.ucsize) ++ // Have a data descriptor with a signature and 32-bit lengths. ++ len = 16; ++ else if (got >= 12 && makelong(buf) == G.lrec.crc32 && ++ makelong(buf + 4) == G.lrec.csize && ++ makelong(buf + 8) == G.lrec.ucsize) ++ // Have a data descriptor with no signature and 32-bit lengths. ++ len = 12; ++ } ++ if (len == 0) ++ // There is no data descriptor that matches the entry CRC and ++ // length values. + error = PK_ERR; ++ ++ // Back up got-len bytes, to position the read pointer after the data ++ // descriptor. Or to where the data descriptor was supposed to be, in ++ // the event none was found. ++ int back = got - len; ++ if (G.incnt + back > INBUFSIZ) { ++ // Need to load the preceding buffer. We've been here before. ++ G.cur_zipfile_bufstart -= INBUFSIZ; ++#ifdef USE_STRM_INPUT ++ zfseeko(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET); ++#else /* !USE_STRM_INPUT */ ++ zlseek(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET); ++#endif /* ?USE_STRM_INPUT */ ++ read(G.zipfd, (char *)G.inbuf, INBUFSIZ); ++ G.incnt -= INBUFSIZ - back; ++ G.inptr += INBUFSIZ - back; ++ } ++ else { ++ // Back up within current buffer. 
++ G.incnt += back; ++ G.inptr -= back; ++ } + } + } + return error; diff --git a/unzip.spec b/unzip.spec index 79dbef3..c1a1da5 100644 --- a/unzip.spec +++ b/unzip.spec @@ -6,7 +6,7 @@ Summary: A utility for unpacking zip files Name: unzip Version: 6.0 -Release: 66%{?dist} +Release: 67%{?dist} License: Info-ZIP Source: http://downloads.sourceforge.net/infozip/unzip60.tar.gz @@ -77,6 +77,7 @@ Patch35: unzip-6.0-wcstombs-fortify.patch #https://sources.debian.org/patches/unzip/6.0-28/21-fix-warning-messages-on-big-files.patch/ Patch36: unzip-6.0-fix-warning-messages-on-big-files.patch +Patch37: unzip-zipbomb-part7.patch URL: http://infozip.sourceforge.net BuildRequires: make BuildRequires: bzip2-devel, gcc @@ -131,6 +132,7 @@ a zip archive. %patch34 -p1 %patch35 -p1 %patch36 -p1 +%patch37 -p1 %build # IZ_HAVE_UXUIDGID is needed for right functionality of unzip -X @@ -149,6 +151,14 @@ make -f unix/Makefile prefix=$RPM_BUILD_ROOT%{_prefix} MANDIR=$RPM_BUILD_ROOT%{_ %{_mandir}/*/* %changelog +* Mon Nov 25 2024 Jakub Martisko - 6.0-67 +- zipinfo: remove the extra %c that caused invalid reads +- zipinfo: fix the whitespaces in the output +- Zipbombs: Port Another patch, originally made by Mark Adler +- https://github.com/madler/unzip/commit/af0d07f95809653b669d88aa0f424c6d5aa48ba0 + Resolves: RHEL-59972 + Resolves: RHEL-6286 + * Tue Oct 29 2024 Troy Dawson - 6.0-66 - Bump release for October 2024 mass rebuild: Resolves: RHEL-64018