Zipinfo+another zipbomb fix

zipinfo: remove the extra %c that caused invalid reads zipinfo: fix the whitespaces in the output Zipbombs: Port Another patch, orinally made by Mark Adler: af0d07f958 Resolves: RHEL-59972 Resolves: RHEL-6286
2024-11-26 10:44:12 +01:00 · 2024-11-26 10:44:12 +01:00 · 88302401fe
commit 88302401fe
parent cb520042bc
3 changed files with 189 additions and 7 deletions
--- a/unzip-6.0-alt-iconv-utf8.patch
+++ b/unzip-6.0-alt-iconv-utf8.patch
@ -174,11 +174,11 @@ Index: unzip-6.0/unzip.c
 +#else /* UNIX */
 +static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
 +  -h  print header line       -t  print totals for listed files or for all\n\
-+  -z  print zipfile comment  %c-T%c print file times in sortable decimal format\
-+\n %c-C%c be case-insensitive   %s\
+  -z  print zipfile comment   -T  print file times in sortable decimal format\
+\n  -C  be case-insensitive   %s\
 +  -x  exclude filenames that follow from listing\n\
-+  -O CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
-+  -I CHARSET  specify a character encoding for UNIX and other archives\n";
+  -O  CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
+  -I  CHARSET  specify a character encoding for UNIX and other archives\n";
 +#endif /* !UNIX */
 #ifdef MORE
    static ZCONST char Far ZipInfoUsageLine4[] =
@ -196,8 +196,8 @@ Index: unzip-6.0/unzip.c
 +  -U  use escapes for all non-ASCII Unicode  -UU ignore any Unicode fields\n\
 +  -C  match filenames case-insensitively     -L  make (some) names \
 +lowercase\n %-42s  -V  retain VMS version numbers\n%s\
-+  -O CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
-+  -I CHARSET  specify a character encoding for UNIX and other archives\n\n";
+  -O  CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
+  -I  CHARSET  specify a character encoding for UNIX and other archives\n\n";
 #else /* !VMS */
 static ZCONST char Far UnzipUsageLine4[] = "\
 modifiers:\n\
--- a/unzip-zipbomb-part7.patch
+++ b/unzip-zipbomb-part7.patch
@ -0,0 +1,172 @@
+From af0d07f95809653b669d88aa0f424c6d5aa48ba0 Mon Sep 17 00:00:00 2001
+From: Mark Adler <fork@madler.net>
+Date: Sat, 2 Jul 2022 14:35:04 -0700
+Subject: [PATCH] Be more liberal in the acceptance of data descriptors.
+
+Previously the zip64 flag determined the size of the lengths in the
+data descriptor. This is compliant with the zip format. However, a
+bug in the Java zip library results in an incorrect setting of that
+flag. This commit permits either 32-bit or 64-bit lengths, auto-
+detecting which it is, which works around the Java bug.
+---
+ extract.c | 146 +++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 123 insertions(+), 23 deletions(-)
+
+diff --git a/extract.c b/extract.c
+index 878817d..b1c74df 100644
+--- a/extract.c
+++ b/extract.c
+@@ -2173,30 +2173,130 @@ static int extract_or_test_member(__G)    /* return PK-type error code */
+     undefer_input(__G);
+     if (uO.zipbomb == TRUE) {
+       if ((G.lrec.general_purpose_bit_flag & 8) != 0) {
+-        /* skip over data descriptor (harder than it sounds, due to signature
+-         * ambiguity)
+-         */
+-#       define SIG 0x08074b50
+-#       define LOW 0xffffffff
+-        uch buf[12];
+-        unsigned shy = 12 - readbuf((char *)buf, 12);
+-        ulg crc = shy ? 0 : makelong(buf);
+-        ulg clen = shy ? 0 : makelong(buf + 4);
+-        ulg ulen = shy ? 0 : makelong(buf + 8); /* or high clen if ZIP64 */
+-        if (crc == SIG &&                       /* if not SIG, no signature */
+-            (G.lrec.crc32 != SIG ||             /* if not SIG, have signature */
+-             (clen == SIG &&                    /* if not SIG, no signature */
+-              ((G.lrec.csize & LOW) != SIG ||   /* if not SIG, have signature */
+-               (ulen == SIG &&                  /* if not SIG, no signature */
+-                (G.pInfo->zip64 ? G.lrec.csize >> 32 : G.lrec.ucsize) != SIG
+-                /* if not SIG, have signature */
+-                )))))
+-          /* skip four more bytes to account for signature */
+-          shy += 4 - readbuf((char *)buf, 4);
+-        if (G.pInfo->zip64)
+-          shy += 8 - readbuf((char *)buf, 8); /* skip eight more for ZIP64 */
+-        if (shy)
+        // Skip over the data descriptor. We need to correctly position the
+        // read pointer after the data descriptor for the proper detection of
+        // overlapped zip file components.
+        //
+        // We need to resolve an ambiguity over four possible data descriptor
+        // formats. We check for all four, and pick the longest match. The data
+        // descriptor can have a signature or not, and it can use four or
+        // eight-byte lengths. The zip format requires resolving the ambiguity
+        // of a signature or not, but it uses the zip64 flag to determine
+        // whether the lengths are four or eight bytes. However there is a bug
+        // in the Java zip library that applies the wrong value of that flag.
+        // This works around that bug by always trying both length formats.
+        //
+        // So why the longest match? And does this resolve the ambiguity? No,
+        // it doesn't definitively resolve the ambiguity. However choosing the
+        // longest match at least resolves it for a normal zip file, where the
+        // bytes following the data descriptor must be another zip signature
+        // that is not a data descriptor signature. There are a few specific
+        // cases for which more than one of the formats will match the given
+        // CRC and lengths. The most plausible is between four and eight-byte
+        // lengths, either with or without a signature. That only occurs for an
+        // entry with an uncompressed size of zero. We consider the data
+        // descriptor to be a vector of four-byte values. Then the possible
+        // data descriptors are [(s) 0 c 0] and [(s) 0 c 0 0 0], where (s) is
+        // the optional signature, and c is the compressed length. c would be
+        // two for the Deflate compressed data format. These look the same, so
+        // if the file contains [(s) 0 c 0 0 0], then we cannot discriminate
+        // them. However if the data descriptor was intended to be [(s) 0 c 0],
+        // then it has been followed by eight zero bytes in the zip file for
+        // some reason. For a normal zip file this cannot be the case. The data
+        // descriptor would always be immediately followed by another zip file
+        // signature, which is four bytes that are not zeros. The other cases
+        // where more than one format matches are vanishingly unlikely, but the
+        // longest match strategy resolves those as well in a normal zip file.
+        // Those pairs are [s s s] vs. [s s s s], [s s s] vs. [s s s 0 s 0],
+        // and [s s s s s] vs. [s s s s s s]. For all, s is the signature for a
+        // data descriptor. For the first two we have an entry whose CRC,
+        // compressed length, and uncompressed length are all equal (!), and
+        // are all equal to the signature (!!). If this occurs, clearly someone
+        // is messing with us. However the strategy works nonetheless. We see
+        // that if the shorter descriptor, [s s s] were what was intended, then
+        // it has been followed by either four zero bytes or a data descriptor
+        // signature. Neither can occur for a normal zip file, where it must be
+        // followed by a signature that is not a data descriptor signature. So
+        // the longest match is the correct choice. The final case is outright
+        // insane, since the compressed and uncompressed lengths are the data
+        // descriptor signature repeated twice to make a 64-bit length, which
+        // is about 6e17. The largest drive available as I write this is 100TB,
+        // which is one six thousandth of that length. If I apply Moore's law
+        // to drive capacity, we might get to 6e17 about 25 years from now. If
+        // this code is still in use then (I've seen other code I've written in
+        // use for over 30 years), then we're still in luck. A data descriptor
+        // cannot be followed by a data descriptor signature in a normal zip
+        // file. The longest match strategy continues to work.
+        //
+        // So what is a not normal zip file, where these assumptions might fall
+        // apart? zip files have been used in a non-standard way as a poor
+        // substitute for a file system, with entries deleted and perhaps
+        // others replacing them partially, with fragmented zip files being the
+        // result. Then all bets are off as to what might or might not follow a
+        // data descriptor. Though if this sort of data descriptor ambiguity
+        // falls in one of those gaps, then there should be no adverse
+        // consequences for picking the unintended one.
+        int len = 0;
+#       define SIG 0x08074b50           // optional data descriptor signature
+#ifdef LARGE_FILE_SUPPORT
+        uch buf[24];
+        int got = readbuf((char *)buf, sizeof(buf));
+        if (got >= 24 && makelong(buf) == SIG &&
+                         makelong(buf + 4) == G.lrec.crc32 &&
+                         makeint64(buf + 8) == G.lrec.csize &&
+                         makeint64(buf + 16) == G.lrec.ucsize)
+            // Have a data descriptor with a signature and 64-bit lengths.
+            len = 24;
+        else if (got >= 20 && makelong(buf) == G.lrec.crc32 &&
+                              makeint64(buf + 4) == G.lrec.csize &&
+                              makeint64(buf + 12) == G.lrec.ucsize)
+            // Have a data descriptor with no signature and 64-bit lengths.
+            len = 20;
+        else if ((G.lrec.csize >> 32) == 0 && (G.lrec.ucsize >> 32) == 0)
+            // Both lengths are short enough to fit in 32 bits.
+#else
+        uch buf[16];
+        int got = readbuf((char *)buf, sizeof(buf));
+#endif
+        {
+            if (got >= 16 && makelong(buf) == SIG &&
+                             makelong(buf + 4) == G.lrec.crc32 &&
+                             makelong(buf + 8) == G.lrec.csize &&
+                             makelong(buf + 12) == G.lrec.ucsize)
+                // Have a data descriptor with a signature and 32-bit lengths.
+                len = 16;
+            else if (got >= 12 && makelong(buf) == G.lrec.crc32 &&
+                                  makelong(buf + 4) == G.lrec.csize &&
+                                  makelong(buf + 8) == G.lrec.ucsize)
+                // Have a data descriptor with no signature and 32-bit lengths.
+                len = 12;
+        }
+        if (len == 0)
+            // There is no data descriptor that matches the entry CRC and
+            // length values.
+           error = PK_ERR;
+
+        // Back up got-len bytes, to position the read pointer after the data
+        // descriptor. Or to where the data descriptor was supposed to be, in
+        // the event none was found.
+        int back = got - len;
+        if (G.incnt + back > INBUFSIZ) {
+            // Need to load the preceding buffer. We've been here before.
+            G.cur_zipfile_bufstart -= INBUFSIZ;
+#ifdef USE_STRM_INPUT
+            zfseeko(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET);
+#else /* !USE_STRM_INPUT */
+            zlseek(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET);
+#endif /* ?USE_STRM_INPUT */
+            read(G.zipfd, (char *)G.inbuf, INBUFSIZ);
+            G.incnt -= INBUFSIZ - back;
+            G.inptr += INBUFSIZ - back;
+        }
+        else {
+            // Back up within current buffer.
+            G.incnt += back;
+            G.inptr -= back;
+        }
+       }
+     }
+     return error;
--- a/unzip.spec
+++ b/unzip.spec
@ -6,7 +6,7 @@
 Summary: A utility for unpacking zip files
 Name: unzip
 Version: 6.0
-Release: 66%{?dist}
+Release: 67%{?dist}
 License: Info-ZIP
 Source: http://downloads.sourceforge.net/infozip/unzip60.tar.gz

@ -77,6 +77,7 @@ Patch35: unzip-6.0-wcstombs-fortify.patch
 #https://sources.debian.org/patches/unzip/6.0-28/21-fix-warning-messages-on-big-files.patch/
 Patch36: unzip-6.0-fix-warning-messages-on-big-files.patch

+Patch37: unzip-zipbomb-part7.patch
 URL: http://infozip.sourceforge.net
 BuildRequires: make
 BuildRequires:  bzip2-devel, gcc
@ -131,6 +132,7 @@ a zip archive.
 %patch34 -p1
 %patch35 -p1
 %patch36 -p1
+%patch37 -p1

 %build
 # IZ_HAVE_UXUIDGID is needed for right functionality of unzip -X
@ -149,6 +151,14 @@ make -f unix/Makefile prefix=$RPM_BUILD_ROOT%{_prefix} MANDIR=$RPM_BUILD_ROOT%{_
 %{_mandir}/*/*

 %changelog
+* Mon Nov 25 2024 Jakub Martisko <jamartis@redhat.com> - 6.0-67
+- zipinfo: remove the extra %c that caused invalid reads
+- zipinfo: fix the whitespaces in the output
+- Zipbombs: Port Another patch, orinally made by Mark Adler
+- https://github.com/madler/unzip/commit/af0d07f95809653b669d88aa0f424c6d5aa48ba0
+  Resolves: RHEL-59972
+  Resolves: RHEL-6286
+
 * Tue Oct 29 2024 Troy Dawson <tdawson@redhat.com> - 6.0-66
 - Bump release for October 2024 mass rebuild:
  Resolves: RHEL-64018