88302401fe
zipinfo: remove the extra %c that caused invalid reads
zipinfo: fix the whitespaces in the output
Zipbombs: Port Another patch, orinally made by Mark Adler: af0d07f958
Resolves: RHEL-59972
Resolves: RHEL-6286
173 lines
9.5 KiB
Diff
173 lines
9.5 KiB
Diff
From af0d07f95809653b669d88aa0f424c6d5aa48ba0 Mon Sep 17 00:00:00 2001
|
|
From: Mark Adler <fork@madler.net>
|
|
Date: Sat, 2 Jul 2022 14:35:04 -0700
|
|
Subject: [PATCH] Be more liberal in the acceptance of data descriptors.
|
|
|
|
Previously the zip64 flag determined the size of the lengths in the
|
|
data descriptor. This is compliant with the zip format. However, a
|
|
bug in the Java zip library results in an incorrect setting of that
|
|
flag. This commit permits either 32-bit or 64-bit lengths, auto-
|
|
detecting which it is, which works around the Java bug.
|
|
---
|
|
extract.c | 146 +++++++++++++++++++++++++++++++++++++++++++++---------
|
|
1 file changed, 123 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/extract.c b/extract.c
|
|
index 878817d..b1c74df 100644
|
|
--- a/extract.c
|
|
+++ b/extract.c
|
|
@@ -2173,30 +2173,130 @@ static int extract_or_test_member(__G) /* return PK-type error code */
|
|
undefer_input(__G);
|
|
if (uO.zipbomb == TRUE) {
|
|
if ((G.lrec.general_purpose_bit_flag & 8) != 0) {
|
|
- /* skip over data descriptor (harder than it sounds, due to signature
|
|
- * ambiguity)
|
|
- */
|
|
-# define SIG 0x08074b50
|
|
-# define LOW 0xffffffff
|
|
- uch buf[12];
|
|
- unsigned shy = 12 - readbuf((char *)buf, 12);
|
|
- ulg crc = shy ? 0 : makelong(buf);
|
|
- ulg clen = shy ? 0 : makelong(buf + 4);
|
|
- ulg ulen = shy ? 0 : makelong(buf + 8); /* or high clen if ZIP64 */
|
|
- if (crc == SIG && /* if not SIG, no signature */
|
|
- (G.lrec.crc32 != SIG || /* if not SIG, have signature */
|
|
- (clen == SIG && /* if not SIG, no signature */
|
|
- ((G.lrec.csize & LOW) != SIG || /* if not SIG, have signature */
|
|
- (ulen == SIG && /* if not SIG, no signature */
|
|
- (G.pInfo->zip64 ? G.lrec.csize >> 32 : G.lrec.ucsize) != SIG
|
|
- /* if not SIG, have signature */
|
|
- )))))
|
|
- /* skip four more bytes to account for signature */
|
|
- shy += 4 - readbuf((char *)buf, 4);
|
|
- if (G.pInfo->zip64)
|
|
- shy += 8 - readbuf((char *)buf, 8); /* skip eight more for ZIP64 */
|
|
- if (shy)
|
|
+ // Skip over the data descriptor. We need to correctly position the
|
|
+ // read pointer after the data descriptor for the proper detection of
|
|
+ // overlapped zip file components.
|
|
+ //
|
|
+ // We need to resolve an ambiguity over four possible data descriptor
|
|
+ // formats. We check for all four, and pick the longest match. The data
|
|
+ // descriptor can have a signature or not, and it can use four or
|
|
+ // eight-byte lengths. The zip format requires resolving the ambiguity
|
|
+ // of a signature or not, but it uses the zip64 flag to determine
|
|
+ // whether the lengths are four or eight bytes. However there is a bug
|
|
+ // in the Java zip library that applies the wrong value of that flag.
|
|
+ // This works around that bug by always trying both length formats.
|
|
+ //
|
|
+ // So why the longest match? And does this resolve the ambiguity? No,
|
|
+ // it doesn't definitively resolve the ambiguity. However choosing the
|
|
+ // longest match at least resolves it for a normal zip file, where the
|
|
+ // bytes following the data descriptor must be another zip signature
|
|
+ // that is not a data descriptor signature. There are a few specific
|
|
+ // cases for which more than one of the formats will match the given
|
|
+ // CRC and lengths. The most plausible is between four and eight-byte
|
|
+ // lengths, either with or without a signature. That only occurs for an
|
|
+ // entry with an uncompressed size of zero. We consider the data
|
|
+ // descriptor to be a vector of four-byte values. Then the possible
|
|
+ // data descriptors are [(s) 0 c 0] and [(s) 0 c 0 0 0], where (s) is
|
|
+ // the optional signature, and c is the compressed length. c would be
|
|
+ // two for the Deflate compressed data format. These look the same, so
|
|
+ // if the file contains [(s) 0 c 0 0 0], then we cannot discriminate
|
|
+ // them. However if the data descriptor was intended to be [(s) 0 c 0],
|
|
+ // then it has been followed by eight zero bytes in the zip file for
|
|
+ // some reason. For a normal zip file this cannot be the case. The data
|
|
+ // descriptor would always be immediately followed by another zip file
|
|
+ // signature, which is four bytes that are not zeros. The other cases
|
|
+ // where more than one format matches are vanishingly unlikely, but the
|
|
+ // longest match strategy resolves those as well in a normal zip file.
|
|
+ // Those pairs are [s s s] vs. [s s s s], [s s s] vs. [s s s 0 s 0],
|
|
+ // and [s s s s s] vs. [s s s s s s]. For all, s is the signature for a
|
|
+ // data descriptor. For the first two we have an entry whose CRC,
|
|
+ // compressed length, and uncompressed length are all equal (!), and
|
|
+ // are all equal to the signature (!!). If this occurs, clearly someone
|
|
+ // is messing with us. However the strategy works nonetheless. We see
|
|
+ // that if the shorter descriptor, [s s s] were what was intended, then
|
|
+ // it has been followed by either four zero bytes or a data descriptor
|
|
+ // signature. Neither can occur for a normal zip file, where it must be
|
|
+ // followed by a signature that is not a data descriptor signature. So
|
|
+ // the longest match is the correct choice. The final case is outright
|
|
+ // insane, since the compressed and uncompressed lengths are the data
|
|
+ // descriptor signature repeated twice to make a 64-bit length, which
|
|
+ // is about 6e17. The largest drive available as I write this is 100TB,
|
|
+ // which is one six thousandth of that length. If I apply Moore's law
|
|
+ // to drive capacity, we might get to 6e17 about 25 years from now. If
|
|
+ // this code is still in use then (I've seen other code I've written in
|
|
+ // use for over 30 years), then we're still in luck. A data descriptor
|
|
+ // cannot be followed by a data descriptor signature in a normal zip
|
|
+ // file. The longest match strategy continues to work.
|
|
+ //
|
|
+ // So what is a not normal zip file, where these assumptions might fall
|
|
+ // apart? zip files have been used in a non-standard way as a poor
|
|
+ // substitute for a file system, with entries deleted and perhaps
|
|
+ // others replacing them partially, with fragmented zip files being the
|
|
+ // result. Then all bets are off as to what might or might not follow a
|
|
+ // data descriptor. Though if this sort of data descriptor ambiguity
|
|
+ // falls in one of those gaps, then there should be no adverse
|
|
+ // consequences for picking the unintended one.
|
|
+ int len = 0;
|
|
+# define SIG 0x08074b50 // optional data descriptor signature
|
|
+#ifdef LARGE_FILE_SUPPORT
|
|
+ uch buf[24];
|
|
+ int got = readbuf((char *)buf, sizeof(buf));
|
|
+ if (got >= 24 && makelong(buf) == SIG &&
|
|
+ makelong(buf + 4) == G.lrec.crc32 &&
|
|
+ makeint64(buf + 8) == G.lrec.csize &&
|
|
+ makeint64(buf + 16) == G.lrec.ucsize)
|
|
+ // Have a data descriptor with a signature and 64-bit lengths.
|
|
+ len = 24;
|
|
+ else if (got >= 20 && makelong(buf) == G.lrec.crc32 &&
|
|
+ makeint64(buf + 4) == G.lrec.csize &&
|
|
+ makeint64(buf + 12) == G.lrec.ucsize)
|
|
+ // Have a data descriptor with no signature and 64-bit lengths.
|
|
+ len = 20;
|
|
+ else if ((G.lrec.csize >> 32) == 0 && (G.lrec.ucsize >> 32) == 0)
|
|
+ // Both lengths are short enough to fit in 32 bits.
|
|
+#else
|
|
+ uch buf[16];
|
|
+ int got = readbuf((char *)buf, sizeof(buf));
|
|
+#endif
|
|
+ {
|
|
+ if (got >= 16 && makelong(buf) == SIG &&
|
|
+ makelong(buf + 4) == G.lrec.crc32 &&
|
|
+ makelong(buf + 8) == G.lrec.csize &&
|
|
+ makelong(buf + 12) == G.lrec.ucsize)
|
|
+ // Have a data descriptor with a signature and 32-bit lengths.
|
|
+ len = 16;
|
|
+ else if (got >= 12 && makelong(buf) == G.lrec.crc32 &&
|
|
+ makelong(buf + 4) == G.lrec.csize &&
|
|
+ makelong(buf + 8) == G.lrec.ucsize)
|
|
+ // Have a data descriptor with no signature and 32-bit lengths.
|
|
+ len = 12;
|
|
+ }
|
|
+ if (len == 0)
|
|
+ // There is no data descriptor that matches the entry CRC and
|
|
+ // length values.
|
|
error = PK_ERR;
|
|
+
|
|
+ // Back up got-len bytes, to position the read pointer after the data
|
|
+ // descriptor. Or to where the data descriptor was supposed to be, in
|
|
+ // the event none was found.
|
|
+ int back = got - len;
|
|
+ if (G.incnt + back > INBUFSIZ) {
|
|
+ // Need to load the preceding buffer. We've been here before.
|
|
+ G.cur_zipfile_bufstart -= INBUFSIZ;
|
|
+#ifdef USE_STRM_INPUT
|
|
+ zfseeko(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET);
|
|
+#else /* !USE_STRM_INPUT */
|
|
+ zlseek(G.zipfd, G.cur_zipfile_bufstart, SEEK_SET);
|
|
+#endif /* ?USE_STRM_INPUT */
|
|
+ read(G.zipfd, (char *)G.inbuf, INBUFSIZ);
|
|
+ G.incnt -= INBUFSIZ - back;
|
|
+ G.inptr += INBUFSIZ - back;
|
|
+ }
|
|
+ else {
|
|
+ // Back up within current buffer.
|
|
+ G.incnt += back;
|
|
+ G.inptr -= back;
|
|
+ }
|
|
}
|
|
}
|
|
return error;
|