Port the ibm patches dealing with s390x optimization

Resolves: rhbz#1972638
This commit is contained in:
Jakub Martisko 2021-07-30 14:45:41 +02:00
parent ab0d495421
commit 4f148c6e44
6 changed files with 2189 additions and 1 deletions

67
dfltcc-segfault.patch Normal file
View File

@ -0,0 +1,67 @@
---
dfltcc.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/dfltcc.c b/dfltcc.c
index 86aa56e..3a5b92d 100644
--- a/dfltcc.c
+++ b/dfltcc.c
@@ -242,10 +242,8 @@ dfltcc_gdht (struct dfltcc_param_v0 *param)
dfltcc (DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL);
}
-static off_t total_in;
-
static dfltcc_cc
-dfltcc_cmpr_xpnd (struct dfltcc_param_v0 *param, int fn)
+dfltcc_cmpr_xpnd (struct dfltcc_param_v0 *param, int fn, off_t *total_in)
{
uch *next_out = outbuf + outcnt;
size_t avail_out = OUTBUFSIZ - outcnt;
@@ -257,7 +255,7 @@ dfltcc_cmpr_xpnd (struct dfltcc_param_v0 *param, int fn)
window);
off_t consumed_in = next_in - (inbuf + inptr);
inptr += consumed_in;
- total_in += consumed_in;
+ *total_in += consumed_in;
outcnt += ((OUTBUFSIZ - outcnt) - avail_out);
return cc;
}
@@ -349,6 +347,7 @@ dfltcc_deflate (int pack_level)
union aligned_dfltcc_param_v0 ctx_v0;
struct dfltcc_param_v0 *param = init_param (&ctx_v0);
+ off_t total_in = 0;
/* Compress ifd into ofd in a loop. */
while (true)
@@ -398,7 +397,8 @@ dfltcc_deflate (int pack_level)
}
/* Compress inbuf into outbuf. */
- while (dfltcc_cmpr_xpnd (param, DFLTCC_CMPR) == DFLTCC_CC_AGAIN)
+ while (dfltcc_cmpr_xpnd (param, DFLTCC_CMPR, &total_in)
+ == DFLTCC_CC_AGAIN)
;
/* Unmask the input data. */
@@ -427,6 +427,7 @@ dfltcc_inflate (void)
union aligned_dfltcc_param_v0 ctx_v0;
struct dfltcc_param_v0 *param = init_param (&ctx_v0);
+ off_t total_in = 0;
/* Decompress ifd into ofd in a loop. */
while (true)
@@ -446,7 +447,8 @@ dfltcc_inflate (void)
/* Decompress inbuf into outbuf. */
dfltcc_cc cc;
- while ((cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND)) == DFLTCC_CC_AGAIN)
+ while ((cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND, &total_in))
+ == DFLTCC_CC_AGAIN)
;
if (cc == DFLTCC_CC_OK)
{
--
2.25.4

View File

@ -1,7 +1,7 @@
Summary: The GNU data compression program
Name: gzip
Version: 1.10
Release: 5%{?dist}
Release: 7%{?dist}
# info pages are under GFDL license
License: GPLv3+ and GFDL
Source0: http://ftp.gnu.org/gnu/gzip/gzip-%{version}.tar.xz
@ -13,6 +13,11 @@ Source101: colorzgrep.sh
Patch1: gnulib.patch
Patch2: gzexe.patch
Patch3: ibm.patch
Patch4: ibm2.patch
Patch5: ibm4.patch
Patch6: dfltcc-segfault.patch
Patch7: ibm5.patch
# Fixed in upstream code.
# http://thread.gmane.org/gmane.comp.gnu.gzip.bugs/378
@ -42,6 +47,11 @@ very commonly used data compression program.
%setup -q
#%patch1 -p1 -b .gnulib
%patch2 -p1 -b .gzexe
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
cp %{SOURCE1} .
autoreconf
@ -51,7 +61,14 @@ export CPPFLAGS="-DHAVE_LSTAT"
export CC="%{__cc}"
export CPP="%{__cpp}"
export CXX="%{__cxx}"
%ifarch s390x
export CFLAGS="$RPM_OPT_FLAGS -DDFLTCC_LEVEL_MASK=0x7e"
%configure --enable-dfltcc
%else
%configure
%endif
make
make check
#make gzip.info
@ -83,6 +100,14 @@ install -p -m 644 %{SOURCE101} %{buildroot}%{profiledir}
%{profiledir}/*
%changelog
* Fri Jul 30 2021 Jakub Martisko <jamartis@redhat.com> - 1.10-7
- Add the ibm patches dealing with s390x optimizations
Resolves: rhbz#1986357
* Fri Jul 30 2021 Jakub Martisko <jamartis@redhat.com> - 1.10-6
- Add gating tests
Resolves: rhbz#1986357
* Fri Apr 16 2021 Mohan Boddu <mboddu@redhat.com> - 1.10-5
- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937

841
ibm.patch Normal file
View File

@ -0,0 +1,841 @@
diff --git a/Makefile.am b/Makefile.am
index 025c914..8b4120c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -55,6 +55,9 @@ gzip_SOURCES = \
trees.c unlzh.c unlzw.c unpack.c unzip.c util.c zip.c
gzip_LDADD = libver.a lib/libgzip.a
gzip_LDADD += $(LIB_CLOCK_GETTIME)
+if IBM_Z_DFLTCC
+gzip_SOURCES += dfltcc.c
+endif
BUILT_SOURCES += version.c
version.c: Makefile
diff --git a/bits.c b/bits.c
index b0df2fe..9effc32 100644
--- a/bits.c
+++ b/bits.c
@@ -78,7 +78,7 @@
local file_t zfile; /* output gzip file */
-local unsigned short bi_buf;
+unsigned short bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least significant
* bits).
*/
@@ -88,7 +88,7 @@ local unsigned short bi_buf;
* more than 16 bits on some systems.)
*/
-local int bi_valid;
+int bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
diff --git a/configure.ac b/configure.ac
index 9a2b635..76ac26f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -78,6 +78,16 @@ AC_ARG_ENABLE([gcc-warnings],
fi]
)
+AC_ARG_ENABLE([dfltcc],
+ [AS_HELP_STRING([--enable-dfltcc],
+ [use DEFLATE COMPRESSION CALL instruction on IBM Z])],
+ [case $enableval in
+ yes|no) gl_dfltcc=$enableval ;;
+ *) AC_MSG_ERROR([bad value $enableval for dfltcc option]) ;;
+ esac],
+ [gl_dfltcc=no]
+)
+
# gl_GCC_VERSION_IFELSE([major], [minor], [run-if-found], [run-if-not-found])
# ------------------------------------------------
# If $CPP is gcc-MAJOR.MINOR or newer, then run RUN-IF-FOUND.
@@ -188,6 +198,12 @@ if test "$gl_gcc_warnings" = yes; then
AC_SUBST([GNULIB_WARN_CFLAGS])
fi
+if test "$gl_dfltcc" = yes; then
+ AC_DEFINE([IBM_Z_DFLTCC], ,
+ [Use DEFLATE COMPRESSION CALL instruction on IBM Z machines.])
+fi
+AM_CONDITIONAL([IBM_Z_DFLTCC], [test "$gl_dfltcc" = yes])
+
# cc -E produces incorrect asm files on SVR4, so postprocess it.
ASCPPPOST="sed '/^ *\\#/d; s,//.*,,; s/% /%/g; s/\\. /./g'"
AC_SUBST([ASCPPPOST])
diff --git a/deflate.c b/deflate.c
index 8ffff3a..869b902 100644
--- a/deflate.c
+++ b/deflate.c
@@ -123,10 +123,6 @@
#define NIL 0
/* Tail of hash chains */
-#define FAST 4
-#define SLOW 2
-/* speed options for the general purpose bit flag */
-
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
@@ -215,9 +211,6 @@ local unsigned int max_lazy_match;
* max_insert_length is used only for compression levels <= 3.
*/
-local int compr_level;
-/* compression level (1..9) */
-
unsigned good_match;
/* Use a faster search when the previous match is longer than this */
@@ -308,14 +301,12 @@ local void check_match (IPos start, IPos match, int length);
/* ===========================================================================
* Initialize the "longest match" routines for a new file
*/
-void lm_init (pack_level, flags)
+void lm_init (pack_level)
int pack_level; /* 0: store, 1: best speed, 9: best compression */
- ush *flags; /* general purpose bit flag */
{
register unsigned j;
if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level");
- compr_level = pack_level;
/* Initialize the hash table. */
#if defined MAXSEG_64K && HASH_BITS == 15
@@ -337,11 +328,6 @@ void lm_init (pack_level, flags)
nice_match = configuration_table[pack_level].nice_length;
#endif
max_chain_length = configuration_table[pack_level].max_chain;
- if (pack_level == 1) {
- *flags |= FAST;
- } else if (pack_level == 9) {
- *flags |= SLOW;
- }
/* ??? reduce max_chain_length for binary files */
strstart = 0;
@@ -732,7 +718,7 @@ local off_t deflate_fast()
* evaluation for matches: a match is finally adopted only if there is
* no better match at the next window position.
*/
-off_t deflate()
+off_t deflate(int pack_level)
{
IPos hash_head; /* head of hash chain */
IPos prev_match; /* previous match */
@@ -740,7 +726,8 @@ off_t deflate()
int match_available = 0; /* set if previous match exists */
register unsigned match_length = MIN_MATCH-1; /* length of best match */
- if (compr_level <= 3) return deflate_fast(); /* optimized for speed */
+ lm_init(pack_level);
+ if (pack_level <= 3) return deflate_fast(); /* optimized for speed */
/* Process the input block. */
while (lookahead != 0) {
diff --git a/dfltcc.c b/dfltcc.c
new file mode 100644
index 0000000..9010475
--- /dev/null
+++ b/dfltcc.c
@@ -0,0 +1,429 @@
+/* dfltcc.c -- compress data using IBM Z DEFLATE COMPRESSION CALL
+
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdlib.h>
+#ifdef DFLTCC_USDT
+#include <sys/sdt.h>
+#endif
+
+#include "tailor.h"
+#include "gzip.h"
+
+#ifdef DYN_ALLOC
+ error: DYN_ALLOC is not supported by DFLTCC
+#endif
+
+/* ===========================================================================
+ * C wrappers for the DEFLATE CONVERSION CALL instruction.
+ */
+
+typedef enum
+{
+ DFLTCC_CC_OK = 0,
+ DFLTCC_CC_OP1_TOO_SHORT = 1,
+ DFLTCC_CC_OP2_TOO_SHORT = 2,
+ DFLTCC_CC_OP2_CORRUPT = 2,
+ DFLTCC_CC_AGAIN = 3,
+} dfltcc_cc;
+
+#define DFLTCC_QAF 0
+#define DFLTCC_GDHT 1
+#define DFLTCC_CMPR 2
+#define DFLTCC_XPND 4
+#define HBT_CIRCULAR (1 << 7)
+//#define HB_BITS 15
+//#define HB_SIZE (1 << HB_BITS)
+#define DFLTCC_FACILITY 151
+#define DFLTCC_FMT0 0
+#define CVT_CRC32 0
+#define HTT_FIXED 0
+#define HTT_DYNAMIC 1
+
+struct dfltcc_qaf_param
+{
+ char fns[16];
+ char reserved1[8];
+ char fmts[2];
+ char reserved2[6];
+};
+
+struct dfltcc_param_v0
+{
+ unsigned short pbvn; /* Parameter-Block-Version Number */
+ unsigned char mvn; /* Model-Version Number */
+ unsigned char ribm; /* Reserved for IBM use */
+ unsigned reserved32 : 31;
+ unsigned cf : 1; /* Continuation Flag */
+ unsigned char reserved64[8];
+ unsigned nt : 1; /* New Task */
+ unsigned reserved129 : 1;
+ unsigned cvt : 1; /* Check Value Type */
+ unsigned reserved131 : 1;
+ unsigned htt : 1; /* Huffman-Table Type */
+ unsigned bcf : 1; /* Block-Continuation Flag */
+ unsigned bcc : 1; /* Block Closing Control */
+ unsigned bhf : 1; /* Block Header Final */
+ unsigned reserved136 : 1;
+ unsigned reserved137 : 1;
+ unsigned dhtgc : 1; /* DHT Generation Control */
+ unsigned reserved139 : 5;
+ unsigned reserved144 : 5;
+ unsigned sbb : 3; /* Sub-Byte Boundary */
+ unsigned char oesc; /* Operation-Ending-Supplemental Code */
+ unsigned reserved160 : 12;
+ unsigned ifs : 4; /* Incomplete-Function Status */
+ unsigned short ifl; /* Incomplete-Function Length */
+ unsigned char reserved192[8];
+ unsigned char reserved256[8];
+ unsigned char reserved320[4];
+ unsigned short hl; /* History Length */
+ unsigned reserved368 : 1;
+ unsigned short ho : 15; /* History Offset */
+ unsigned int cv; /* Check Value */
+ unsigned eobs : 15; /* End-of-block Symbol */
+ unsigned reserved431 : 1;
+ unsigned char eobl : 4; /* End-of-block Length */
+ unsigned reserved436 : 12;
+ unsigned reserved448 : 4;
+ unsigned short cdhtl : 12; /* Compressed-Dynamic-Huffman Table
+ Length */
+ unsigned char reserved464[6];
+ unsigned char cdht[288];
+ unsigned char reserved[32];
+ unsigned char csb[1152];
+};
+
+static int is_bit_set(const char *bits, int n)
+{
+ return bits[n / 8] & (1 << (7 - (n % 8)));
+}
+
+static int is_dfltcc_enabled(void)
+{
+ const char *env;
+ char facilities[((DFLTCC_FACILITY / 64) + 1) * 8];
+ register int r0 __asm__("r0");
+
+ env = getenv("DFLTCC");
+ if (env && !strcmp(env, "0")) {
+ return 0;
+ }
+
+ r0 = sizeof(facilities) / 8;
+ __asm__("stfle %[facilities]\n"
+ : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory");
+ return is_bit_set((const char *) facilities, DFLTCC_FACILITY);
+}
+
+static dfltcc_cc dfltcc(int fn, void *param,
+ uch **op1, size_t *len1,
+ const uch **op2, size_t *len2,
+ void *hist)
+{
+ uch *t2 = op1 ? *op1 : NULL;
+ size_t t3 = len1 ? *len1 : 0;
+ const uch *t4 = op2 ? *op2 : NULL;
+ size_t t5 = len2 ? *len2 : 0;
+ register int r0 __asm__("r0") = fn;
+ register void *r1 __asm__("r1") = param;
+ register uch *r2 __asm__("r2") = t2;
+ register size_t r3 __asm__("r3") = t3;
+ register const uch *r4 __asm__("r4") = t4;
+ register size_t r5 __asm__("r5") = t5;
+ int cc;
+
+ __asm__ volatile(
+#ifdef DFLTCC_USDT
+ STAP_PROBE_ASM(zlib, dfltcc_entry,
+ STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
+#ifdef DFLTCC_USDT
+ STAP_PROBE_ASM(zlib, dfltcc_exit,
+ STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+ "ipm %[cc]\n"
+ : [r2] "+r" (r2)
+ , [r3] "+r" (r3)
+ , [r4] "+r" (r4)
+ , [r5] "+r" (r5)
+ , [cc] "=r" (cc)
+ : [r0] "r" (r0)
+ , [r1] "r" (r1)
+ , [hist] "r" (hist)
+#ifdef DFLTCC_USDT
+ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
+#endif
+ : "cc", "memory");
+ t2 = r2; t3 = r3; t4 = r4; t5 = r5;
+
+ if (op1)
+ *op1 = t2;
+ if (len1)
+ *len1 = t3;
+ if (op2)
+ *op2 = t4;
+ if (len2)
+ *len2 = t5;
+ return (cc >> 28) & 3;
+}
+
+static void dfltcc_qaf(struct dfltcc_qaf_param *param)
+{
+ dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL);
+}
+
+static void dfltcc_gdht(struct dfltcc_param_v0 *param)
+{
+ const uch *next_in = inbuf + inptr;
+ size_t avail_in = insize - inptr;
+
+ dfltcc(DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL);
+}
+
+static off_t total_in;
+
+static dfltcc_cc dfltcc_cmpr_xpnd(struct dfltcc_param_v0 *param, int fn)
+{
+ uch *next_out = outbuf + outcnt;
+ size_t avail_out = OUTBUFSIZ - outcnt;
+ const uch *next_in = inbuf + inptr;
+ size_t avail_in = insize - inptr;
+ off_t consumed_in;
+ dfltcc_cc cc;
+
+ cc = dfltcc(fn | HBT_CIRCULAR, param,
+ &next_out, &avail_out,
+ &next_in, &avail_in,
+ window);
+ consumed_in = next_in - (inbuf + inptr);
+ inptr += consumed_in;
+ total_in += consumed_in;
+ outcnt += ((OUTBUFSIZ - outcnt) - avail_out);
+ return cc;
+}
+
+__attribute__((aligned(8)))
+static struct context
+{
+ union
+ {
+ struct dfltcc_qaf_param af;
+ struct dfltcc_param_v0 param;
+ };
+} ctx;
+
+static struct dfltcc_param_v0 *init_param(struct dfltcc_param_v0 *param)
+{
+ const char *s;
+
+ memset(param, 0, sizeof(*param));
+#ifndef DFLTCC_RIBM
+#define DFLTCC_RIBM 0
+#endif
+ s = getenv("DFLTCC_RIBM");
+ param->ribm = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_RIBM;
+ param->nt = 1;
+ param->cvt = CVT_CRC32;
+ param->cv = __builtin_bswap32(getcrc());
+ return param;
+}
+
+static void bi_close_block(struct dfltcc_param_v0 *param)
+{
+ bi_valid = param->sbb;
+ bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1);
+ send_bits(
+ bi_reverse(param->eobs >> (15 - param->eobl), param->eobl),
+ param->eobl);
+ param->bcf = 0;
+}
+
+static void close_block(struct dfltcc_param_v0 *param)
+{
+ bi_close_block(param);
+ bi_windup();
+ param->sbb = (param->sbb + param->eobl) % 8;
+ if (param->sbb != 0) {
+ Assert(outcnt > 0, "outbuf must have enough space for EOBS");
+ outcnt--;
+ }
+}
+
+static void close_stream(struct dfltcc_param_v0 *param)
+{
+ if (param->bcf) {
+ bi_close_block(param);
+ }
+ send_bits(1, 3); /* BFINAL=1, BTYPE=00 */
+ bi_windup();
+ put_short(0x0000);
+ put_short(0xFFFF);
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/* ===========================================================================
+ * Compress ifd into ofd in hardware or fall back to software.
+ */
+int dfltcc_deflate(int pack_level)
+{
+ const char *s;
+ unsigned long level_mask;
+ unsigned long block_size;
+ off_t block_threshold;
+ struct dfltcc_param_v0 *param;
+ int extra;
+
+ /* Check whether we can use hardware compression */
+ if (!is_dfltcc_enabled() || getenv("SOURCE_DATE_EPOCH")) {
+ return deflate(pack_level);
+ }
+#ifndef DFLTCC_LEVEL_MASK
+#define DFLTCC_LEVEL_MASK 0x2
+#endif
+ s = getenv("DFLTCC_LEVEL_MASK");
+ level_mask = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_LEVEL_MASK;
+ if ((level_mask & (1 << pack_level)) == 0) {
+ return deflate(pack_level);
+ }
+ dfltcc_qaf(&ctx.af);
+ if (!is_bit_set(ctx.af.fns, DFLTCC_CMPR) ||
+ !is_bit_set(ctx.af.fns, DFLTCC_GDHT) ||
+ !is_bit_set(ctx.af.fmts, DFLTCC_FMT0)) {
+ return deflate(pack_level);
+ }
+
+ /* Initialize tuning parameters */
+#ifndef DFLTCC_BLOCK_SIZE
+#define DFLTCC_BLOCK_SIZE 1048576
+#endif
+ s = getenv("DFLTCC_BLOCK_SIZE");
+ block_size = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_BLOCK_SIZE;
+ (void)block_size;
+#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE
+#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096
+#endif
+ s = getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE");
+ block_threshold = (s && *s) ? strtoul(s, NULL, 0) :
+ DFLTCC_FIRST_FHT_BLOCK_SIZE;
+
+ /* Compress ifd into ofd in a loop */
+ param = init_param(&ctx.param);
+ while (1) {
+ /* Flush the output data */
+ if (outcnt > OUTBUFSIZ - 8) {
+ flush_outbuf();
+ }
+
+ /* Close the block */
+ if (param->bcf && total_in == block_threshold && !param->cf) {
+ close_block(param);
+ block_threshold += block_size;
+ }
+
+ /* Read the input data */
+ if (inptr == insize) {
+ if (fill_inbuf(1) == EOF && !param->cf) {
+ break;
+ }
+ inptr = 0;
+ }
+
+ /* Temporarily mask some input data */
+ extra = MAX(0, total_in + (insize - inptr) - block_threshold);
+ insize -= extra;
+
+ /* Start a new block */
+ if (!param->bcf) {
+ if (total_in == 0 && block_threshold > 0) {
+ param->htt = HTT_FIXED;
+ } else {
+ param->htt = HTT_DYNAMIC;
+ dfltcc_gdht(param);
+ }
+ }
+
+ /* Compress inbuf into outbuf */
+ dfltcc_cmpr_xpnd(param, DFLTCC_CMPR);
+
+ /* Unmask the input data */
+ insize += extra;
+
+ /* Continue the block */
+ param->bcf = 1;
+ }
+ close_stream(param);
+ setcrc(__builtin_bswap32(param->cv));
+ return 0;
+}
+
+/* ===========================================================================
+ * Decompress ifd into ofd in hardware or fall back to software.
+ */
+int dfltcc_inflate(void)
+{
+ struct dfltcc_param_v0 *param;
+ dfltcc_cc cc;
+
+ /* Check whether we can use hardware decompression */
+ if (!is_dfltcc_enabled()) {
+ return inflate();
+ }
+ dfltcc_qaf(&ctx.af);
+ if (!is_bit_set(ctx.af.fns, DFLTCC_XPND)) {
+ return inflate();
+ }
+
+ /* Decompress ifd into ofd in a loop */
+ param = init_param(&ctx.param);
+ while (1) {
+ /* Perform I/O */
+ if (outcnt == OUTBUFSIZ) {
+ flush_outbuf();
+ }
+ if (inptr == insize) {
+ if (fill_inbuf(1) == EOF) {
+ /* Premature EOF */
+ return 2;
+ }
+ inptr = 0;
+ }
+ /* Decompress inbuf into outbuf */
+ cc = dfltcc_cmpr_xpnd(param, DFLTCC_XPND);
+ if (cc == DFLTCC_CC_OK) {
+ /* The entire deflate stream has been successfully decompressed */
+ break;
+ }
+ if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
+ /* The deflate stream is corrupted */
+ return 2;
+ }
+ /* There must be more data to decompress */
+ }
+ if (param->sbb != 0) {
+ /* The deflate stream has ended in the middle of a byte - go to the next
+ * byte boundary, so that unzip() can read CRC and length.
+ */
+ inptr++;
+ }
+ setcrc(__builtin_bswap32(param->cv)); /* set CRC value for unzip() */
+ flush_outbuf(); /* update bytes_out for unzip() */
+ return 0;
+}
diff --git a/gzip.c b/gzip.c
index 3ddfb7f..4fffc4f 100644
--- a/gzip.c
+++ b/gzip.c
@@ -128,9 +128,21 @@ static char const *const license_msg[] = {
/* global buffers */
+#ifdef IBM_Z_DFLTCC
+/* DEFLATE COMPRESSION CALL works faster with page-aligned input buffers */
+__attribute__((aligned(4096)))
+#endif
DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA);
+#ifdef IBM_Z_DFLTCC
+/* DEFLATE COMPRESSION CALL works faster with page-aligned output buffers */
+__attribute__((aligned(4096)))
+#endif
DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
DECLARE(ush, d_buf, DIST_BUFSIZE);
+#ifdef IBM_Z_DFLTCC
+/* DEFLATE COMPRESSION CALL works only with page-aligned windows */
+__attribute__((aligned(4096)))
+#endif
DECLARE(uch, window, 2L*WSIZE);
#ifndef MAXSEG_64K
DECLARE(ush, tab_prefix, 1L<<BITS);
diff --git a/gzip.h b/gzip.h
index 46bbac9..0c59cc2 100644
--- a/gzip.h
+++ b/gzip.h
@@ -74,7 +74,10 @@ extern int method; /* compression method * /
*/
#ifndef INBUFSIZ
-# ifdef SMALL_MEM
+# ifdef IBM_Z_DFLTCC
+/* DEFLATE COMPRESSION CALL works faster with larger input buffers */
+# define INBUFSIZ 0x40000
+# elif defined SMALL_MEM
# define INBUFSIZ 0x2000 /* input buffer size */
# else
# define INBUFSIZ 0x8000 /* input buffer size */
@@ -83,7 +86,10 @@ extern int method; /* compression method */
#define INBUF_EXTRA 64 /* required by unlzw() */
#ifndef OUTBUFSIZ
-# ifdef SMALL_MEM
+# ifdef IBM_Z_DFLTCC
+/* DEFLATE COMPRESSION CALL works faster with larger output buffers */
+# define OUTBUFSIZ 0x40000
+# elif defined SMALL_MEM
# define OUTBUFSIZ 8192 /* output buffer size */
# else
# define OUTBUFSIZ 16384 /* output buffer size */
@@ -275,8 +281,8 @@ extern int unlzh (int in, int out);
extern noreturn void abort_gzip (void);
/* in deflate.c */
-extern void lm_init (int pack_level, ush *flags);
-extern off_t deflate (void);
+extern void lm_init (int pack_level);
+extern off_t deflate (int pack_level);
/* in trees.c */
extern void ct_init (ush *attr, int *method);
@@ -284,6 +290,8 @@ extern int ct_tally (int dist, int lc);
extern off_t flush_block (char *buf, ulg stored_len, int pad, int eof);
/* in bits.c */
+extern unsigned short bi_buf;
+extern int bi_valid;
extern void bi_init (file_t zipfile);
extern void send_bits (int value, int length);
extern unsigned bi_reverse (unsigned value, int length) _GL_ATTRIBUTE_CONST;
@@ -293,7 +301,9 @@ extern int (*read_buf) (char *buf, unsigned size);
/* in util.c: */
extern int copy (int in, int out);
-extern ulg updcrc (uch *s, unsigned n);
+extern ulg updcrc (const uch *s, unsigned n);
+extern ulg getcrc (void) _GL_ATTRIBUTE_PURE;
+extern void setcrc (ulg c);
extern void clear_bufs (void);
extern int fill_inbuf (int eof_ok);
extern void flush_outbuf (void);
@@ -315,3 +325,9 @@ extern void fprint_off (FILE *, off_t, int);
/* in inflate.c */
extern int inflate (void);
+
+ /* in dfltcc.c */
+#ifdef IBM_Z_DFLTCC
+extern int dfltcc_deflate (int pack_level);
+extern int dfltcc_inflate (void);
+#endif
diff --git a/tests/znew-k b/tests/znew-k
index eeb7b29..d43246b 100755
--- a/tests/znew-k
+++ b/tests/znew-k
@@ -29,12 +29,13 @@ chmod +x compress || framework_failure_
# Note that the basename must have a length of 6 or greater.
# Otherwise, "test -f $name" below would fail.
name=123456.Z
+gzname=123456.gz
printf '%1012977s' ' ' | gzip -c > $name || framework_failure_
fail=0
znew -K $name || fail=1
-test -f $name || fail=1
+test -f $name || test -f $gzname || fail=1
Exit $fail
diff --git a/unzip.c b/unzip.c
index a7255d4..86ef664 100644
--- a/unzip.c
+++ b/unzip.c
@@ -129,7 +129,11 @@ int unzip(in, out)
/* Decompress */
if (method == DEFLATED) {
+#ifdef IBM_Z_DFLTCC
+ int res = dfltcc_inflate();
+#else
int res = inflate();
+#endif
if (res == 3) {
xalloc_die ();
diff --git a/util.c b/util.c
index 41e50d7..dc00f4a 100644
--- a/util.c
+++ b/util.c
@@ -96,6 +96,11 @@ static const ulg crc_32_tab[] = {
0x2d02ef8dL
};
+/* ========================================================================
+ * Shift register contents
+ */
+static ulg crc = (ulg)0xffffffffL;
+
/* ===========================================================================
* Copy input to output unchanged: zcat == cat with --force.
* IN assertion: insize bytes have already been read in inbuf and inptr bytes
@@ -126,13 +131,11 @@ int copy(in, out)
* Return the current crc in either case.
*/
ulg updcrc(s, n)
- uch *s; /* pointer to bytes to pump through */
+ const uch *s; /* pointer to bytes to pump through */
unsigned n; /* number of bytes in s[] */
{
register ulg c; /* temporary variable */
- static ulg crc = (ulg)0xffffffffL; /* shift register contents */
-
if (s == NULL) {
c = 0xffffffffL;
} else {
@@ -145,6 +148,23 @@ ulg updcrc(s, n)
return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */
}
+/* ===========================================================================
+ * Return a current CRC value.
+ */
+ulg getcrc()
+{
+ return crc ^ 0xffffffffL;
+}
+
+/* ===========================================================================
+ * Set a new CRC value.
+ */
+void setcrc(c)
+ ulg c;
+{
+ crc = c ^ 0xffffffffL;
+}
+
/* ===========================================================================
* Clear input and output buffers
*/
@@ -238,7 +258,9 @@ void flush_outbuf()
{
if (outcnt == 0) return;
- write_buf(ofd, (char *)outbuf, outcnt);
+ if (!test) {
+ write_buf(ofd, (char *)outbuf, outcnt);
+ }
bytes_out += (off_t)outcnt;
outcnt = 0;
}
diff --git a/zip.c b/zip.c
index 1bd4c78..ace7e5e 100644
--- a/zip.c
+++ b/zip.c
@@ -23,9 +23,12 @@
#include "tailor.h"
#include "gzip.h"
-local ulg crc; /* crc on uncompressed file data */
off_t header_bytes; /* number of bytes in gzip header */
+#define FAST 4
+#define SLOW 2
+/* speed options for the general purpose bit flag */
+
/* ===========================================================================
* Deflate in to out.
* IN assertions: the input and output buffers are cleared.
@@ -68,11 +71,15 @@ int zip(in, out)
put_long (stamp);
/* Write deflated file to zip file */
- crc = updcrc(0, 0);
+ updcrc(NULL, 0);
bi_init(out);
ct_init(&attr, &method);
- lm_init(level, &deflate_flags);
+ if (level == 1) {
+ deflate_flags |= FAST;
+ } else if (level == 9) {
+ deflate_flags |= SLOW;
+ }
put_byte((uch)deflate_flags); /* extra flags */
put_byte(OS_CODE); /* OS identifier */
@@ -85,7 +92,11 @@ int zip(in, out)
}
header_bytes = (off_t)outcnt;
- (void)deflate();
+#ifdef IBM_Z_DFLTCC
+ (void)dfltcc_deflate(level);
+#else
+ (void)deflate(level);
+#endif
#ifndef NO_SIZE_CHECK
/* Check input size
@@ -98,7 +109,7 @@ int zip(in, out)
#endif
/* Write the crc and uncompressed size */
- put_long(crc);
+ put_long(getcrc());
put_long((ulg)bytes_in);
header_bytes += 2*4;
@@ -126,7 +137,7 @@ int file_read(buf, size)
read_error();
}
- crc = updcrc((uch*)buf, len);
+ updcrc((uch*)buf, len);
bytes_in += (off_t)len;
return (int)len;
}
2.21.0

1025
ibm2.patch Normal file

File diff suppressed because it is too large Load Diff

218
ibm4.patch Normal file
View File

@ -0,0 +1,218 @@
* configure.ac (AC_CHECK_HEADERS_ONCE): Add feature detection for
sys/sdt.h probes.
* dfltcc.c (dfltcc_cc): Minor formatting improvements.
(HB_BITS): Remove.
(HB_SIZE): Likewise.
(is_dfltcc_enabled): Fix buffer overrun on newer models and incomplete
initialization on older models.
Add machine mode hint.
(dfltcc): Use sys/sdt.h feature detection.
(bi_load): New function.
(bi_close_block): Use bi_load.
(close_stream): Fix overwriting the End-of-block Symbol.
(dfltcc_deflate): Fix losing partial byte on flush.
Fix setting Block-Continuation Flag when DFLTCC-CMPR outputs 0 bits and
requests a retry.
Minor formatting improvements.
(dfltcc_inflate): Retry immediately if requested.
Print the hardware error code and flush the output buffer on error.
Minor formatting improvements.
* tests/hufts: Ignore the hardware error code.
---
configure.ac | 2 +-
dfltcc.c | 76 ++++++++++++++++++++++++++++++++++++----------------
tests/hufts | 2 ++
3 files changed, 56 insertions(+), 24 deletions(-)
diff --git a/configure.ac b/configure.ac
index 76ac26f..b4aea34 100644
--- a/configure.ac
+++ b/configure.ac
@@ -263,7 +263,7 @@ AC_SUBST([ASFLAGS_config])
AC_ISC_POSIX
AC_C_CONST
AC_HEADER_STDC
-AC_CHECK_HEADERS_ONCE(fcntl.h limits.h memory.h time.h)
+AC_CHECK_HEADERS_ONCE(fcntl.h limits.h memory.h time.h sys/sdt.h)
AC_CHECK_FUNCS_ONCE([chown fchmod fchown lstat siginterrupt])
AC_HEADER_DIRENT
AC_TYPE_SIGNAL
diff --git a/dfltcc.c b/dfltcc.c
index ba62968..ed3be8d 100644
--- a/dfltcc.c
+++ b/dfltcc.c
@@ -22,7 +22,7 @@
#include <stdbool.h>
#include <stdlib.h>
-#ifdef DFLTCC_USDT
+#ifdef HAVE_SYS_SDT_H
# include <sys/sdt.h>
#endif
@@ -39,11 +39,11 @@
typedef enum
{
- DFLTCC_CC_OK = 0,
- DFLTCC_CC_OP1_TOO_SHORT = 1,
- DFLTCC_CC_OP2_TOO_SHORT = 2,
- DFLTCC_CC_OP2_CORRUPT = 2,
- DFLTCC_CC_AGAIN = 3,
+ DFLTCC_CC_OK = 0,
+ DFLTCC_CC_OP1_TOO_SHORT = 1,
+ DFLTCC_CC_OP2_TOO_SHORT = 2,
+ DFLTCC_CC_OP2_CORRUPT = 2,
+ DFLTCC_CC_AGAIN = 3,
} dfltcc_cc;
#define DFLTCC_QAF 0
@@ -51,8 +51,6 @@ typedef enum
#define DFLTCC_CMPR 2
#define DFLTCC_XPND 4
#define HBT_CIRCULAR (1 << 7)
-/* #define HB_BITS 15 */
-/* #define HB_SIZE (1 << HB_BITS) */
#define DFLTCC_FACILITY 151
#define DFLTCC_FMT0 0
#define CVT_CRC32 0
@@ -155,9 +153,16 @@ is_dfltcc_enabled (void)
if (env && !strcmp (env, "0"))
return 0;
- register int r0 __asm__ ("r0") = sizeof facilities / 8;
- __asm__ ("stfle %[facilities]\n"
- : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory");
+ memset (facilities, 0, sizeof facilities);
+ register char r0 __asm__ ("r0") = sizeof facilities / 8 - 1;
+ /* STFLE is supported since z9-109 and only in z/Architecture mode. When
+ * compiling with -m31, gcc defaults to ESA mode, however, since the kernel
+ * is 64-bit, it's always z/Architecture mode at runtime. */
+ __asm__ (".machinemode push\n"
+ ".machinemode zarch\n"
+ "stfle %[facilities]\n"
+ ".machinemode pop\n"
+ : [facilities] "=Q"(facilities), [r0] "+r"(r0) :: "cc");
return is_bit_set (facilities, DFLTCC_FACILITY);
}
@@ -180,12 +185,12 @@ dfltcc (int fn, void *param,
int cc;
__asm__ volatile (
-#ifdef DFLTCC_USDT
+#ifdef HAVE_SYS_SDT_H
STAP_PROBE_ASM (zlib, dfltcc_entry,
STAP_PROBE_ASM_TEMPLATE (5))
#endif
".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
-#ifdef DFLTCC_USDT
+#ifdef HAVE_SYS_SDT_H
STAP_PROBE_ASM (zlib, dfltcc_exit,
STAP_PROBE_ASM_TEMPLATE (5))
#endif
@@ -198,7 +203,7 @@ dfltcc (int fn, void *param,
: [r0] "r" (r0)
, [r1] "r" (r1)
, [hist] "r" (hist)
-#ifdef DFLTCC_USDT
+#ifdef HAVE_SYS_SDT_H
, STAP_PROBE_ASM_OPERANDS (5, r2, r3, r4, r5, hist)
#endif
: "cc", "memory");
@@ -264,10 +269,16 @@ init_param (union aligned_dfltcc_param_v0 *ctx)
}
static void
-bi_close_block (struct dfltcc_param_v0 *param)
+bi_load (struct dfltcc_param_v0 *param)
{
bi_valid = param->sbb;
bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1);
+}
+
+static void
+bi_close_block (struct dfltcc_param_v0 *param)
+{
+ bi_load (param);
send_bits (bi_reverse (param->eobs >> (15 - param->eobl), param->eobl),
param->eobl);
param->bcf = 0;
@@ -278,6 +289,7 @@ close_block (struct dfltcc_param_v0 *param)
{
bi_close_block (param);
bi_windup ();
+ /* bi_windup has written out a possibly partial byte, fix up the position */
param->sbb = (param->sbb + param->eobl) % 8;
if (param->sbb != 0)
{
@@ -291,6 +303,8 @@ close_stream (struct dfltcc_param_v0 *param)
{
if (param->bcf)
bi_close_block (param);
+ else
+ bi_load (param);
send_bits (1, 3); /* BFINAL=1, BTYPE=00 */
bi_windup ();
put_short (0x0000);
@@ -334,7 +348,16 @@ dfltcc_deflate (int pack_level)
{
/* Flush the output data. */
if (outcnt > OUTBUFSIZ - 8)
- flush_outbuf ();
+ {
+ if (param->sbb == 0)
+ flush_outbuf ();
+ else
+ {
+ uch partial = outbuf[outcnt];
+ flush_outbuf ();
+ outbuf[outcnt] = partial;
+ }
+ }
/* Close the block. */
if (param->bcf && total_in == block_threshold && !param->cf)
@@ -360,14 +383,16 @@ dfltcc_deflate (int pack_level)
{
if (total_in == 0 && block_threshold > 0)
param->htt = HTT_FIXED;
- else {
- param->htt = HTT_DYNAMIC;
- dfltcc_gdht (param);
- }
+ else
+ {
+ param->htt = HTT_DYNAMIC;
+ dfltcc_gdht (param);
+ }
}
/* Compress inbuf into outbuf. */
- dfltcc_cmpr_xpnd (param, DFLTCC_CMPR);
+ while (dfltcc_cmpr_xpnd (param, DFLTCC_CMPR) == DFLTCC_CC_AGAIN)
+ ;
/* Unmask the input data. */
insize += extra;
@@ -413,7 +438,9 @@ dfltcc_inflate (void)
}
/* Decompress inbuf into outbuf. */
- dfltcc_cc cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND);
+ dfltcc_cc cc;
+ while ((cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND)) == DFLTCC_CC_AGAIN)
+ ;
if (cc == DFLTCC_CC_OK)
{
/* The entire deflate stream has been successfully decompressed. */
@@ -422,6 +449,9 @@ dfltcc_inflate (void)
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0)
{
/* The deflate stream is corrupted. */
+ fprintf (stderr, "Operation-Ending-Supplemental Code 0x%x\n",
+ param->oesc);
+ flush_outbuf ();
return 2;
}
/* There must be more data to decompress. */

12
ibm5.patch Normal file
View File

@ -0,0 +1,12 @@
diff --git a/tests/hufts b/tests/hufts
index 5d8fb77..1b8ab3b 100755
--- a/tests/hufts
+++ b/tests/hufts
@@ -28,6 +28,7 @@ returns_ 1 gzip -dc "$abs_srcdir/hufts-segv.gz" > out 2> err || fail=1
compare /dev/null out || fail=1
sed 's/.*hufts-segv.gz: /...: /' err > k; mv k err || fail=1
+grep -v 'Operation-Ending-Supplemental Code' err > k; mv k err || fail=1
compare exp err || fail=1
printf '\037\213\010\000\060\060\060\060\060\060\144\000\000\000' > bug33501 \