diff --git a/Makefile.am b/Makefile.am index 025c914..8b4120c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -55,6 +55,9 @@ gzip_SOURCES = \ trees.c unlzh.c unlzw.c unpack.c unzip.c util.c zip.c gzip_LDADD = libver.a lib/libgzip.a gzip_LDADD += $(LIB_CLOCK_GETTIME) +if IBM_Z_DFLTCC +gzip_SOURCES += dfltcc.c +endif BUILT_SOURCES += version.c version.c: Makefile diff --git a/bits.c b/bits.c index b0df2fe..9effc32 100644 --- a/bits.c +++ b/bits.c @@ -78,7 +78,7 @@ local file_t zfile; /* output gzip file */ -local unsigned short bi_buf; +unsigned short bi_buf; /* Output buffer. bits are inserted starting at the bottom (least significant * bits). */ @@ -88,7 +88,7 @@ local unsigned short bi_buf; * more than 16 bits on some systems.) */ -local int bi_valid; +int bi_valid; /* Number of valid bits in bi_buf. All bits above the last valid bit * are always zero. */ diff --git a/configure.ac b/configure.ac index 9a2b635..76ac26f 100644 --- a/configure.ac +++ b/configure.ac @@ -78,6 +78,16 @@ AC_ARG_ENABLE([gcc-warnings], fi] ) +AC_ARG_ENABLE([dfltcc], + [AS_HELP_STRING([--enable-dfltcc], + [use DEFLATE COMPRESSION CALL instruction on IBM Z])], + [case $enableval in + yes|no) gl_dfltcc=$enableval ;; + *) AC_MSG_ERROR([bad value $enableval for dfltcc option]) ;; + esac], + [gl_dfltcc=no] +) + # gl_GCC_VERSION_IFELSE([major], [minor], [run-if-found], [run-if-not-found]) # ------------------------------------------------ # If $CPP is gcc-MAJOR.MINOR or newer, then run RUN-IF-FOUND. @@ -188,6 +198,12 @@ if test "$gl_gcc_warnings" = yes; then AC_SUBST([GNULIB_WARN_CFLAGS]) fi +if test "$gl_dfltcc" = yes; then + AC_DEFINE([IBM_Z_DFLTCC], , + [Use DEFLATE COMPRESSION CALL instruction on IBM Z machines.]) +fi +AM_CONDITIONAL([IBM_Z_DFLTCC], [test "$gl_dfltcc" = yes]) + # cc -E produces incorrect asm files on SVR4, so postprocess it. ASCPPPOST="sed '/^ *\\#/d; s,//.*,,; s/% /%/g; s/\\. /./g'" AC_SUBST([ASCPPPOST]) diff --git a/deflate.c b/deflate.c index 8ffff3a..869b902 100644 --- a/deflate.c +++ b/deflate.c @@ -123,10 +123,6 @@ #define NIL 0 /* Tail of hash chains */ -#define FAST 4 -#define SLOW 2 -/* speed options for the general purpose bit flag */ - #ifndef TOO_FAR # define TOO_FAR 4096 #endif @@ -215,9 +211,6 @@ local unsigned int max_lazy_match; * max_insert_length is used only for compression levels <= 3. */ -local int compr_level; -/* compression level (1..9) */ - unsigned good_match; /* Use a faster search when the previous match is longer than this */ @@ -308,14 +301,12 @@ local void check_match (IPos start, IPos match, int length); /* =========================================================================== * Initialize the "longest match" routines for a new file */ -void lm_init (pack_level, flags) +void lm_init (pack_level) int pack_level; /* 0: store, 1: best speed, 9: best compression */ - ush *flags; /* general purpose bit flag */ { register unsigned j; if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level"); - compr_level = pack_level; /* Initialize the hash table. */ #if defined MAXSEG_64K && HASH_BITS == 15 @@ -337,11 +328,6 @@ void lm_init (pack_level, flags) nice_match = configuration_table[pack_level].nice_length; #endif max_chain_length = configuration_table[pack_level].max_chain; - if (pack_level == 1) { - *flags |= FAST; - } else if (pack_level == 9) { - *flags |= SLOW; - } /* ??? reduce max_chain_length for binary files */ strstart = 0; @@ -732,7 +718,7 @@ local off_t deflate_fast() * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */ -off_t deflate() +off_t deflate(int pack_level) { IPos hash_head; /* head of hash chain */ IPos prev_match; /* previous match */ @@ -740,7 +726,8 @@ off_t deflate() int match_available = 0; /* set if previous match exists */ register unsigned match_length = MIN_MATCH-1; /* length of best match */ - if (compr_level <= 3) return deflate_fast(); /* optimized for speed */ + lm_init(pack_level); + if (pack_level <= 3) return deflate_fast(); /* optimized for speed */ /* Process the input block. */ while (lookahead != 0) { diff --git a/dfltcc.c b/dfltcc.c new file mode 100644 index 0000000..9010475 --- /dev/null +++ b/dfltcc.c @@ -0,0 +1,429 @@ +/* dfltcc.c -- compress data using IBM Z DEFLATE COMPRESSION CALL + + Copyright (C) 2019 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include +#include +#ifdef DFLTCC_USDT +#include +#endif + +#include "tailor.h" +#include "gzip.h" + +#ifdef DYN_ALLOC + error: DYN_ALLOC is not supported by DFLTCC +#endif + +/* =========================================================================== + * C wrappers for the DEFLATE CONVERSION CALL instruction. + */ + +typedef enum +{ + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, +} dfltcc_cc; + +#define DFLTCC_QAF 0 +#define DFLTCC_GDHT 1 +#define DFLTCC_CMPR 2 +#define DFLTCC_XPND 4 +#define HBT_CIRCULAR (1 << 7) +//#define HB_BITS 15 +//#define HB_SIZE (1 << HB_BITS) +#define DFLTCC_FACILITY 151 +#define DFLTCC_FMT0 0 +#define CVT_CRC32 0 +#define HTT_FIXED 0 +#define HTT_DYNAMIC 1 + +struct dfltcc_qaf_param +{ + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +}; + +struct dfltcc_param_v0 +{ + unsigned short pbvn; /* Parameter-Block-Version Number */ + unsigned char mvn; /* Model-Version Number */ + unsigned char ribm; /* Reserved for IBM use */ + unsigned reserved32 : 31; + unsigned cf : 1; /* Continuation Flag */ + unsigned char reserved64[8]; + unsigned nt : 1; /* New Task */ + unsigned reserved129 : 1; + unsigned cvt : 1; /* Check Value Type */ + unsigned reserved131 : 1; + unsigned htt : 1; /* Huffman-Table Type */ + unsigned bcf : 1; /* Block-Continuation Flag */ + unsigned bcc : 1; /* Block Closing Control */ + unsigned bhf : 1; /* Block Header Final */ + unsigned reserved136 : 1; + unsigned reserved137 : 1; + unsigned dhtgc : 1; /* DHT Generation Control */ + unsigned reserved139 : 5; + unsigned reserved144 : 5; + unsigned sbb : 3; /* Sub-Byte Boundary */ + unsigned char oesc; /* Operation-Ending-Supplemental Code */ + unsigned reserved160 : 12; + unsigned ifs : 4; /* Incomplete-Function Status */ + unsigned short ifl; /* Incomplete-Function Length */ + unsigned char reserved192[8]; + unsigned char reserved256[8]; + unsigned char reserved320[4]; + unsigned short hl; /* History Length */ + unsigned reserved368 : 1; + unsigned short ho : 15; /* History Offset */ + unsigned int cv; /* Check Value */ + unsigned eobs : 15; /* End-of-block Symbol */ + unsigned reserved431 : 1; + unsigned char eobl : 4; /* End-of-block Length */ + unsigned reserved436 : 12; + unsigned reserved448 : 4; + unsigned short cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + unsigned char reserved464[6]; + unsigned char cdht[288]; + unsigned char reserved[32]; + unsigned char csb[1152]; +}; + +static int is_bit_set(const char *bits, int n) +{ + return bits[n / 8] & (1 << (7 - (n % 8))); +} + +static int is_dfltcc_enabled(void) +{ + const char *env; + char facilities[((DFLTCC_FACILITY / 64) + 1) * 8]; + register int r0 __asm__("r0"); + + env = getenv("DFLTCC"); + if (env && !strcmp(env, "0")) { + return 0; + } + + r0 = sizeof(facilities) / 8; + __asm__("stfle %[facilities]\n" + : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory"); + return is_bit_set((const char *) facilities, DFLTCC_FACILITY); +} + +static dfltcc_cc dfltcc(int fn, void *param, + uch **op1, size_t *len1, + const uch **op2, size_t *len2, + void *hist) +{ + uch *t2 = op1 ? *op1 : NULL; + size_t t3 = len1 ? *len1 : 0; + const uch *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? *len2 : 0; + register int r0 __asm__("r0") = fn; + register void *r1 __asm__("r1") = param; + register uch *r2 __asm__("r2") = t2; + register size_t r3 __asm__("r3") = t3; + register const uch *r4 __asm__("r4") = t4; + register size_t r5 __asm__("r5") = t5; + int cc; + + __asm__ volatile( +#ifdef DFLTCC_USDT + STAP_PROBE_ASM(zlib, dfltcc_entry, + STAP_PROBE_ASM_TEMPLATE(5)) +#endif + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +#ifdef DFLTCC_USDT + STAP_PROBE_ASM(zlib, dfltcc_exit, + STAP_PROBE_ASM_TEMPLATE(5)) +#endif + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) +#ifdef DFLTCC_USDT + , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +#endif + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; +} + +static void dfltcc_qaf(struct dfltcc_qaf_param *param) +{ + dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); +} + +static void dfltcc_gdht(struct dfltcc_param_v0 *param) +{ + const uch *next_in = inbuf + inptr; + size_t avail_in = insize - inptr; + + dfltcc(DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL); +} + +static off_t total_in; + +static dfltcc_cc dfltcc_cmpr_xpnd(struct dfltcc_param_v0 *param, int fn) +{ + uch *next_out = outbuf + outcnt; + size_t avail_out = OUTBUFSIZ - outcnt; + const uch *next_in = inbuf + inptr; + size_t avail_in = insize - inptr; + off_t consumed_in; + dfltcc_cc cc; + + cc = dfltcc(fn | HBT_CIRCULAR, param, + &next_out, &avail_out, + &next_in, &avail_in, + window); + consumed_in = next_in - (inbuf + inptr); + inptr += consumed_in; + total_in += consumed_in; + outcnt += ((OUTBUFSIZ - outcnt) - avail_out); + return cc; +} + +__attribute__((aligned(8))) +static struct context +{ + union + { + struct dfltcc_qaf_param af; + struct dfltcc_param_v0 param; + }; +} ctx; + +static struct dfltcc_param_v0 *init_param(struct dfltcc_param_v0 *param) +{ + const char *s; + + memset(param, 0, sizeof(*param)); +#ifndef DFLTCC_RIBM +#define DFLTCC_RIBM 0 +#endif + s = getenv("DFLTCC_RIBM"); + param->ribm = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_RIBM; + param->nt = 1; + param->cvt = CVT_CRC32; + param->cv = __builtin_bswap32(getcrc()); + return param; +} + +static void bi_close_block(struct dfltcc_param_v0 *param) +{ + bi_valid = param->sbb; + bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1); + send_bits( + bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), + param->eobl); + param->bcf = 0; +} + +static void close_block(struct dfltcc_param_v0 *param) +{ + bi_close_block(param); + bi_windup(); + param->sbb = (param->sbb + param->eobl) % 8; + if (param->sbb != 0) { + Assert(outcnt > 0, "outbuf must have enough space for EOBS"); + outcnt--; + } +} + +static void close_stream(struct dfltcc_param_v0 *param) +{ + if (param->bcf) { + bi_close_block(param); + } + send_bits(1, 3); /* BFINAL=1, BTYPE=00 */ + bi_windup(); + put_short(0x0000); + put_short(0xFFFF); +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +/* =========================================================================== + * Compress ifd into ofd in hardware or fall back to software. + */ +int dfltcc_deflate(int pack_level) +{ + const char *s; + unsigned long level_mask; + unsigned long block_size; + off_t block_threshold; + struct dfltcc_param_v0 *param; + int extra; + + /* Check whether we can use hardware compression */ + if (!is_dfltcc_enabled() || getenv("SOURCE_DATE_EPOCH")) { + return deflate(pack_level); + } +#ifndef DFLTCC_LEVEL_MASK +#define DFLTCC_LEVEL_MASK 0x2 +#endif + s = getenv("DFLTCC_LEVEL_MASK"); + level_mask = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_LEVEL_MASK; + if ((level_mask & (1 << pack_level)) == 0) { + return deflate(pack_level); + } + dfltcc_qaf(&ctx.af); + if (!is_bit_set(ctx.af.fns, DFLTCC_CMPR) || + !is_bit_set(ctx.af.fns, DFLTCC_GDHT) || + !is_bit_set(ctx.af.fmts, DFLTCC_FMT0)) { + return deflate(pack_level); + } + + /* Initialize tuning parameters */ +#ifndef DFLTCC_BLOCK_SIZE +#define DFLTCC_BLOCK_SIZE 1048576 +#endif + s = getenv("DFLTCC_BLOCK_SIZE"); + block_size = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_BLOCK_SIZE; + (void)block_size; +#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +#endif + s = getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"); + block_threshold = (s && *s) ? strtoul(s, NULL, 0) : + DFLTCC_FIRST_FHT_BLOCK_SIZE; + + /* Compress ifd into ofd in a loop */ + param = init_param(&ctx.param); + while (1) { + /* Flush the output data */ + if (outcnt > OUTBUFSIZ - 8) { + flush_outbuf(); + } + + /* Close the block */ + if (param->bcf && total_in == block_threshold && !param->cf) { + close_block(param); + block_threshold += block_size; + } + + /* Read the input data */ + if (inptr == insize) { + if (fill_inbuf(1) == EOF && !param->cf) { + break; + } + inptr = 0; + } + + /* Temporarily mask some input data */ + extra = MAX(0, total_in + (insize - inptr) - block_threshold); + insize -= extra; + + /* Start a new block */ + if (!param->bcf) { + if (total_in == 0 && block_threshold > 0) { + param->htt = HTT_FIXED; + } else { + param->htt = HTT_DYNAMIC; + dfltcc_gdht(param); + } + } + + /* Compress inbuf into outbuf */ + dfltcc_cmpr_xpnd(param, DFLTCC_CMPR); + + /* Unmask the input data */ + insize += extra; + + /* Continue the block */ + param->bcf = 1; + } + close_stream(param); + setcrc(__builtin_bswap32(param->cv)); + return 0; +} + +/* =========================================================================== + * Decompress ifd into ofd in hardware or fall back to software. + */ +int dfltcc_inflate(void) +{ + struct dfltcc_param_v0 *param; + dfltcc_cc cc; + + /* Check whether we can use hardware decompression */ + if (!is_dfltcc_enabled()) { + return inflate(); + } + dfltcc_qaf(&ctx.af); + if (!is_bit_set(ctx.af.fns, DFLTCC_XPND)) { + return inflate(); + } + + /* Decompress ifd into ofd in a loop */ + param = init_param(&ctx.param); + while (1) { + /* Perform I/O */ + if (outcnt == OUTBUFSIZ) { + flush_outbuf(); + } + if (inptr == insize) { + if (fill_inbuf(1) == EOF) { + /* Premature EOF */ + return 2; + } + inptr = 0; + } + /* Decompress inbuf into outbuf */ + cc = dfltcc_cmpr_xpnd(param, DFLTCC_XPND); + if (cc == DFLTCC_CC_OK) { + /* The entire deflate stream has been successfully decompressed */ + break; + } + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* The deflate stream is corrupted */ + return 2; + } + /* There must be more data to decompress */ + } + if (param->sbb != 0) { + /* The deflate stream has ended in the middle of a byte - go to the next + * byte boundary, so that unzip() can read CRC and length. + */ + inptr++; + } + setcrc(__builtin_bswap32(param->cv)); /* set CRC value for unzip() */ + flush_outbuf(); /* update bytes_out for unzip() */ + return 0; +} diff --git a/gzip.c b/gzip.c index 3ddfb7f..4fffc4f 100644 --- a/gzip.c +++ b/gzip.c @@ -128,9 +128,21 @@ static char const *const license_msg[] = { /* global buffers */ +#ifdef IBM_Z_DFLTCC +/* DEFLATE COMPRESSION CALL works faster with page-aligned input buffers */ +__attribute__((aligned(4096))) +#endif DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA); +#ifdef IBM_Z_DFLTCC +/* DEFLATE COMPRESSION CALL works faster with page-aligned output buffers */ +__attribute__((aligned(4096))) +#endif DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); DECLARE(ush, d_buf, DIST_BUFSIZE); +#ifdef IBM_Z_DFLTCC +/* DEFLATE COMPRESSION CALL works only with page-aligned windows */ +__attribute__((aligned(4096))) +#endif DECLARE(uch, window, 2L*WSIZE); #ifndef MAXSEG_64K DECLARE(ush, tab_prefix, 1L< $name || framework_failure_ fail=0 znew -K $name || fail=1 -test -f $name || fail=1 +test -f $name || test -f $gzname || fail=1 Exit $fail diff --git a/unzip.c b/unzip.c index a7255d4..86ef664 100644 --- a/unzip.c +++ b/unzip.c @@ -129,7 +129,11 @@ int unzip(in, out) /* Decompress */ if (method == DEFLATED) { +#ifdef IBM_Z_DFLTCC + int res = dfltcc_inflate(); +#else int res = inflate(); +#endif if (res == 3) { xalloc_die (); diff --git a/util.c b/util.c index 41e50d7..dc00f4a 100644 --- a/util.c +++ b/util.c @@ -96,6 +96,11 @@ static const ulg crc_32_tab[] = { 0x2d02ef8dL }; +/* ======================================================================== + * Shift register contents + */ +static ulg crc = (ulg)0xffffffffL; + /* =========================================================================== * Copy input to output unchanged: zcat == cat with --force. * IN assertion: insize bytes have already been read in inbuf and inptr bytes @@ -126,13 +131,11 @@ int copy(in, out) * Return the current crc in either case. */ ulg updcrc(s, n) - uch *s; /* pointer to bytes to pump through */ + const uch *s; /* pointer to bytes to pump through */ unsigned n; /* number of bytes in s[] */ { register ulg c; /* temporary variable */ - static ulg crc = (ulg)0xffffffffL; /* shift register contents */ - if (s == NULL) { c = 0xffffffffL; } else { @@ -145,6 +148,23 @@ ulg updcrc(s, n) return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ } +/* =========================================================================== + * Return a current CRC value. + */ +ulg getcrc() +{ + return crc ^ 0xffffffffL; +} + +/* =========================================================================== + * Set a new CRC value. + */ +void setcrc(c) + ulg c; +{ + crc = c ^ 0xffffffffL; +} + /* =========================================================================== * Clear input and output buffers */ @@ -238,7 +258,9 @@ void flush_outbuf() { if (outcnt == 0) return; - write_buf(ofd, (char *)outbuf, outcnt); + if (!test) { + write_buf(ofd, (char *)outbuf, outcnt); + } bytes_out += (off_t)outcnt; outcnt = 0; } diff --git a/zip.c b/zip.c index 1bd4c78..ace7e5e 100644 --- a/zip.c +++ b/zip.c @@ -23,9 +23,12 @@ #include "tailor.h" #include "gzip.h" -local ulg crc; /* crc on uncompressed file data */ off_t header_bytes; /* number of bytes in gzip header */ +#define FAST 4 +#define SLOW 2 +/* speed options for the general purpose bit flag */ + /* =========================================================================== * Deflate in to out. * IN assertions: the input and output buffers are cleared. @@ -68,11 +71,15 @@ int zip(in, out) put_long (stamp); /* Write deflated file to zip file */ - crc = updcrc(0, 0); + updcrc(NULL, 0); bi_init(out); ct_init(&attr, &method); - lm_init(level, &deflate_flags); + if (level == 1) { + deflate_flags |= FAST; + } else if (level == 9) { + deflate_flags |= SLOW; + } put_byte((uch)deflate_flags); /* extra flags */ put_byte(OS_CODE); /* OS identifier */ @@ -85,7 +92,11 @@ int zip(in, out) } header_bytes = (off_t)outcnt; - (void)deflate(); +#ifdef IBM_Z_DFLTCC + (void)dfltcc_deflate(level); +#else + (void)deflate(level); +#endif #ifndef NO_SIZE_CHECK /* Check input size @@ -98,7 +109,7 @@ int zip(in, out) #endif /* Write the crc and uncompressed size */ - put_long(crc); + put_long(getcrc()); put_long((ulg)bytes_in); header_bytes += 2*4; @@ -126,7 +137,7 @@ int file_read(buf, size) read_error(); } - crc = updcrc((uch*)buf, len); + updcrc((uch*)buf, len); bytes_in += (off_t)len; return (int)len; } 2.21.0