Fix for python3.11 broken libxml2 and lxml

Also fixes the unnecessary IFUNC resolver for crc32_z
and covscan issue CWE-681

Resolves: #2174393 #2031015
This commit is contained in:
Lukas Javorsky 2023-05-16 08:03:21 +00:00
parent 530f352cf2
commit 4aa56c2fc6
8 changed files with 3360 additions and 424 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,73 @@
From 5eaae2af0defeca148c2a281873bb31a15246876 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 2 Feb 2023 19:34:24 +0100
Subject: [PATCH] 2155328
---
contrib/s390/dfltcc.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c
index 72099e7..f8dc21c 100644
--- a/contrib/s390/dfltcc.c
+++ b/contrib/s390/dfltcc.c
@@ -456,7 +456,10 @@ again:
*strm->next_out = (Bytef)state->bi_buf;
/* Honor history and check value */
param->nt = 0;
- param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
+ if (state->wrap == 1)
+ param->cv = strm->adler;
+ else if (state->wrap == 2)
+ param->cv = ZSWAP32(strm->adler);
/* When opening a block, choose a Huffman-Table Type */
if (!param->bcf) {
@@ -488,7 +491,10 @@ again:
state->bi_buf = 0; /* Avoid accessing next_out */
else
state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
- strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv;
+ if (state->wrap == 1)
+ strm->adler = param->cv;
+ else if (state->wrap == 2)
+ strm->adler = ZSWAP32(param->cv);
/* Unmask the input data */
strm->avail_in += masked_avail_in;
@@ -600,11 +606,12 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
}
/* Translate stream to parameter block */
- param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32;
+ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32;
param->sbb = state->bits;
if (param->hl)
param->nt = 0; /* Honor history for the first block */
- param->cv = state->flags ? ZSWAP32(state->check) : state->check;
+ if (state->wrap & 4)
+ param->cv = state->flags ? ZSWAP32(state->check) : state->check;
/* Inflate */
do {
@@ -615,7 +622,9 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
state->last = cc == DFLTCC_CC_OK;
state->bits = param->sbb;
- strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
+ if (state->wrap & 4)
+ strm->adler = state->check = state->flags ?
+ ZSWAP32(param->cv) : param->cv;
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
/* Report an error if stream is corrupted */
state->mode = BAD;
@@ -1077,4 +1086,4 @@ int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length)
if (dict_length)
*dict_length = param->hl;
return Z_OK;
-}
\ No newline at end of file
+}
--
2.39.1

View File

@ -0,0 +1,54 @@
From 480b65cae6c20a41aa698a6c9d3b260f6f744004 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 2 Feb 2023 19:41:32 +0100
Subject: [PATCH] 0003-PATCH-Fix-clang-s-behavior-on-versions-7.patch
---
contrib/power/clang_workaround.h | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h
index b5e7dae..915f7e5 100644
--- a/contrib/power/clang_workaround.h
+++ b/contrib/power/clang_workaround.h
@@ -39,7 +39,12 @@ __vector unsigned long long __builtin_pack_vector (unsigned long __a,
return __v;
}
-#ifndef vec_xxpermdi
+/*
+ * Clang 7 changed the behavior of vec_xxpermdi in order to match the
+ * behavior of GCC. That means code adapted to Clang >= 7 does not work on
+ * Clang <= 6. So, fall back to __builtin_unpack_vector() on Clang <= 6.
+ */
+#if !defined vec_xxpermdi || __clang_major__ <= 6
static inline
unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
@@ -62,9 +67,9 @@ static inline
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
{
#if defined(__BIG_ENDIAN__)
- return vec_xxpermdi(__v, __v, 0x0)[1];
- #else
return vec_xxpermdi(__v, __v, 0x0)[0];
+ #else
+ return vec_xxpermdi(__v, __v, 0x3)[0];
#endif
}
@@ -72,9 +77,9 @@ static inline
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
{
#if defined(__BIG_ENDIAN__)
- return vec_xxpermdi(__v, __v, 0x3)[1];
- #else
return vec_xxpermdi(__v, __v, 0x3)[0];
+ #else
+ return vec_xxpermdi(__v, __v, 0x0)[0];
#endif
}
#endif /* vec_xxpermdi */
--
2.39.1

View File

@ -0,0 +1,282 @@
From 14730a26e830eb2b09d1f7097910616f23c1476e Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 2 Feb 2023 19:40:32 +0100
Subject: [PATCH] 0001-PATCH-Preparation-for-Power-optimizations.patch
---
CMakeLists.txt | 67 ++++++++++++++++++++++++++++++++++++++++++
configure | 66 +++++++++++++++++++++++++++++++++++++++++
contrib/README.contrib | 8 +++++
contrib/gcc/zifunc.h | 60 +++++++++++++++++++++++++++++++++++++
contrib/power/power.h | 4 +++
5 files changed, 205 insertions(+)
create mode 100644 contrib/gcc/zifunc.h
create mode 100644 contrib/power/power.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0fe939d..e762023 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,7 @@ set(VERSION "1.2.11")
option(ASM686 "Enable building i686 assembly implementation")
option(AMD64 "Enable building amd64 assembly implementation")
+option(POWER "Enable building power implementation")
set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
@@ -140,6 +141,72 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_definitions(-DASMV)
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
endif()
+
+ # test to see if we can use a GNU indirect function to detect and load optimized code at runtime
+ CHECK_C_SOURCE_COMPILES("
+ static int test_ifunc_native(void)
+ {
+ return 1;
+ }
+ static int (*(check_ifunc_native(void)))(void)
+ {
+ return test_ifunc_native;
+ }
+ int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\")));
+ int main(void)
+ {
+ return 0;
+ }
+ " HAS_C_ATTR_IFUNC)
+
+ if(HAS_C_ATTR_IFUNC)
+ add_definitions(-DHAVE_IFUNC)
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h)
+ endif()
+
+ if(POWER)
+ # Test to see if we can use the optimizations for Power
+ CHECK_C_SOURCE_COMPILES("
+ #ifndef _ARCH_PPC
+ #error \"Target is not Power\"
+ #endif
+ #ifndef __BUILTIN_CPU_SUPPORTS__
+ #error \"Target doesn't support __builtin_cpu_supports()\"
+ #endif
+ int main() { return 0; }
+ " HAS_POWER_SUPPORT)
+
+ if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC)
+ add_definitions(-DZ_POWER_OPT)
+
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power8)
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER8)
+
+ if(POWER8)
+ add_definitions(-DZ_POWER8)
+ set(ZLIB_POWER8 )
+
+ set_source_files_properties(
+ ${ZLIB_POWER8}
+ PROPERTIES COMPILE_FLAGS -mcpu=power8)
+ endif()
+
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power9)
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER9)
+
+ if(POWER9)
+ add_definitions(-DZ_POWER9)
+ set(ZLIB_POWER9 )
+
+ set_source_files_properties(
+ ${ZLIB_POWER9}
+ PROPERTIES COMPILE_FLAGS -mcpu=power9)
+ endif()
+
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h)
+ set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9})
+ endif()
+ endif()
endif()
if(MSVC)
diff --git a/configure b/configure
index d026b35..0538d58 100755
--- a/configure
+++ b/configure
@@ -846,6 +846,72 @@ else
echo "Checking for sys/sdt.h ... No." | tee -a configure.log
fi
+# test to see if we can use a GNU indirect function to detect and load optimized code at runtime
+echo >> configure.log
+cat > $test.c <<EOF
+static int test_ifunc_native(void)
+{
+ return 1;
+}
+
+static int (*(check_ifunc_native(void)))(void)
+{
+ return test_ifunc_native;
+}
+
+int test_ifunc(void) __attribute__ ((ifunc ("check_ifunc_native")));
+EOF
+
+if tryboth $CC -c $CFLAGS $test.c; then
+ SFLAGS="${SFLAGS} -DHAVE_IFUNC"
+ CFLAGS="${CFLAGS} -DHAVE_IFUNC"
+ echo "Checking for attribute(ifunc) support... Yes." | tee -a configure.log
+else
+ echo "Checking for attribute(ifunc) support... No." | tee -a configure.log
+fi
+
+# Test to see if we can use the optimizations for Power
+echo >> configure.log
+cat > $test.c <<EOF
+#ifndef _ARCH_PPC
+ #error "Target is not Power"
+#endif
+#ifndef HAVE_IFUNC
+ #error "Target doesn't support ifunc"
+#endif
+#ifndef __BUILTIN_CPU_SUPPORTS__
+ #error "Target doesn't support __builtin_cpu_supports()"
+#endif
+EOF
+
+if tryboth $CC -c $CFLAGS $test.c; then
+ echo "int main(void){return 0;}" > $test.c
+
+ if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
+ POWER8="-DZ_POWER8"
+ PIC_OBJC="${PIC_OBJC}"
+ OBJC="${OBJC}"
+ echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
+ else
+ echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
+ fi
+
+ if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then
+ POWER9="-DZ_POWER9"
+ PIC_OBJC="${PIC_OBJC}"
+ OBJC="${OBJC}"
+ echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log
+ else
+ echo "Checking for -mcpu=power9 support... No." | tee -a configure.log
+ fi
+
+ SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
+ CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
+ echo "Checking for Power optimizations support... Yes." | tee -a configure.log
+else
+ echo "Checking for Power optimizations support... No." | tee -a configure.log
+fi
+
# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
diff --git a/contrib/README.contrib b/contrib/README.contrib
index b4d3b18..2a53f90 100644
--- a/contrib/README.contrib
+++ b/contrib/README.contrib
@@ -19,6 +19,10 @@ asm686/ by Brian Raiter <breadbox@muppetlabs.com>
blast/ by Mark Adler <madler@alumni.caltech.edu>
Decompressor for output of PKWare Data Compression Library (DCL)
+gcc/ by Matheus Castanho <msc@linux.ibm.com>
+ and Rogerio Alves <rcardoso@linux.ibm.com>
+ Optimization helpers using GCC-specific extensions
+
delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
Support for Delphi and C++ Builder
@@ -63,6 +67,10 @@ minizip/ by Gilles Vollant <info@winimage.com>
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
Support for Pascal
+power/ by Matheus Castanho <msc@linux.ibm.com>
+ and Rogerio Alves <rcardoso@linux.ibm.com>
+ Optimized functions for Power processors
+
puff/ by Mark Adler <madler@alumni.caltech.edu>
Small, low memory usage inflate. Also serves to provide an
unambiguous description of the deflate format.
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
new file mode 100644
index 0000000..daf4fe4
--- /dev/null
+++ b/contrib/gcc/zifunc.h
@@ -0,0 +1,60 @@
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef Z_IFUNC_H_
+#define Z_IFUNC_H_
+
+/* Helpers for arch optimizations */
+
+#define Z_IFUNC(fname) \
+ typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
+ local typeof(fname) *fname##_resolver(void)
+/* This is a helper macro to declare a resolver for an indirect function
+ * (ifunc). Let's say you have function
+ *
+ * int foo (int a);
+ *
+ * for which you want to provide different implementations, for example:
+ *
+ * int foo_clever (int a) {
+ * ... clever things ...
+ * }
+ *
+ * int foo_smart (int a) {
+ * ... smart things ...
+ * }
+ *
+ * You will have to declare foo() as an indirect function and also provide a
+ * resolver for it, to choose between foo_clever() and foo_smart() based on
+ * some criteria you define (e.g. processor features).
+ *
+ * Since most likely foo() has a default implementation somewhere in zlib, you
+ * may have to rename it so the 'foo' symbol can be used by the ifunc without
+ * conflicts.
+ *
+ * #define foo foo_default
+ * int foo (int a) {
+ * ...
+ * }
+ * #undef foo
+ *
+ * Now you just have to provide a resolver function to choose which function
+ * should be used (decided at runtime on the first call to foo()):
+ *
+ * Z_IFUNC(foo) {
+ * if (... some condition ...)
+ * return foo_clever;
+ *
+ * if (... other condition ...)
+ * return foo_smart;
+ *
+ * return foo_default;
+ * }
+ *
+ * All calls to foo() throughout the code can remain untouched, all the magic
+ * will be done by the linker using the resolver function.
+ */
+
+#endif /* Z_IFUNC_H_ */
diff --git a/contrib/power/power.h b/contrib/power/power.h
new file mode 100644
index 0000000..b42c7d6
--- /dev/null
+++ b/contrib/power/power.h
@@ -0,0 +1,4 @@
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
--
2.39.1

View File

@ -9,19 +9,6 @@ Subject: [PATCH] fixed covscan issues
test/crc32_test.c | 8 ++++---- test/crc32_test.c | 8 ++++----
3 files changed, 6 insertions(+), 6 deletions(-) 3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/crc32.c b/crc32.c
index 406d350..34132ea 100644
--- a/crc32.c
+++ b/crc32.c
@@ -302,7 +302,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
if (!crc32_func)
crc32_func = crc32_z_ifunc();
- return (*crc32_func)(crc, buf, len);
+ return (*crc32_func)(crc, buf, len);
}
#endif /* defined(Z_IFUNC_ASM) || defined(Z_IFUNC_NATIVE) */
diff --git a/deflate.c b/deflate.c diff --git a/deflate.c b/deflate.c
index 089285a..9b09718 100644 index 089285a..9b09718 100644
--- a/deflate.c --- a/deflate.c
@ -35,40 +22,4 @@ index 089285a..9b09718 100644
bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate :
s->level == 0 ? deflate_stored(s, flush) : s->level == 0 ? deflate_stored(s, flush) :
diff --git a/test/crc32_test.c b/test/crc32_test.c
index 5d73128..2d2a6c7 100644
--- a/test/crc32_test.c
+++ b/test/crc32_test.c
@@ -11,25 +11,25 @@
# include <stdlib.h>
#endif
-void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line));
+void test_crc32 OF((uLong crc, char* buf, z_size_t len, uLong chk, int line));
int main OF((void));
typedef struct {
int line;
uLong crc;
- Byte* buf;
+ char* buf;
int len;
uLong expect;
} crc32_test;
void test_crc32(crc, buf, len, chk, line)
uLong crc;
- Byte *buf;
+ char *buf;
z_size_t len;
uLong chk;
int line;
{
- uLong res = crc32(crc, buf, len);
+ uLong res = crc32(crc, (Bytef *) buf, len);
if (res != chk) {
fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n",
line, (unsigned int)res, (unsigned int)chk);
--
2.19.1

View File

@ -1,365 +0,0 @@
From 27a84de4a30cd35f8565937397f6d1205b912818 Mon Sep 17 00:00:00 2001
From: Ondrej Dubaj <odubaj@redhat.com>
Date: Thu, 5 Sep 2019 09:16:35 +0200
Subject: [PATCH 1/2] fix: power8 crc32 - return 0 with 0 ptr passed
---
contrib/power8-crc/vec_crc32.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
index bb2204b..5ce9cd2 100644
--- a/contrib/power8-crc/vec_crc32.c
+++ b/contrib/power8-crc/vec_crc32.c
@@ -74,6 +74,7 @@ unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
unsigned int prealign;
unsigned int tail;
+ if (p == (const unsigned char *) 0x0) return 0;
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
--
2.19.1
From c066ac92982a2ffe5b1e9bd36000058927437bd5 Mon Sep 17 00:00:00 2001
From: Ondrej Dubaj <odubaj@redhat.com>
Date: Thu, 5 Sep 2019 09:36:47 +0200
Subject: [PATCH 2/2] Add CRC32 tests (crc32_test)
This commit includes a CRC32 test (crc32_test). These tests are important
since some architectures may want to include CPU-dependent optimizations for
the CRC32 algorithm, such as using vector instructions, and we may want to
validate those.
---
Makefile.in | 35 +++++---
test/crc32_test.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 230 insertions(+), 10 deletions(-)
create mode 100644 test/crc32_test.c
diff --git a/Makefile.in b/Makefile.in
index 40b5cfb..6070dcc 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -75,11 +75,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)
all: static shared
-static: example$(EXE) minigzip$(EXE)
+static: crc32_test$(EXE) example$(EXE) minigzip$(EXE)
-shared: examplesh$(EXE) minigzipsh$(EXE)
+shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)
-all64: example64$(EXE) minigzip64$(EXE)
+all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE)
check: test
@@ -87,7 +87,7 @@ test: all teststatic testshared
teststatic: static
@TMPST=tmpst_$$; \
- if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST ; then \
+ if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST && ./crc32_test; then \
echo ' *** zlib test OK ***'; \
else \
echo ' *** zlib test FAILED ***'; false; \
@@ -100,7 +100,7 @@ testshared: shared
DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
TMPSH=tmpsh_$$; \
- if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH; then \
+ if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH && ./crc32_testsh; then \
echo ' *** zlib shared test OK ***'; \
else \
echo ' *** zlib shared test FAILED ***'; false; \
@@ -109,7 +109,7 @@ testshared: shared
test64: all64
@TMP64=tmp64_$$; \
- if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64; then \
+ if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64 && ./crc32_test64; then \
echo ' *** zlib 64-bit test OK ***'; \
else \
echo ' *** zlib 64-bit test FAILED ***'; false; \
@@ -157,6 +157,12 @@ example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c
+crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
+ $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c
+
+crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
+ $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c
+
example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c
@@ -307,12 +313,21 @@ example$(EXE): example.o $(STATICLIB)
minigzip$(EXE): minigzip.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)
+crc32_test$(EXE): crc32_test.o $(STATICLIB)
+ $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS)
+
+crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV)
+ $(CC) $(CFLAGS) -o $@ crc32_test.o -L. $(SHAREDLIBV)
+
examplesh$(EXE): example.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ example.o -L. $(SHAREDLIBV)
minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ minigzip.o -L. $(SHAREDLIBV)
+crc32_test64$(EXE): crc32_test64.o $(STATICLIB)
+ $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS)
+
example64$(EXE): example64.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)
@@ -382,8 +397,8 @@ zconf: $(SRCDIR)zconf.h.in
mostlyclean: clean
clean:
rm -f *.o *.lo *~ \
- example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
- example64$(EXE) minigzip64$(EXE) \
+ crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
+ crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \
infcover \
libz.* foo.gz so_locations \
_match.s maketree contrib/infback9/*.o
@@ -407,7 +422,7 @@ tags:
adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
-compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
+compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
@@ -417,7 +432,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr
adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
-compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
+compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
diff --git a/test/crc32_test.c b/test/crc32_test.c
new file mode 100644
index 0000000..5d73128
--- /dev/null
+++ b/test/crc32_test.c
@@ -0,0 +1,205 @@
+/* crc32_test.c -- unit test for crc32 in the zlib compression library
+ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zlib.h"
+#include <stdio.h>
+
+#ifdef STDC
+# include <string.h>
+# include <stdlib.h>
+#endif
+
+void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line));
+int main OF((void));
+
+typedef struct {
+ int line;
+ uLong crc;
+ Byte* buf;
+ int len;
+ uLong expect;
+} crc32_test;
+
+void test_crc32(crc, buf, len, chk, line)
+ uLong crc;
+ Byte *buf;
+ z_size_t len;
+ uLong chk;
+ int line;
+{
+ uLong res = crc32(crc, buf, len);
+ if (res != chk) {
+ fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n",
+ line, (unsigned int)res, (unsigned int)chk);
+ exit(1);
+ }
+}
+
+static const crc32_test tests[] = {
+ {__LINE__, 0x0, 0x0, 0, 0x0},
+ {__LINE__, 0xffffffff, 0x0, 0, 0x0},
+ {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799. */
+ {__LINE__, 0x0, 0x0, 256, 0x0},
+ {__LINE__, 0x0, 0x0, 257, 0x0},
+ {__LINE__, 0x0, 0x0, 32767, 0x0},
+ {__LINE__, 0x0, 0x0, 32768, 0x0},
+ {__LINE__, 0x0, 0x0, 32769, 0x0},
+ {__LINE__, 0x0, "", 0, 0x0},
+ {__LINE__, 0xffffffff, "", 0, 0xffffffff},
+ {__LINE__, 0x0, "abacus", 6, 0xc3d7115b},
+ {__LINE__, 0x0, "backlog", 7, 0x269205},
+ {__LINE__, 0x0, "campfire", 8, 0x22a515f8},
+ {__LINE__, 0x0, "delta", 5, 0x9643fed9},
+ {__LINE__, 0x0, "executable", 10, 0xd68eda01},
+ {__LINE__, 0x0, "file", 4, 0x8c9f3610},
+ {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd},
+ {__LINE__, 0x0, "hello", 5, 0x3610a686},
+ {__LINE__, 0x0, "inverter", 8, 0xc9e962c9},
+ {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69},
+ {__LINE__, 0x0, "karate", 6, 0x890be0e2},
+ {__LINE__, 0x0, "landscape", 9, 0xc4e0330b},
+ {__LINE__, 0x0, "machine", 7, 0x1505df84},
+ {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39},
+ {__LINE__, 0x0, "oblivion", 8, 0xdae9de77},
+ {__LINE__, 0x0, "panama", 6, 0x66b8979c},
+ {__LINE__, 0x0, "quest", 5, 0x4317f817},
+ {__LINE__, 0x0, "resource", 8, 0xbc91f416},
+ {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5},
+ {__LINE__, 0x0, "test", 4, 0xd87f7e0c},
+ {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b},
+ {__LINE__, 0x0, "vector", 6, 0x1b6e485b},
+ {__LINE__, 0x0, "walrus", 6, 0xbe769b97},
+ {__LINE__, 0x0, "xeno", 4, 0xe7a06444},
+ {__LINE__, 0x0, "yelling", 7, 0xfe3944e5},
+ {__LINE__, 0x0, "zlib", 4, 0x73887d3a},
+ {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1},
+ {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e},
+ {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76},
+ {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a},
+ {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d},
+ {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5},
+ {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11},
+ {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac},
+ {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb},
+ {__LINE__, 0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 0x896930f9},
+ {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37},
+ {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0},
+ {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29},
+ {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8},
+ {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192},
+ {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0},
+ {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7},
+ {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546},
+ {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c},
+ {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca},
+ {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7},
+ {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5},
+ {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba},
+ {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8},
+ {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662},
+ {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2},
+ {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b},
+ {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937},
+ {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f},
+ {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4},
+ {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4},
+ {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631},
+ {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99},
+ {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac},
+ {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9},
+ {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf},
+ {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac},
+ {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388},
+ {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d},
+ {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5},
+ {__LINE__, 0x0, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9},
+ {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777},
+ {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048},
+ {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84},
+ {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1},
+ {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca},
+ {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1},
+ {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254},
+ {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b},
+ {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b},
+ {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5},
+ {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109},
+ {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41},
+ {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644},
+ {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636},
+ {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb},
+ {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b},
+ {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4},
+ {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36},
+ {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925},
+ {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db},
+ {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8},
+ {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d},
+ {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef},
+ {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5},
+ {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38},
+ {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127},
+ {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0},
+ {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274},
+ {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870},
+ {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd},
+ {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 0xb52b38bc},
+ {__LINE__, 0xe5adaf4f, "MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178},
+ {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070},
+ {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0},
+ {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72},
+ {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620},
+ {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d},
+ {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24},
+ {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df},
+ {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30},
+ {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9},
+ {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7},
+ {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d},
+ {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7},
+ {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7},
+ {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2},
+ {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461},
+ {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a},
+ {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12},
+ {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c},
+ {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1},
+ {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2},
+ {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1},
+ {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83},
+ {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81},
+ {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404},
+ {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010},
+ {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3},
+ {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f},
+ {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de},
+ {__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59},
+ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f},
+ {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac},
+ {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66},
+ {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7},
+ {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a},
+ {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060},
+ {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312},
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912},
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B}
+};
+
+static const int test_size = sizeof(tests) / sizeof(tests[0]);
+
+int main(void)
+{
+ int i;
+ for (i = 0; i < test_size; i++) {
+ test_crc32(tests[i].crc, tests[i].buf, tests[i].len,
+ tests[i].expect, tests[i].line);
+ }
+ return 0;
+}
\ No newline at end of file
--
2.19.1

View File

@ -0,0 +1,428 @@
From cfbf97cb54a6d06a80e86c85869331e4e2871129 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 19 Mar 2020 11:52:03 +0100
Subject: [PATCH] s390x: vectorize crc32
Use vector extensions when compiling for s390x and binutils knows
about them. At runtime, check whether kernel supports vector
extensions (it has to be not just the CPU, but also the kernel) and
choose between the regular and the vectorized implementations.
---
Makefile.in | 9 ++
configure | 28 +++++
contrib/gcc/zifunc.h | 21 +++-
contrib/s390/crc32-vx.c | 195 ++++++++++++++++++++++++++++++++
contrib/s390/crc32_z_resolver.c | 41 +++++++
crc32.c | 11 +-
6 files changed, 301 insertions(+), 4 deletions(-)
create mode 100644 contrib/s390/crc32-vx.c
create mode 100644 contrib/s390/crc32_z_resolver.c
diff --git a/Makefile.in b/Makefile.in
index d392616..63f76da 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -29,6 +29,7 @@ LDFLAGS=
TEST_LDFLAGS=-L. libz.a
LDSHARED=$(CC)
CPP=$(CC) -E
+VGFMAFLAG=
STATICLIB=libz.a
SHAREDLIB=libz.so
@@ -179,6 +180,9 @@ crc32.o: $(SRCDIR)crc32.c
crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
$(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c
+crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c
+ $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c
+
deflate.o: $(SRCDIR)deflate.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
@@ -229,6 +233,11 @@ crc32.lo: $(SRCDIR)crc32.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
-@mv objs/crc32.o $@
+crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c
+ -@mv objs/crc32-vx.o $@
+
crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
diff --git a/configure b/configure
index e37dac8..a4606b8 100755
--- a/configure
+++ b/configure
@@ -915,6 +915,32 @@ else
echo "Checking for Power optimizations support... No." | tee -a configure.log
fi
+# check if we are compiling for s390 and binutils supports vector extensions
+VGFMAFLAG=-march=z13
+cat > $test.c <<EOF
+#ifndef __s390__
+#error
+#endif
+EOF
+if try $CC -c $CFLAGS $VGFMAFLAG $test.c; then
+ CFLAGS="$CFLAGS -DHAVE_S390X_VX"
+ SFLAGS="$SFLAGS -DHAVE_S390X_VX"
+ OBJC="$OBJC crc32-vx.o"
+ PIC_OBJC="$PIC_OBJC crc32-vx.lo"
+ echo "Checking for s390 vector extensions... Yes." | tee -a configure.log
+
+ for flag in -mzarch -fzvector; do
+ if try $CC -c $CFLAGS $VGFMAFLAG $flag $test.c; then
+ VGFMAFLAG="$VGFMAFLAG $flag"
+ echo "Checking for $flag... Yes." | tee -a configure.log
+ else
+ echo "Checking for $flag... No." | tee -a configure.log
+ fi
+ done
+else
+ echo "Checking for s390 vector extensions... No." | tee -a configure.log
+fi
+
# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
@@ -947,6 +973,7 @@ echo mandir = $mandir >> configure.log
echo prefix = $prefix >> configure.log
echo sharedlibdir = $sharedlibdir >> configure.log
echo uname = $uname >> configure.log
+echo VGFMAFLAG = $VGFMAFLAG >> configure.log
# udpate Makefile with the configure results
sed < ${SRCDIR}Makefile.in "
@@ -956,6 +983,7 @@ sed < ${SRCDIR}Makefile.in "
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
/^LDSHARED *=/s#=.*#=$LDSHARED#
/^CPP *=/s#=.*#=$CPP#
+/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG#
/^STATICLIB *=/s#=.*#=$STATICLIB#
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
index daf4fe4..b62379e 100644
--- a/contrib/gcc/zifunc.h
+++ b/contrib/gcc/zifunc.h
@@ -8,9 +8,28 @@
/* Helpers for arch optimizations */
+#if defined(__clang__)
+#if __has_feature(coverage_sanitizer)
+#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage")))
+#else /* __has_feature(coverage_sanitizer) */
+#define Z_IFUNC_NO_SANCOV
+#endif /* __has_feature(coverage_sanitizer) */
+#else /* __clang__ */
+#define Z_IFUNC_NO_SANCOV
+#endif /* __clang__ */
+
+#ifdef __s390__
+#define Z_IFUNC_PARAMS unsigned long hwcap
+#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV
+#else /* __s390__ */
+#define Z_IFUNC_PARAMS void
+#define Z_IFUNC_ATTRS
+#endif /* __s390__ */
+
#define Z_IFUNC(fname) \
typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
- local typeof(fname) *fname##_resolver(void)
+ Z_IFUNC_ATTRS \
+ local typeof(fname) *fname##_resolver(Z_IFUNC_PARAMS)
/* This is a helper macro to declare a resolver for an indirect function
* (ifunc). Let's say you have function
*
diff --git a/contrib/s390/crc32-vx.c b/contrib/s390/crc32-vx.c
new file mode 100644
index 0000000..fa5387c
--- /dev/null
+++ b/contrib/s390/crc32-vx.c
@@ -0,0 +1,195 @@
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of bitreflected CRC-32 checksums.
+ *
+ * This CRC-32 implementation algorithm is bitreflected and processes
+ * the least-significant bit first (Little-Endian).
+ *
+ * This code was originally written by Hendrik Brueckner
+ * <brueckner@linux.vnet.ibm.com> for use in the Linux kernel and has been
+ * relicensed under the zlib license.
+ */
+
+#include "../../zutil.h"
+
+#include <stdint.h>
+#include <vecintrin.h>
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) {
+ /*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ * R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1
+ * R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1
+ * R3 = [(x^(128+32) mod P'(x) << 32)]' << 1
+ * R4 = [(x^(128-32) mod P'(x) << 32)]' << 1
+ * R5 = [(x^64 mod P'(x) << 32)]' << 1
+ * R6 = [(x^32 mod P'(x) << 32)]' << 1
+ *
+ * The bitreflected Barrett reduction constant, u', is defined as
+ * the bit reversal of floor(x**64 / P(x)).
+ *
+ * where P(x) is the polynomial in the normal domain and the P'(x) is the
+ * polynomial in the reversed (bitreflected) domain.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ * P(x) = 0x04C11DB7
+ * P'(x) = 0xEDB88320
+ */
+ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */
+ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */
+ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */
+ const uv2di r5 = {0, 0x163CD6124}; /* R5 */
+ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */
+ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */
+
+ /*
+ * Load the initial CRC value.
+ *
+ * The CRC value is loaded into the rightmost word of the
+ * vector register and is later XORed with the LSB portion
+ * of the loaded input data.
+ */
+ uv2di v0 = {0, 0};
+ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3);
+
+ /* Load a 64-byte data chunk and XOR with CRC */
+ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be);
+ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be);
+ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be);
+ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be);
+
+ v1 ^= v0;
+ buf += 64;
+ len -= 64;
+
+ while (len >= 64) {
+ /* Load the next 64-byte data chunk */
+ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be);
+ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be);
+ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be);
+ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be);
+
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the R1 and R2 reduction constants in V0. The intermediate result
+ * is then folded (accumulated) with the next data chunk in PART1 and
+ * stored in V1. Repeat this step for the register contents
+ * in V2, V3, and V4 respectively.
+ */
+ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1);
+ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2);
+ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3);
+ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4);
+
+ buf += 64;
+ len -= 64;
+ }
+
+ /*
+ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
+ * and R4, accumulating the next 128-bit chunk, until a single 128-bit
+ * value remains.
+ */
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3);
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4);
+
+ while (len >= 16) {
+ /* Load next data chunk */
+ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be);
+
+ /* Fold next data chunk */
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
+
+ buf += 16;
+ len -= 16;
+ }
+
+ /*
+ * Set up a vector register for byte shifts. The shift value must
+ * be loaded in bits 1-4 in byte element 7 of a vector register.
+ * Shift by 8 bytes: 0x40
+ * Shift by 4 bytes: 0x20
+ */
+ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ v9 = vec_insert((unsigned char)0x40, v9, 7);
+
+ /*
+ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
+ * to move R4 into the rightmost doubleword and set the leftmost
+ * doubleword to 0x1.
+ */
+ v0 = vec_srb(r4r3, (uv2di)v9);
+ v0[0] = 1;
+
+ /*
+ * Compute GF(2) product of V1 and V0. The rightmost doubleword
+ * of V1 is multiplied with R4. The leftmost doubleword of V1 is
+ * multiplied by 0x1 and is then XORed with rightmost product.
+ * Implicitly, the intermediate leftmost product becomes padded
+ */
+ v1 = (uv2di)vec_gfmsum_128(v0, v1);
+
+ /*
+ * Now do the final 32-bit fold by multiplying the rightmost word
+ * in V1 with R5 and XOR the result with the remaining bits in V1.
+ *
+ * To achieve this by a single VGFMAG, right shift V1 by a word
+ * and store the result in V2 which is then accumulated. Use the
+ * vector unpack instruction to load the rightmost half of the
+ * doubleword into the rightmost doubleword element of V1; the other
+ * half is loaded in the leftmost doubleword.
+ * The vector register with CONST_R5 contains the R5 constant in the
+ * rightmost doubleword and the leftmost doubleword is zero to ignore
+ * the leftmost product of V1.
+ */
+ v9 = vec_insert((unsigned char)0x20, v9, 7);
+ v2 = vec_srb(v1, (uv2di)v9);
+ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */
+ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2);
+
+ /*
+ * Apply a Barrett reduction to compute the final 32-bit CRC value.
+ *
+ * The input values to the Barrett reduction are the degree-63 polynomial
+ * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+ * constant u. The Barrett reduction result is the CRC value of R(x) mod
+ * P(x).
+ *
+ * The Barrett reduction algorithm is defined as:
+ *
+ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+ * 3. C(x) = R(x) XOR T2(x) mod x^32
+ *
+ * Note: The leftmost doubleword of vector register containing
+ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
+ * is zero and does not contribute to the final result.
+ */
+
+ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+ v2 = vec_unpackl((uv4si)v1);
+ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2);
+
+ /*
+ * Compute the GF(2) product of the CRC polynomial with T1(x) in
+ * V2 and XOR the intermediate result, T2(x), with the value in V1.
+ * The final result is stored in word element 2 of V2.
+ */
+ v2 = vec_unpackl((uv4si)v2);
+ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1);
+
+ return ((uv4si)v2)[2];
+}
diff --git a/contrib/s390/crc32_z_resolver.c b/contrib/s390/crc32_z_resolver.c
new file mode 100644
index 0000000..9749cab
--- /dev/null
+++ b/contrib/s390/crc32_z_resolver.c
@@ -0,0 +1,41 @@
+#include <sys/auxv.h>
+#include "../gcc/zifunc.h"
+
+#define VX_MIN_LEN 64
+#define VX_ALIGNMENT 16L
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
+
+unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len);
+
+local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len)
+{
+ uintptr_t prealign, aligned, remaining;
+
+ if (buf == Z_NULL) return 0UL;
+
+ if (len < VX_MIN_LEN + VX_ALIGN_MASK)
+ return crc32_z_default(crc, buf, len);
+
+ if ((uintptr_t)buf & VX_ALIGN_MASK) {
+ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
+ len -= prealign;
+ crc = crc32_z_default(crc, buf, prealign);
+ buf += prealign;
+ }
+ aligned = len & ~VX_ALIGN_MASK;
+ remaining = len & VX_ALIGN_MASK;
+
+ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff;
+
+ if (remaining)
+ crc = crc32_z_default(crc, buf + aligned, remaining);
+
+ return crc;
+}
+
+Z_IFUNC(crc32_z)
+{
+ if (hwcap & HWCAP_S390_VX)
+ return s390_crc32_vx;
+ return crc32_z_default;
+}
diff --git a/crc32.c b/crc32.c
index b0cda20..379fac3 100644
--- a/crc32.c
+++ b/crc32.c
@@ -199,12 +199,12 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
/* ========================================================================= */
-#ifdef Z_POWER_OPT
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
/* Rename function so resolver can use its symbol. The default version will be
* returned by the resolver if the host has no support for an optimized version.
*/
#define crc32_z crc32_z_default
-#endif /* Z_POWER_OPT */
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
unsigned long ZEXPORT crc32_z(crc, buf, len)
unsigned long crc;
@@ -240,10 +240,15 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
return crc ^ 0xffffffffUL;
}
-#ifdef Z_POWER_OPT
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
#undef crc32_z
+#ifdef Z_POWER_OPT
#include "contrib/power/crc32_z_resolver.c"
#endif /* Z_POWER_OPT */
+#ifdef HAVE_S390X_VX
+#include "contrib/s390/crc32_z_resolver.c"
+#endif /* HAVE_S390X_VX */
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
/* ========================================================================= */
unsigned long ZEXPORT crc32(crc, buf, len)
--
2.39.1

View File

@ -3,7 +3,7 @@
Name: zlib Name: zlib
Version: 1.2.11 Version: 1.2.11
Release: 22%{?dist} Release: 23%{?dist}
Summary: The compression and decompression library Summary: The compression and decompression library
# /contrib/dotzlib/ have Boost license # /contrib/dotzlib/ have Boost license
License: zlib and Boost License: zlib and Boost
@ -16,12 +16,6 @@ Patch0: zlib-1.2.5-minizip-fixuncrypt.patch
Patch1: zlib-1.2.11-optimized-s390.patch Patch1: zlib-1.2.11-optimized-s390.patch
# IBM Z optimalizations # IBM Z optimalizations
Patch2: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch Patch2: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch
# IBM CRC32 optimalization for POWER archs
Patch3: zlib-1.2.11-optimized-CRC32-framework.patch
# fixed firefox crash + added test case
Patch4: zlib-1.2.11-firefox-crash-fix.patch
# fixed covscan issues
Patch5: zlib-1.2.11-covscan-issues.patch
# fix for IBM Z optimalizations # fix for IBM Z optimalizations
Patch6: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch Patch6: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch
# permit a deflateParams() parameter change # permit a deflateParams() parameter change
@ -44,6 +38,23 @@ Patch14: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch
# Optimization for z15 # Optimization for z15
Patch15: zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch Patch15: zlib-1.2.11-IBM-Z-hw-accelrated-inflate-small-window.patch
# Optimized crc32 for Power 8+ processors
# Source: https://github.com/madler/zlib/pull/750
Patch16: zlib-1.2.11-Preparation-for-Power-optimizations.patch
Patch17: zlib-1.2.11-Add-Power8-optimized-crc32.patch
Patch18: zlib-1.2.11-Fix-clang-s-behavior-on-versions-7.patch
# Fix for Unnecessary IFUNC resolver for crc32_z
# Fix for s390x vectorize CRC32
Patch19: zlib-1.2.11-s390x-vectorize-crc32.patch
# Fix for python3.11 broken libxml2 and lxml on s390x
Patch20: zlib-1.2.11-Fix-broken-libxml2-for-python311.patch
# fixed covscan issues
Patch21: zlib-1.2.11-covscan-issues.patch
BuildRequires: automake, autoconf, libtool BuildRequires: automake, autoconf, libtool
%description %description
@ -98,9 +109,6 @@ developing applications which use minizip.
%patch1 -p1 -b .optimized-deflate %patch1 -p1 -b .optimized-deflate
%endif %endif
%patch2 -p1 %patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1 %patch6 -p1
%patch7 -p1 %patch7 -p1
%patch8 -p1 %patch8 -p1
@ -111,6 +119,12 @@ developing applications which use minizip.
%patch13 -p1 %patch13 -p1
%patch14 -p1 %patch14 -p1
%patch15 -p1 %patch15 -p1
%patch16 -p1
%patch17 -p1
%patch18 -p1
%patch19 -p1
%patch20 -p1
%patch21 -p1
iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp
@ -191,6 +205,11 @@ find $RPM_BUILD_ROOT -name '*.la' -delete
%changelog %changelog
* Tue May 09 2023 Lukas Javorsky <ljavorsk@redhat.com> - 1.2.11-23
- Rebased Power 8 optimization patches
- Fix for Unnecessary IFUNC resolver for crc32_z
- Fix for python3.11 broken libxml2 and lxml on s390x
* Tue May 09 2023 Lukas Javorsky <ljavorsk@redhat.com> - 1.2.11-22 * Tue May 09 2023 Lukas Javorsky <ljavorsk@redhat.com> - 1.2.11-22
- Inflate small window optimization for IBM z15 rhbz#2154775 - Inflate small window optimization for IBM z15 rhbz#2154775