drop aarch64 optimalizations due to breakage of ABI compatibility in openjdk (#1936823)

This commit is contained in:
Ondrej Dubaj 2021-03-17 11:12:21 +01:00
parent 3ba936014f
commit c95d3d1e73
4 changed files with 1 additions and 515 deletions

View File

@ -1,170 +0,0 @@
From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 13:52:17 -0700
Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
This should help with compression of data, using NEON instructions
(therefore useful for ARMv7/ARMv8).
Original patch by Jun He.
---
CMakeLists.txt | 18 ++++++++
contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++
deflate.c | 7 +++
3 files changed, 109 insertions(+)
create mode 100644 contrib/arm/neon_slide_hash.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0fe939d..e9a74e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC)
set(ZLIB_ASMS contrib/amd64/amd64-match.S)
endif ()
+ if(ARM_NEON)
+ list(REMOVE_ITEM ZLIB_SRCS inflate.c)
+ set(ZLIB_ARM_NEON_HDRS
+ contrib/arm/chunkcopy.h
+ contrib/arm/inffast_chunk.h
+ contrib/arm/neon_slide_hash.h)
+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
+ add_definitions(-DARM_NEON)
+ set(COMPILER ${CMAKE_C_COMPILER})
+ # NEON is mandatory in ARMv8.
+ if(${COMPILER} MATCHES "aarch64")
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
+ # But it was optional for ARMv7.
+ elseif(${COMPILER} MATCHES "arm")
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
+ endif()
+ endif()
+
if(ZLIB_ASMS)
add_definitions(-DASMV)
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
new file mode 100644
index 0000000..0daffa1
--- /dev/null
+++ b/contrib/arm/neon_slide_hash.h
@@ -0,0 +1,84 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+ * Jun He <jun.he@arm.com>
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __NEON_SLIDE_HASH__
+#define __NEON_SLIDE_HASH__
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#include "deflate.h"
+#include <arm_neon.h>
+
+inline static void neon_slide_hash(deflate_state *s)
+{
+ /*
+ * This is ASIMD implementation for hash table rebase
+ * it assumes:
+ * 1. hash chain offset (Pos) is 2 bytes
+ * 2. hash table size is multiple*128 bytes
+ * #1 should be true as Pos is defined as "ush"
+ * #2 should be true as hash_bits are greater that 7
+ */
+ unsigned n, m;
+ unsigned short wsize = s->w_size;
+ uint16x8_t v, *p;
+ size_t size;
+
+ size = s->hash_size*sizeof(s->head[0]);
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
+
+ Assert(sizeof(Pos) == 2, "Wrong Pos size");
+
+ /* slide s->head */
+ v = vdupq_n_u16(wsize);
+ p = (uint16x8_t *)(s->head);
+ n = size / (sizeof(uint16x8_t) * 8);
+ do {
+ p[0] = vqsubq_u16(p[0], v);
+ p[1] = vqsubq_u16(p[1], v);
+ p[2] = vqsubq_u16(p[2], v);
+ p[3] = vqsubq_u16(p[3], v);
+ p[4] = vqsubq_u16(p[4], v);
+ p[5] = vqsubq_u16(p[5], v);
+ p[6] = vqsubq_u16(p[6], v);
+ p[7] = vqsubq_u16(p[7], v);
+ p += 8;
+ } while (--n);
+#ifndef FASTEST
+ /* slide s->prev */
+ size = wsize*sizeof(s->prev[0]);
+
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
+
+ p = (uint16x8_t *)(s->prev);
+ n = size / (sizeof(uint16x8_t) * 8);
+ do {
+ p[0] = vqsubq_u16(p[0], v);
+ p[1] = vqsubq_u16(p[1], v);
+ p[2] = vqsubq_u16(p[2], v);
+ p[3] = vqsubq_u16(p[3], v);
+ p[4] = vqsubq_u16(p[4], v);
+ p[5] = vqsubq_u16(p[5], v);
+ p[6] = vqsubq_u16(p[6], v);
+ p[7] = vqsubq_u16(p[7], v);
+ p += 8;
+ } while (--n);
+#endif
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 1ec7614..36f99ac 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,6 +50,9 @@
/* @(#) $Id$ */
#include "deflate.h"
+#if __ARM_NEON
+#include "contrib/arm/neon_slide_hash.h"
+#endif
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -201,6 +204,9 @@ local const config configuration_table[10] = {
local void slide_hash(s)
deflate_state *s;
{
+#if ARM_NEON
+ return neon_slide_hash(s);
+#else
unsigned n, m;
Posf *p;
uInt wsize = s->w_size;
@@ -222,6 +228,7 @@ local void slide_hash(s)
*/
} while (--n);
#endif
+#endif
}
/* ========================================================================= */
--
2.19.0

View File

@ -1,218 +0,0 @@
From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 15:14:19 -0700
Subject: [PATCH 2/3] Porting optimized longest_match
This patch was contributed to zlib-ng and features an improved longest_match
function using the most distant hash code to reduce number of checks
(see: http://www.gildor.org/en/projects/zlib).
Original patch by Jun He.
---
CMakeLists.txt | 3 +-
contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++
deflate.c | 11 ++-
3 files changed, 152 insertions(+), 4 deletions(-)
create mode 100644 contrib/arm/arm_longest_match.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e9a74e9..3826eba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
set(ZLIB_ARM_NEON_HDRS
contrib/arm/chunkcopy.h
contrib/arm/inffast_chunk.h
- contrib/arm/neon_slide_hash.h)
+ contrib/arm/neon_slide_hash.h
+ contrib/arm/arm_longest_match.h)
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER})
diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
new file mode 100644
index 0000000..9e7083f
--- /dev/null
+++ b/contrib/arm/arm_longest_match.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+ * Jun He <jun.he@arm.com>
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __ARM_LONGEST__MATCH__
+#define __ARM_LONGEST__MATCH__
+
+#if defined(ARM_NEON)
+#include "deflate.h"
+#include <stdint.h>
+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
+{
+ register int len = 0;
+ register unsigned long xor = 0;
+ register int check_loops = max/sizeof(unsigned long);
+ while(check_loops-- > 0) {
+ xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len));
+ if (xor) break;
+ len += sizeof(unsigned long);
+ }
+ if (0 == xor) {
+ while (len < max) {
+ if (a[len] != b[len]) break;
+ len++;
+ }
+ return len;
+ }
+ xor = __builtin_ctzl(xor)>>3;
+ return len + xor;
+}
+
+/*
+ * This implementation is based on algorithm described at:
+ * http://www.gildor.org/en/projects/zlib
+ * It uses the hash chain indexed by the most distant hash code to
+ * reduce number of checks.
+ * This also eliminates the those unnecessary check loops in legacy
+ * longest_match's do..while loop if the "most distant code" is out
+ * of search buffer
+ *
+ */
+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
+ unsigned char *scan = s->window + s->strstart; /* current string */
+ unsigned char *match; /* matched string */
+ unsigned int len; /* length of current match */
+ unsigned int best_len = s->prev_length; /* best match length so far */
+ unsigned int nice_match = s->nice_match; /* stop if match long enough */
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ s->strstart - (IPos)MAX_DIST(s) : 0;
+ /* Stop when cur_match becomes <= limit. To simplify the code,
+ * we prevent matches with the string of window index 0.
+ */
+ int offset = 0; /* offset of the head[most_distant_hash] from IN cur_match */
+ Pos *prev = s->prev;
+ unsigned int wmask = s->w_mask;
+ unsigned char *scan_buf_base = s->window;
+
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+ * It is easy to get rid of this optimization if necessary.
+ */
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+ /* Do not look for matches beyond the end of the input. This is necessary
+ * to make deflate deterministic.
+ */
+ if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+ /* find most distant hash code for lazy_match */
+ if (best_len > MIN_MATCH) {
+ /* search for most distant hash code */
+ int i;
+ uint16_t hash = 0;
+ IPos pos;
+
+ UPDATE_HASH(s, hash, scan[1]);
+ UPDATE_HASH(s, hash, scan[2]);
+ for (i = 3; i <= best_len; i++) {
+ UPDATE_HASH(s, hash, scan[i]);
+ /* get head IPos of hash calced by scan[i-2..i] */
+ pos = s->head[hash];
+ /* compare it to current "farthest hash" IPos */
+ if (pos <= cur_match) {
+ /* we have a new "farthest hash" now */
+ offset = i - 2;
+ cur_match = pos;
+ }
+ }
+
+ /* update variables to correspond offset */
+ limit += offset;
+ /*
+ * check if the most distant code's offset is out of search buffer
+ * if it is true, then this means scan[offset..offset+2] are not
+ * presented in the search buffer. So we just return best_len
+ * we've found.
+ */
+ if (cur_match < limit) return best_len;
+
+ scan_buf_base -= offset;
+ /* reduce hash search depth based on best_len */
+ chain_length /= best_len - MIN_MATCH;
+ }
+
+ do {
+ Assert(cur_match < s->strstart, "no future");
+
+ /* Determine matched length at current pos */
+ match = scan_buf_base + cur_match;
+ len = get_match_len(match, scan, MAX_MATCH);
+
+ if (len > best_len) {
+ /* found longer string */
+ s->match_start = cur_match - offset;
+ best_len = len;
+ /* good enough? */
+ if (len >= nice_match) break;
+ }
+ /* move to prev pos in this hash chain */
+ } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+
+ return (best_len <= s->lookahead)? best_len : s->lookahead;
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 36f99ac..4c42259 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,9 +50,6 @@
/* @(#) $Id$ */
#include "deflate.h"
-#if __ARM_NEON
-#include "contrib/arm/neon_slide_hash.h"
-#endif
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
s->head[s->hash_size-1] = NIL; \
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+#if defined(ARM_NEON)
+#include "contrib/arm/arm_longest_match.h"
+#include "contrib/arm/neon_slide_hash.h"
+#endif
+
/* ===========================================================================
* Slide the hash table when sliding the window down (could be avoided with 32
* bit values at the expense of memory usage). We slide even when level == 0 to
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
deflate_state *s;
IPos cur_match; /* current match */
{
+#if defined(ARM_NEON)
+ return arm_longest_match(s, cur_match);
+#endif
unsigned chain_length = s->max_chain_length;/* max hash chain length */
register Bytef *scan = s->window + s->strstart; /* current string */
register Bytef *match; /* matched string */
--
2.19.0

View File

@ -1,115 +0,0 @@
From e0be75f8dce27a4e32196529df2a08dca791a286 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 6 Apr 2018 11:46:42 -0500
Subject: [PATCH 3/3] arm64 specific build patch
---
Makefile.in | 19 ++++++++++++-------
configure | 2 +-
contrib/minizip/zip.c | 6 ++++--
3 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/Makefile.in b/Makefile.in
index 5a77949..9f088e5 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -57,7 +57,7 @@ SRCDIR=
ZINC=
ZINCOUT=-I.
-OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o
+OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o
OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
OBJC = $(OBJZ) $(OBJG)
@@ -163,16 +163,16 @@ crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
deflate.o: $(SRCDIR)deflate.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)deflate.c
infback.o: $(SRCDIR)infback.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c
inffast.o: $(SRCDIR)inffast.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c
inflate.o: $(SRCDIR)inflate.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inflate.c
inftrees.o: $(SRCDIR)inftrees.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c
@@ -214,7 +214,7 @@ crc32.lo: $(SRCDIR)crc32.c
deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
-@mv objs/deflate.o $@
infback.lo: $(SRCDIR)infback.c
@@ -222,14 +222,19 @@ infback.lo: $(SRCDIR)infback.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c
-@mv objs/infback.o $@
+arminffast.lo: $(SRCDIR)contrib/arm/inffast_chunk.c $(SRCDIR)inffast.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/arminffast.o $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mv objs/arminffast.o $@
+
inffast.lo: $(SRCDIR)inffast.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
-@mv objs/inffast.o $@
inflate.lo: $(SRCDIR)inflate.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
-@mv objs/inflate.o $@
inftrees.lo: $(SRCDIR)inftrees.c
diff --git a/configure b/configure
index e974d1f..0c5f837 100755
--- a/configure
+++ b/configure
@@ -23,7 +23,7 @@ SRCDIR=`dirname $0`
if test $SRCDIR = "."; then
ZINC=""
ZINCOUT="-I."
- SRCDIR=""
+ SRCDIR="./"
else
ZINC='-include zconf.h'
ZINCOUT='-I. -I$(SRCDIR)'
diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c
index 44e88a9..0517930 100644
--- a/contrib/minizip/zip.c
+++ b/contrib/minizip/zip.c
@@ -519,15 +519,17 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f
break;
for (i=(int)uReadSize-3; (i--)>0;)
+ {
if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
{
uPosFound = uReadPos+i;
break;
}
+ }
- if (uPosFound!=0)
- break;
+ if (uPosFound!=0)
+ break;
}
TRYFREE(buf);
return uPosFound;
--
2.19.0

View File

@ -13,10 +13,6 @@ Source: https://www.zlib.net/zlib-%{version}.tar.xz
Patch0: zlib-1.2.5-minizip-fixuncrypt.patch
# resolves: #805113
Patch1: zlib-1.2.11-optimized-s390.patch
# general aarch64 optimizations
Patch4: 0001-Neon-Optimized-hash-chain-rebase.patch
Patch5: 0002-Porting-optimized-longest_match.patch
Patch6: 0003-arm64-specific-build-patch.patch
# IBM Z optimalizations
Patch7: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch
# IBM CRC32 optimalization for POWER archs
@ -92,11 +88,6 @@ developing applications which use minizip.
%ifarch s390 s390x
%patch1 -p1 -b .optimized-deflate
%endif
%ifarch aarch64
%patch4 -p1 -b .optimize-aarch64
%patch5 -p1 -b .optimize-aarch64
%patch6 -p1 -b .optimize-aarch64
%endif
%patch7 -p1
%patch8 -p1
%patch9 -p1
@ -113,9 +104,6 @@ mv ChangeLog.tmp ChangeLog
%build
export CFLAGS="$RPM_OPT_FLAGS"
%ifarch aarch64
CFLAGS+=" -DARM_NEON -O3"
%endif
%ifarch s390 s390x
CFLAGS+=" -DDFLTCC"
%endif
@ -191,6 +179,7 @@ find $RPM_BUILD_ROOT -name '*.la' -delete
%changelog
* Fri Feb 12 2021 Michal Schorm <mschorm@redhat.com> - 1.2.11-25
- Remove ancient PPC64 hack
- Remove aarch64 optimalizations (#1936823)
* Thu Jan 28 2021 Fedora Release Engineering <releng@fedoraproject.org> - 1.2.11-24
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild