aarch64 optimizations

This commit is contained in:
Jeremy Linton 2018-04-05 16:20:56 -05:00 committed by Peter Robinson
parent 4d2785ec31
commit 25e9802713
6 changed files with 2944 additions and 0 deletions

View File

@ -0,0 +1,157 @@
From f849a23e0afc8b8a670fda64eec8b573fe62daa7 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 13:52:17 -0700
Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
This should help with compression of data, using NEON instructions
(therefore useful for ARMv7/ARMv8).
Original patch by Jun He.
---
CMakeLists.txt | 5 ++-
contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++++++++++
deflate.c | 7 ++++
3 files changed, 95 insertions(+), 1 deletion(-)
create mode 100644 contrib/arm/neon_slide_hash.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98ee4dd..230ca6d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,7 +139,10 @@ if(CMAKE_COMPILER_IS_GNUCC)
if(ARM_NEON)
list(REMOVE_ITEM ZLIB_SRCS inflate.c)
- set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h contrib/arm/inffast_chunk.h)
+ set(ZLIB_ARM_NEON_HDRS
+ contrib/arm/chunkcopy.h
+ contrib/arm/inffast_chunk.h
+ contrib/arm/neon_slide_hash.h)
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER})
diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
new file mode 100644
index 0000000..0daffa1
--- /dev/null
+++ b/contrib/arm/neon_slide_hash.h
@@ -0,0 +1,84 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+ * Jun He <jun.he@arm.com>
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __NEON_SLIDE_HASH__
+#define __NEON_SLIDE_HASH__
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#include "deflate.h"
+#include <arm_neon.h>
+
+inline static void neon_slide_hash(deflate_state *s)
+{
+ /*
+ * This is ASIMD implementation for hash table rebase
+ * it assumes:
+ * 1. hash chain offset (Pos) is 2 bytes
+ * 2. hash table size is multiple*128 bytes
+ * #1 should be true as Pos is defined as "ush"
+ * #2 should be true as hash_bits are greater that 7
+ */
+ unsigned n, m;
+ unsigned short wsize = s->w_size;
+ uint16x8_t v, *p;
+ size_t size;
+
+ size = s->hash_size*sizeof(s->head[0]);
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
+
+ Assert(sizeof(Pos) == 2, "Wrong Pos size");
+
+ /* slide s->head */
+ v = vdupq_n_u16(wsize);
+ p = (uint16x8_t *)(s->head);
+ n = size / (sizeof(uint16x8_t) * 8);
+ do {
+ p[0] = vqsubq_u16(p[0], v);
+ p[1] = vqsubq_u16(p[1], v);
+ p[2] = vqsubq_u16(p[2], v);
+ p[3] = vqsubq_u16(p[3], v);
+ p[4] = vqsubq_u16(p[4], v);
+ p[5] = vqsubq_u16(p[5], v);
+ p[6] = vqsubq_u16(p[6], v);
+ p[7] = vqsubq_u16(p[7], v);
+ p += 8;
+ } while (--n);
+#ifndef FASTEST
+ /* slide s->prev */
+ size = wsize*sizeof(s->prev[0]);
+
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
+
+ p = (uint16x8_t *)(s->prev);
+ n = size / (sizeof(uint16x8_t) * 8);
+ do {
+ p[0] = vqsubq_u16(p[0], v);
+ p[1] = vqsubq_u16(p[1], v);
+ p[2] = vqsubq_u16(p[2], v);
+ p[3] = vqsubq_u16(p[3], v);
+ p[4] = vqsubq_u16(p[4], v);
+ p[5] = vqsubq_u16(p[5], v);
+ p[6] = vqsubq_u16(p[6], v);
+ p[7] = vqsubq_u16(p[7], v);
+ p += 8;
+ } while (--n);
+#endif
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 1ec7614..36f99ac 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,6 +50,9 @@
/* @(#) $Id$ */
#include "deflate.h"
+#if __ARM_NEON
+#include "contrib/arm/neon_slide_hash.h"
+#endif
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -201,6 +204,9 @@ local const config configuration_table[10] = {
local void slide_hash(s)
deflate_state *s;
{
+#if ARM_NEON
+ return neon_slide_hash(s);
+#else
unsigned n, m;
Posf *p;
uInt wsize = s->w_size;
@@ -222,6 +228,7 @@ local void slide_hash(s)
*/
} while (--n);
#endif
+#endif
}
/* ========================================================================= */
--
2.14.3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,147 @@
From 267e6f20170edb9a00b11fc3a2ca7649ea1c4464 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Wed, 4 Apr 2018 15:14:57 -0700
Subject: [PATCH 2/2] Port Fix InflateBack corner case
This handles the case where a zlib user could rely on InflateBack
API to decompress content.
The NEON optimization assumes that it can perform wide stores, sometimes
overwriting data on the output pointer (but never overflowing the buffer
end as it has enough room for the write).
For infback there is no such guarantees (i.e. no extra wiggle room),
which can result in illegal operations. This patch fixes the potential
issue by falling back to the non-optimized code for such cases.
Also it adds some comments about the entry assumptions in inflate and
writes out a defined value at the write buffer to identify where
the real data has ended (helpful while debugging).
For reference, please see:
https://chromium.googlesource.com/chromium/src/+/0bb11040792edc5b28fcb710fc4c01fedd98c97c
Change-Id: Iffbda9eb5e08a661aa15c6e3d1c59b678cc23b2c
---
CMakeLists.txt | 5 ++---
contrib/arm/{inffast.c => inffast_chunk.c} | 10 +++++++---
contrib/arm/inffast_chunk.h | 12 ++++++++++++
contrib/arm/inflate.c | 14 ++++++++++++--
4 files changed, 33 insertions(+), 8 deletions(-)
rename contrib/arm/{inffast.c => inffast_chunk.c} (97%)
create mode 100644 contrib/arm/inffast_chunk.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09bb3db..98ee4dd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,9 +139,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
if(ARM_NEON)
list(REMOVE_ITEM ZLIB_SRCS inflate.c)
- list(REMOVE_ITEM ZLIB_SRCS inffast.c)
- set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h)
- set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast.c)
+ set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h contrib/arm/inffast_chunk.h)
+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER})
# NEON is mandatory in ARMv8.
diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast_chunk.c
similarity index 97%
rename from contrib/arm/inffast.c
rename to contrib/arm/inffast_chunk.c
index f7f5007..0c5c583 100644
--- a/contrib/arm/inffast.c
+++ b/contrib/arm/inffast_chunk.c
@@ -6,8 +6,8 @@
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
-#include "chunkcopy.h"
+#include "contrib/arm/inffast_chunk.h"
+#include "contrib/arm/chunkcopy.h"
#ifdef ASMINF
# pragma message("Assembler code may have bugs -- use at your own risk")
@@ -28,6 +28,10 @@
strm->avail_out >= 258
start >= strm->avail_out
state->bits < 8
+ strm->next_out[0..strm->avail_out] does not overlap with
+ strm->next_in[0..strm->avail_in]
+ strm->state->window is allocated with an additional
+ CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
On return, state->mode is one of:
@@ -48,7 +52,7 @@
requires strm->avail_out >= 258 for each loop to avoid checking for
output space.
*/
-void ZLIB_INTERNAL inflate_fast(strm, start)
+void ZLIB_INTERNAL inflate_fast_chunk(strm, start)
z_streamp strm;
unsigned start; /* inflate()'s starting value for strm->avail_out */
{
diff --git a/contrib/arm/inffast_chunk.h b/contrib/arm/inffast_chunk.h
new file mode 100644
index 0000000..7839e1d
--- /dev/null
+++ b/contrib/arm/inffast_chunk.h
@@ -0,0 +1,12 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * Copyright (C) 2017 ARM, Inc.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+void ZLIB_INTERNAL inflate_fast_chunk OF((z_streamp strm, unsigned start));
diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
index 23e95f1..d860542 100644
--- a/contrib/arm/inflate.c
+++ b/contrib/arm/inflate.c
@@ -83,7 +83,7 @@
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
+#include "contrib/arm/inffast_chunk.h"
#include "contrib/arm/chunkcopy.h"
#ifdef MAKEFIXED
@@ -1056,7 +1056,7 @@ int flush;
case LEN:
if (have >= 6 && left >= 258) {
RESTORE();
- inflate_fast(strm, out);
+ inflate_fast_chunk(strm, out);
LOAD();
if (state->mode == TYPE)
state->back = -1;
@@ -1262,6 +1262,16 @@ int flush;
Note: a memory error from inflate() is non-recoverable.
*/
inf_leave:
+ /* We write a defined value in the unused space to help mark
+ * where the stream has ended. We don't use zeros as that can
+ * mislead clients relying on undefined behavior (i.e. assuming
+ * that the data is over when the buffer has a zero/null value).
+ */
+ if (left >= CHUNKCOPY_CHUNK_SIZE)
+ memset(put, 0x55, CHUNKCOPY_CHUNK_SIZE);
+ else
+ memset(put, 0x55, left);
+
RESTORE();
if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
(state->mode < CHECK || flush != Z_FINISH)))
--
2.14.3

View File

@ -0,0 +1,218 @@
From 0ad56061ade1afe2896af1acffa5e15fbe5c98ed Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 15:14:19 -0700
Subject: [PATCH 2/3] Porting optimized longest_match
This patch was contributed to zlib-ng and features an improved longest_match
function using the most distant hash code to reduce number of checks
(see: http://www.gildor.org/en/projects/zlib).
Original patch by Jun He.
---
CMakeLists.txt | 3 +-
contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++++++++++
deflate.c | 11 +++-
3 files changed, 152 insertions(+), 4 deletions(-)
create mode 100644 contrib/arm/arm_longest_match.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 230ca6d..c330093 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -142,7 +142,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
set(ZLIB_ARM_NEON_HDRS
contrib/arm/chunkcopy.h
contrib/arm/inffast_chunk.h
- contrib/arm/neon_slide_hash.h)
+ contrib/arm/neon_slide_hash.h
+ contrib/arm/arm_longest_match.h)
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER})
diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
new file mode 100644
index 0000000..9e7083f
--- /dev/null
+++ b/contrib/arm/arm_longest_match.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+ * Jun He <jun.he@arm.com>
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __ARM_LONGEST__MATCH__
+#define __ARM_LONGEST__MATCH__
+
+#if defined(ARM_NEON)
+#include "deflate.h"
+#include <stdint.h>
+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
+{
+ register int len = 0;
+ register unsigned long xor = 0;
+ register int check_loops = max/sizeof(unsigned long);
+ while(check_loops-- > 0) {
+ xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len));
+ if (xor) break;
+ len += sizeof(unsigned long);
+ }
+ if (0 == xor) {
+ while (len < max) {
+ if (a[len] != b[len]) break;
+ len++;
+ }
+ return len;
+ }
+ xor = __builtin_ctzl(xor)>>3;
+ return len + xor;
+}
+
+/*
+ * This implementation is based on algorithm described at:
+ * http://www.gildor.org/en/projects/zlib
+ * It uses the hash chain indexed by the most distant hash code to
+ * reduce number of checks.
+ * This also eliminates the those unnecessary check loops in legacy
+ * longest_match's do..while loop if the "most distant code" is out
+ * of search buffer
+ *
+ */
+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
+ unsigned char *scan = s->window + s->strstart; /* current string */
+ unsigned char *match; /* matched string */
+ unsigned int len; /* length of current match */
+ unsigned int best_len = s->prev_length; /* best match length so far */
+ unsigned int nice_match = s->nice_match; /* stop if match long enough */
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ s->strstart - (IPos)MAX_DIST(s) : 0;
+ /* Stop when cur_match becomes <= limit. To simplify the code,
+ * we prevent matches with the string of window index 0.
+ */
+ int offset = 0; /* offset of the head[most_distant_hash] from IN cur_match */
+ Pos *prev = s->prev;
+ unsigned int wmask = s->w_mask;
+ unsigned char *scan_buf_base = s->window;
+
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+ * It is easy to get rid of this optimization if necessary.
+ */
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+ /* Do not look for matches beyond the end of the input. This is necessary
+ * to make deflate deterministic.
+ */
+ if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+ /* find most distant hash code for lazy_match */
+ if (best_len > MIN_MATCH) {
+ /* search for most distant hash code */
+ int i;
+ uint16_t hash = 0;
+ IPos pos;
+
+ UPDATE_HASH(s, hash, scan[1]);
+ UPDATE_HASH(s, hash, scan[2]);
+ for (i = 3; i <= best_len; i++) {
+ UPDATE_HASH(s, hash, scan[i]);
+ /* get head IPos of hash calced by scan[i-2..i] */
+ pos = s->head[hash];
+ /* compare it to current "farthest hash" IPos */
+ if (pos <= cur_match) {
+ /* we have a new "farthest hash" now */
+ offset = i - 2;
+ cur_match = pos;
+ }
+ }
+
+ /* update variables to correspond offset */
+ limit += offset;
+ /*
+ * check if the most distant code's offset is out of search buffer
+ * if it is true, then this means scan[offset..offset+2] are not
+ * presented in the search buffer. So we just return best_len
+ * we've found.
+ */
+ if (cur_match < limit) return best_len;
+
+ scan_buf_base -= offset;
+ /* reduce hash search depth based on best_len */
+ chain_length /= best_len - MIN_MATCH;
+ }
+
+ do {
+ Assert(cur_match < s->strstart, "no future");
+
+ /* Determine matched length at current pos */
+ match = scan_buf_base + cur_match;
+ len = get_match_len(match, scan, MAX_MATCH);
+
+ if (len > best_len) {
+ /* found longer string */
+ s->match_start = cur_match - offset;
+ best_len = len;
+ /* good enough? */
+ if (len >= nice_match) break;
+ }
+ /* move to prev pos in this hash chain */
+ } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+
+ return (best_len <= s->lookahead)? best_len : s->lookahead;
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 36f99ac..4c42259 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,9 +50,6 @@
/* @(#) $Id$ */
#include "deflate.h"
-#if __ARM_NEON
-#include "contrib/arm/neon_slide_hash.h"
-#endif
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
s->head[s->hash_size-1] = NIL; \
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+#if defined(ARM_NEON)
+#include "contrib/arm/arm_longest_match.h"
+#include "contrib/arm/neon_slide_hash.h"
+#endif
+
/* ===========================================================================
* Slide the hash table when sliding the window down (could be avoided with 32
* bit values at the expense of memory usage). We slide even when level == 0 to
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
deflate_state *s;
IPos cur_match; /* current match */
{
+#if defined(ARM_NEON)
+ return arm_longest_match(s, cur_match);
+#endif
unsigned chain_length = s->max_chain_length;/* max hash chain length */
register Bytef *scan = s->window + s->strstart; /* current string */
register Bytef *match; /* matched string */
--
2.14.3

View File

@ -0,0 +1,136 @@
From bd30e5ff76aab2668ebfd46e5dbadc44322960c1 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 6 Apr 2018 11:46:42 -0500
Subject: [PATCH 3/3] arm64 specific build patch
---
Makefile.in | 37 +++++++++++++++++++++++++++----------
configure | 2 +-
contrib/minizip/zip.c | 6 ++++--
3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/Makefile.in b/Makefile.in
index 5a77949..1a1e452 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -57,11 +57,11 @@ SRCDIR=
ZINC=
ZINCOUT=-I.
-OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o
+OBJZ = adler32.o crc32.o deflate.o infback.o arminffast.o inffast.o inflate.o inflate_chunk.o inftrees.o trees.o zutil.o
OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
OBJC = $(OBJZ) $(OBJG)
-PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo
+PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inflate_chunk.lo inftrees.lo trees.lo zutil.lo
PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo
PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
@@ -163,16 +163,22 @@ crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
deflate.o: $(SRCDIR)deflate.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)deflate.c
infback.o: $(SRCDIR)infback.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c
inffast.o: $(SRCDIR)inffast.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c
-inflate.o: $(SRCDIR)inflate.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c
+arminffast.o: $(SRCDIR)contrib/arm/inffast_chunk.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inffast_chunk.c
+
+inflate.o: $(SRCDIR)contrib/arm/inflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inflate.c
+
+inflate_chunk.o: $(SRCDIR)contrib/arm/inffast_chunk.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inffast_chunk.c
inftrees.o: $(SRCDIR)inftrees.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c
@@ -214,7 +220,7 @@ crc32.lo: $(SRCDIR)crc32.c
deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
-@mv objs/deflate.o $@
infback.lo: $(SRCDIR)infback.c
@@ -222,16 +228,27 @@ infback.lo: $(SRCDIR)infback.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c
-@mv objs/infback.o $@
+arminffast.lo: $(SRCDIR)contrib/arm/inffast_chunk.c $(SRCDIR)inffast.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/arminffast.o $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mv objs/arminffast.o $@
+
inffast.lo: $(SRCDIR)inffast.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
-@mv objs/inffast.o $@
-inflate.lo: $(SRCDIR)inflate.c
+inflate.lo: $(SRCDIR)contrib/arm/inflate.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)contrib/arm/inflate.c
-@mv objs/inflate.o $@
+inflate_chunk.lo: $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate_chunk.o $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mv objs/inflate_chunk.o $@
+
+
inftrees.lo: $(SRCDIR)inftrees.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inftrees.o $(SRCDIR)inftrees.c
diff --git a/configure b/configure
index e974d1f..0c5f837 100755
--- a/configure
+++ b/configure
@@ -23,7 +23,7 @@ SRCDIR=`dirname $0`
if test $SRCDIR = "."; then
ZINC=""
ZINCOUT="-I."
- SRCDIR=""
+ SRCDIR="./"
else
ZINC='-include zconf.h'
ZINCOUT='-I. -I$(SRCDIR)'
diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c
index 44e88a9..0517930 100644
--- a/contrib/minizip/zip.c
+++ b/contrib/minizip/zip.c
@@ -519,15 +519,17 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f
break;
for (i=(int)uReadSize-3; (i--)>0;)
+ {
if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
{
uPosFound = uReadPos+i;
break;
}
+ }
- if (uPosFound!=0)
- break;
+ if (uPosFound!=0)
+ break;
}
TRYFREE(buf);
return uPosFound;
--
2.14.3

View File

@ -12,6 +12,12 @@ Source: http://www.zlib.net/zlib-%{version}.tar.xz
Patch0: zlib-1.2.5-minizip-fixuncrypt.patch Patch0: zlib-1.2.5-minizip-fixuncrypt.patch
# resolves: #805113 # resolves: #805113
Patch1: zlib-1.2.11-optimized-s390.patch Patch1: zlib-1.2.11-optimized-s390.patch
# general aarch64 optimizations
Patch2: 0001-Porting-inflate-using-wider-loads-and-stores.patch
Patch3: 0002-Port-Fix-InflateBack-corner-case.patch
Patch4: 0001-Neon-Optimized-hash-chain-rebase.patch
Patch5: 0002-Porting-optimized-longest_match.patch
Patch6: 0003-arm64-specific-build-patch.patch
BuildRequires: automake, autoconf, libtool BuildRequires: automake, autoconf, libtool
@ -64,6 +70,14 @@ developing applications which use minizip.
%ifarch s390 s390x %ifarch s390 s390x
%patch1 -p1 -b .optimized-deflate %patch1 -p1 -b .optimized-deflate
%endif %endif
%ifarch aarch64
%patch2 -p1 -b .optimize-aarch64
%patch3 -p1 -b .optimize-aarch64
%patch4 -p1 -b .optimize-aarch64
%patch5 -p1 -b .optimize-aarch64
%patch6 -p1 -b .optimize-aarch64
%endif
iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp
mv ChangeLog.tmp ChangeLog mv ChangeLog.tmp ChangeLog
@ -73,6 +87,10 @@ export CFLAGS="$RPM_OPT_FLAGS"
%ifarch ppc64 %ifarch ppc64
CFLAGS+=" -O3" CFLAGS+=" -O3"
%endif %endif
%ifarch aarch64
CFLAGS+=" -DARM_NEON -O3"
%endif
export LDFLAGS="$LDFLAGS -Wl,-z,relro -Wl,-z,now" export LDFLAGS="$LDFLAGS -Wl,-z,relro -Wl,-z,now"
./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix}
make %{?_smp_mflags} make %{?_smp_mflags}