Revert aarch64 neon inflate patches until the buffer poison issues are solved

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
This commit is contained in:
Jeremy Linton 2018-09-18 13:11:17 -05:00
parent 58764fc84f
commit 71a74f9c86
6 changed files with 47 additions and 2474 deletions

View File

@ -1,4 +1,4 @@
From f849a23e0afc8b8a670fda64eec8b573fe62daa7 Mon Sep 17 00:00:00 2001 From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com> From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 13:52:17 -0700 Date: Mon, 9 Apr 2018 13:52:17 -0700
Subject: [PATCH 1/3] Neon-Optimized hash chain rebase Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
@ -8,28 +8,41 @@ This should help with compression of data, using NEON instructions
Original patch by Jun He. Original patch by Jun He.
--- ---
CMakeLists.txt | 5 ++- CMakeLists.txt | 18 ++++++++
contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++++++++++ contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++
deflate.c | 7 ++++ deflate.c | 7 +++
3 files changed, 95 insertions(+), 1 deletion(-) 3 files changed, 109 insertions(+)
create mode 100644 contrib/arm/neon_slide_hash.h create mode 100644 contrib/arm/neon_slide_hash.h
diff --git a/CMakeLists.txt b/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98ee4dd..230ca6d 100644 index 0fe939d..e9a74e9 100644
--- a/CMakeLists.txt --- a/CMakeLists.txt
+++ b/CMakeLists.txt +++ b/CMakeLists.txt
@@ -139,7 +139,10 @@ if(CMAKE_COMPILER_IS_GNUCC) @@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC)
set(ZLIB_ASMS contrib/amd64/amd64-match.S)
endif ()
if(ARM_NEON) + if(ARM_NEON)
list(REMOVE_ITEM ZLIB_SRCS inflate.c) + list(REMOVE_ITEM ZLIB_SRCS inflate.c)
- set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h contrib/arm/inffast_chunk.h)
+ set(ZLIB_ARM_NEON_HDRS + set(ZLIB_ARM_NEON_HDRS
+ contrib/arm/chunkcopy.h + contrib/arm/chunkcopy.h
+ contrib/arm/inffast_chunk.h + contrib/arm/inffast_chunk.h
+ contrib/arm/neon_slide_hash.h) + contrib/arm/neon_slide_hash.h)
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c) + set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON) + add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER}) + set(COMPILER ${CMAKE_C_COMPILER})
+ # NEON is mandatory in ARMv8.
+ if(${COMPILER} MATCHES "aarch64")
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
+ # But it was optional for ARMv7.
+ elseif(${COMPILER} MATCHES "arm")
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
+ endif()
+ endif()
+
if(ZLIB_ASMS)
add_definitions(-DASMV)
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
new file mode 100644 new file mode 100644
index 0000000..0daffa1 index 0000000..0daffa1
@ -153,5 +166,5 @@ index 1ec7614..36f99ac 100644
/* ========================================================================= */ /* ========================================================================= */
-- --
2.14.3 2.19.0

File diff suppressed because it is too large Load Diff

View File

@ -1,147 +0,0 @@
From 267e6f20170edb9a00b11fc3a2ca7649ea1c4464 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Wed, 4 Apr 2018 15:14:57 -0700
Subject: [PATCH 2/2] Port Fix InflateBack corner case
This handles the case where a zlib user could rely on InflateBack
API to decompress content.
The NEON optimization assumes that it can perform wide stores, sometimes
overwriting data on the output pointer (but never overflowing the buffer
end as it has enough room for the write).
For infback there are no such guarantees (i.e. no extra wiggle room),
which can result in illegal operations. This patch fixes the potential
issue by falling back to the non-optimized code for such cases.
Also it adds some comments about the entry assumptions in inflate and
writes out a defined value at the write buffer to identify where
the real data has ended (helpful while debugging).
For reference, please see:
https://chromium.googlesource.com/chromium/src/+/0bb11040792edc5b28fcb710fc4c01fedd98c97c
Change-Id: Iffbda9eb5e08a661aa15c6e3d1c59b678cc23b2c
---
CMakeLists.txt | 5 ++---
contrib/arm/{inffast.c => inffast_chunk.c} | 10 +++++++---
contrib/arm/inffast_chunk.h | 12 ++++++++++++
contrib/arm/inflate.c | 14 ++++++++++++--
4 files changed, 33 insertions(+), 8 deletions(-)
rename contrib/arm/{inffast.c => inffast_chunk.c} (97%)
create mode 100644 contrib/arm/inffast_chunk.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09bb3db..98ee4dd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,9 +139,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
if(ARM_NEON)
list(REMOVE_ITEM ZLIB_SRCS inflate.c)
- list(REMOVE_ITEM ZLIB_SRCS inffast.c)
- set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h)
- set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast.c)
+ set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h contrib/arm/inffast_chunk.h)
+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
add_definitions(-DARM_NEON)
set(COMPILER ${CMAKE_C_COMPILER})
# NEON is mandatory in ARMv8.
diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast_chunk.c
similarity index 97%
rename from contrib/arm/inffast.c
rename to contrib/arm/inffast_chunk.c
index f7f5007..0c5c583 100644
--- a/contrib/arm/inffast.c
+++ b/contrib/arm/inffast_chunk.c
@@ -6,8 +6,8 @@
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
-#include "chunkcopy.h"
+#include "contrib/arm/inffast_chunk.h"
+#include "contrib/arm/chunkcopy.h"
#ifdef ASMINF
# pragma message("Assembler code may have bugs -- use at your own risk")
@@ -28,6 +28,10 @@
strm->avail_out >= 258
start >= strm->avail_out
state->bits < 8
+ strm->next_out[0..strm->avail_out] does not overlap with
+ strm->next_in[0..strm->avail_in]
+ strm->state->window is allocated with an additional
+ CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
On return, state->mode is one of:
@@ -48,7 +52,7 @@
requires strm->avail_out >= 258 for each loop to avoid checking for
output space.
*/
-void ZLIB_INTERNAL inflate_fast(strm, start)
+void ZLIB_INTERNAL inflate_fast_chunk(strm, start)
z_streamp strm;
unsigned start; /* inflate()'s starting value for strm->avail_out */
{
diff --git a/contrib/arm/inffast_chunk.h b/contrib/arm/inffast_chunk.h
new file mode 100644
index 0000000..7839e1d
--- /dev/null
+++ b/contrib/arm/inffast_chunk.h
@@ -0,0 +1,12 @@
+/* inffast_chunk.h -- header to use inffast_chunk.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * Copyright (C) 2017 ARM, Inc.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+void ZLIB_INTERNAL inflate_fast_chunk OF((z_streamp strm, unsigned start));
diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
index 23e95f1..d860542 100644
--- a/contrib/arm/inflate.c
+++ b/contrib/arm/inflate.c
@@ -83,7 +83,7 @@
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
+#include "contrib/arm/inffast_chunk.h"
#include "contrib/arm/chunkcopy.h"
#ifdef MAKEFIXED
@@ -1056,7 +1056,7 @@ int flush;
case LEN:
if (have >= 6 && left >= 258) {
RESTORE();
- inflate_fast(strm, out);
+ inflate_fast_chunk(strm, out);
LOAD();
if (state->mode == TYPE)
state->back = -1;
@@ -1262,6 +1262,16 @@ int flush;
Note: a memory error from inflate() is non-recoverable.
*/
inf_leave:
+ /* We write a defined value in the unused space to help mark
+ * where the stream has ended. We don't use zeros as that can
+ * mislead clients relying on undefined behavior (i.e. assuming
+ * that the data is over when the buffer has a zero/null value).
+ */
+ if (left >= CHUNKCOPY_CHUNK_SIZE)
+ memset(put, 0x55, CHUNKCOPY_CHUNK_SIZE);
+ else
+ memset(put, 0x55, left);
+
RESTORE();
if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
(state->mode < CHECK || flush != Z_FINISH)))
--
2.14.3

View File

@ -1,4 +1,4 @@
From 0ad56061ade1afe2896af1acffa5e15fbe5c98ed Mon Sep 17 00:00:00 2001 From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com> From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 15:14:19 -0700 Date: Mon, 9 Apr 2018 15:14:19 -0700
Subject: [PATCH 2/3] Porting optimized longest_match Subject: [PATCH 2/3] Porting optimized longest_match
@ -10,16 +10,16 @@ function using the most distant hash code to reduce number of checks
Original patch by Jun He. Original patch by Jun He.
--- ---
CMakeLists.txt | 3 +- CMakeLists.txt | 3 +-
contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++++++++++ contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++
deflate.c | 11 +++- deflate.c | 11 ++-
3 files changed, 152 insertions(+), 4 deletions(-) 3 files changed, 152 insertions(+), 4 deletions(-)
create mode 100644 contrib/arm/arm_longest_match.h create mode 100644 contrib/arm/arm_longest_match.h
diff --git a/CMakeLists.txt b/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt
index 230ca6d..c330093 100644 index e9a74e9..3826eba 100644
--- a/CMakeLists.txt --- a/CMakeLists.txt
+++ b/CMakeLists.txt +++ b/CMakeLists.txt
@@ -142,7 +142,8 @@ if(CMAKE_COMPILER_IS_GNUCC) @@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
set(ZLIB_ARM_NEON_HDRS set(ZLIB_ARM_NEON_HDRS
contrib/arm/chunkcopy.h contrib/arm/chunkcopy.h
contrib/arm/inffast_chunk.h contrib/arm/inffast_chunk.h
@ -214,5 +214,5 @@ index 36f99ac..4c42259 100644
register Bytef *scan = s->window + s->strstart; /* current string */ register Bytef *scan = s->window + s->strstart; /* current string */
register Bytef *match; /* matched string */ register Bytef *match; /* matched string */
-- --
2.14.3 2.19.0

View File

@ -1,33 +1,28 @@
From bd30e5ff76aab2668ebfd46e5dbadc44322960c1 Mon Sep 17 00:00:00 2001 From e0be75f8dce27a4e32196529df2a08dca791a286 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com> From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 6 Apr 2018 11:46:42 -0500 Date: Fri, 6 Apr 2018 11:46:42 -0500
Subject: [PATCH 3/3] arm64 specific build patch Subject: [PATCH 3/3] arm64 specific build patch
--- ---
Makefile.in | 37 +++++++++++++++++++++++++++---------- Makefile.in | 19 ++++++++++++-------
configure | 2 +- configure | 2 +-
contrib/minizip/zip.c | 6 ++++-- contrib/minizip/zip.c | 6 ++++--
3 files changed, 32 insertions(+), 13 deletions(-) 3 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/Makefile.in b/Makefile.in diff --git a/Makefile.in b/Makefile.in
index 5a77949..1a1e452 100644 index 5a77949..9f088e5 100644
--- a/Makefile.in --- a/Makefile.in
+++ b/Makefile.in +++ b/Makefile.in
@@ -57,11 +57,11 @@ SRCDIR= @@ -57,7 +57,7 @@ SRCDIR=
ZINC= ZINC=
ZINCOUT=-I. ZINCOUT=-I.
-OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o -OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o
+OBJZ = adler32.o crc32.o deflate.o infback.o arminffast.o inffast.o inflate.o inflate_chunk.o inftrees.o trees.o zutil.o +OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o
OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
OBJC = $(OBJZ) $(OBJG) OBJC = $(OBJZ) $(OBJG)
-PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo @@ -163,16 +163,16 @@ crc32.o: $(SRCDIR)crc32.c
+PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inflate_chunk.lo inftrees.lo trees.lo zutil.lo
PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo
PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
@@ -163,16 +163,22 @@ crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
deflate.o: $(SRCDIR)deflate.c deflate.o: $(SRCDIR)deflate.c
@ -41,20 +36,13 @@ index 5a77949..1a1e452 100644
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c + $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c
-inflate.o: $(SRCDIR)inflate.c inflate.o: $(SRCDIR)inflate.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c
+arminffast.o: $(SRCDIR)contrib/arm/inffast_chunk.c + $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inffast_chunk.c
+
+inflate.o: $(SRCDIR)contrib/arm/inflate.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inflate.c
+
+inflate_chunk.o: $(SRCDIR)contrib/arm/inffast_chunk.c
+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)contrib/arm/inffast_chunk.c
inftrees.o: $(SRCDIR)inftrees.c inftrees.o: $(SRCDIR)inftrees.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c
@@ -214,7 +220,7 @@ crc32.lo: $(SRCDIR)crc32.c @@ -214,7 +214,7 @@ crc32.lo: $(SRCDIR)crc32.c
deflate.lo: $(SRCDIR)deflate.c deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs -@mkdir objs 2>/dev/null || test -d objs
@ -63,7 +51,7 @@ index 5a77949..1a1e452 100644
-@mv objs/deflate.o $@ -@mv objs/deflate.o $@
infback.lo: $(SRCDIR)infback.c infback.lo: $(SRCDIR)infback.c
@@ -222,16 +228,27 @@ infback.lo: $(SRCDIR)infback.c @@ -222,14 +222,19 @@ infback.lo: $(SRCDIR)infback.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c
-@mv objs/infback.o $@ -@mv objs/infback.o $@
@ -78,22 +66,13 @@ index 5a77949..1a1e452 100644
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c + $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
-@mv objs/inffast.o $@ -@mv objs/inffast.o $@
-inflate.lo: $(SRCDIR)inflate.c inflate.lo: $(SRCDIR)inflate.c
+inflate.lo: $(SRCDIR)contrib/arm/inflate.c
-@mkdir objs 2>/dev/null || test -d objs -@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)contrib/arm/inflate.c + $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
-@mv objs/inflate.o $@ -@mv objs/inflate.o $@
+inflate_chunk.lo: $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate_chunk.o $(SRCDIR)contrib/arm/inffast_chunk.c
+ -@mv objs/inflate_chunk.o $@
+
+
inftrees.lo: $(SRCDIR)inftrees.c inftrees.lo: $(SRCDIR)inftrees.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inftrees.o $(SRCDIR)inftrees.c
diff --git a/configure b/configure diff --git a/configure b/configure
index e974d1f..0c5f837 100755 index e974d1f..0c5f837 100755
--- a/configure --- a/configure
@ -132,5 +111,5 @@ index 44e88a9..0517930 100644
TRYFREE(buf); TRYFREE(buf);
return uPosFound; return uPosFound;
-- --
2.14.3 2.19.0

View File

@ -14,8 +14,6 @@ Patch0: zlib-1.2.5-minizip-fixuncrypt.patch
# resolves: #805113 # resolves: #805113
Patch1: zlib-1.2.11-optimized-s390.patch Patch1: zlib-1.2.11-optimized-s390.patch
# general aarch64 optimizations # general aarch64 optimizations
Patch2: 0001-Porting-inflate-using-wider-loads-and-stores.patch
Patch3: 0002-Port-Fix-InflateBack-corner-case.patch
Patch4: 0001-Neon-Optimized-hash-chain-rebase.patch Patch4: 0001-Neon-Optimized-hash-chain-rebase.patch
Patch5: 0002-Porting-optimized-longest_match.patch Patch5: 0002-Porting-optimized-longest_match.patch
Patch6: 0003-arm64-specific-build-patch.patch Patch6: 0003-arm64-specific-build-patch.patch
@ -77,8 +75,6 @@ developing applications which use minizip.
%patch1 -p1 -b .optimized-deflate %patch1 -p1 -b .optimized-deflate
%endif %endif
%ifarch aarch64 %ifarch aarch64
%patch2 -p1 -b .optimize-aarch64
%patch3 -p1 -b .optimize-aarch64
%patch4 -p1 -b .optimize-aarch64 %patch4 -p1 -b .optimize-aarch64
%patch5 -p1 -b .optimize-aarch64 %patch5 -p1 -b .optimize-aarch64
%patch6 -p1 -b .optimize-aarch64 %patch6 -p1 -b .optimize-aarch64