606fa411e8
The content of this branch was automatically imported from Fedora ELN with the following as its source: https://src.fedoraproject.org/rpms/zlib#4838139649437159df1a95cf2b73d6358bc7770d
171 lines
5.5 KiB
Diff
171 lines
5.5 KiB
Diff
From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001
|
|
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
|
Date: Mon, 9 Apr 2018 13:52:17 -0700
|
|
Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
|
|
|
|
This should help with compression of data, using NEON instructions
|
|
(therefore useful for ARMv7/ARMv8).
|
|
|
|
Original patch by Jun He.
|
|
---
|
|
CMakeLists.txt | 18 ++++++++
|
|
contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++
|
|
deflate.c | 7 +++
|
|
3 files changed, 109 insertions(+)
|
|
create mode 100644 contrib/arm/neon_slide_hash.h
|
|
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index 0fe939d..e9a74e9 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
|
set(ZLIB_ASMS contrib/amd64/amd64-match.S)
|
|
endif ()
|
|
|
|
+ if(ARM_NEON)
|
|
+ list(REMOVE_ITEM ZLIB_SRCS inflate.c)
|
|
+ set(ZLIB_ARM_NEON_HDRS
|
|
+ contrib/arm/chunkcopy.h
|
|
+ contrib/arm/inffast_chunk.h
|
|
+ contrib/arm/neon_slide_hash.h)
|
|
+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
|
|
+ add_definitions(-DARM_NEON)
|
|
+ set(COMPILER ${CMAKE_C_COMPILER})
|
|
+ # NEON is mandatory in ARMv8.
|
|
+ if(${COMPILER} MATCHES "aarch64")
|
|
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
|
|
+ # But it was optional for ARMv7.
|
|
+ elseif(${COMPILER} MATCHES "arm")
|
|
+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
|
|
+ endif()
|
|
+ endif()
|
|
+
|
|
if(ZLIB_ASMS)
|
|
add_definitions(-DASMV)
|
|
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
|
|
diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
|
|
new file mode 100644
|
|
index 0000000..0daffa1
|
|
--- /dev/null
|
|
+++ b/contrib/arm/neon_slide_hash.h
|
|
@@ -0,0 +1,84 @@
|
|
+/* Copyright (C) 1995-2011, 2016 Mark Adler
|
|
+ * Copyright (C) 2017 ARM Holdings Inc.
|
|
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
|
+ * Jun He <jun.he@arm.com>
|
|
+ * This software is provided 'as-is', without any express or implied
|
|
+ * warranty. In no event will the authors be held liable for any damages
|
|
+ * arising from the use of this software.
|
|
+ * Permission is granted to anyone to use this software for any purpose,
|
|
+ * including commercial applications, and to alter it and redistribute it
|
|
+ * freely, subject to the following restrictions:
|
|
+ * 1. The origin of this software must not be misrepresented; you must not
|
|
+ * claim that you wrote the original software. If you use this software
|
|
+ * in a product, an acknowledgment in the product documentation would be
|
|
+ * appreciated but is not required.
|
|
+ * 2. Altered source versions must be plainly marked as such, and must not be
|
|
+ * misrepresented as being the original software.
|
|
+ * 3. This notice may not be removed or altered from any source distribution.
|
|
+ */
|
|
+#ifndef __NEON_SLIDE_HASH__
|
|
+#define __NEON_SLIDE_HASH__
|
|
+
|
|
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
|
|
+#include "deflate.h"
|
|
+#include <arm_neon.h>
|
|
+
|
|
+inline static void neon_slide_hash(deflate_state *s)
|
|
+{
|
|
+ /*
|
|
+ * This is an ASIMD implementation of the hash table rebase;
|
|
+ * it assumes:
|
|
+ * 1. hash chain offset (Pos) is 2 bytes
|
|
+ * 2. hash table size is a multiple of 128 bytes
|
|
+ * #1 should be true as Pos is defined as "ush"
|
|
+ * #2 should be true as hash_bits are greater than 7
|
|
+ */
|
|
+ unsigned n, m;
|
|
+ unsigned short wsize = s->w_size;
|
|
+ uint16x8_t v, *p;
|
|
+ size_t size;
|
|
+
|
|
+ size = s->hash_size*sizeof(s->head[0]);
|
|
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
|
|
+
|
|
+ Assert(sizeof(Pos) == 2, "Wrong Pos size");
|
|
+
|
|
+ /* slide s->head */
|
|
+ v = vdupq_n_u16(wsize);
|
|
+ p = (uint16x8_t *)(s->head);
|
|
+ n = size / (sizeof(uint16x8_t) * 8);
|
|
+ do {
|
|
+ p[0] = vqsubq_u16(p[0], v);
|
|
+ p[1] = vqsubq_u16(p[1], v);
|
|
+ p[2] = vqsubq_u16(p[2], v);
|
|
+ p[3] = vqsubq_u16(p[3], v);
|
|
+ p[4] = vqsubq_u16(p[4], v);
|
|
+ p[5] = vqsubq_u16(p[5], v);
|
|
+ p[6] = vqsubq_u16(p[6], v);
|
|
+ p[7] = vqsubq_u16(p[7], v);
|
|
+ p += 8;
|
|
+ } while (--n);
|
|
+#ifndef FASTEST
|
|
+ /* slide s->prev */
|
|
+ size = wsize*sizeof(s->prev[0]);
|
|
+
|
|
+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
|
|
+
|
|
+ p = (uint16x8_t *)(s->prev);
|
|
+ n = size / (sizeof(uint16x8_t) * 8);
|
|
+ do {
|
|
+ p[0] = vqsubq_u16(p[0], v);
|
|
+ p[1] = vqsubq_u16(p[1], v);
|
|
+ p[2] = vqsubq_u16(p[2], v);
|
|
+ p[3] = vqsubq_u16(p[3], v);
|
|
+ p[4] = vqsubq_u16(p[4], v);
|
|
+ p[5] = vqsubq_u16(p[5], v);
|
|
+ p[6] = vqsubq_u16(p[6], v);
|
|
+ p[7] = vqsubq_u16(p[7], v);
|
|
+ p += 8;
|
|
+ } while (--n);
|
|
+#endif
|
|
+}
|
|
+
|
|
+#endif
|
|
+#endif
|
|
diff --git a/deflate.c b/deflate.c
|
|
index 1ec7614..36f99ac 100644
|
|
--- a/deflate.c
|
|
+++ b/deflate.c
|
|
@@ -50,6 +50,9 @@
|
|
/* @(#) $Id$ */
|
|
|
|
#include "deflate.h"
|
|
+#if __ARM_NEON
|
|
+#include "contrib/arm/neon_slide_hash.h"
|
|
+#endif
|
|
|
|
const char deflate_copyright[] =
|
|
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
|
|
@@ -201,6 +204,9 @@ local const config configuration_table[10] = {
|
|
local void slide_hash(s)
|
|
deflate_state *s;
|
|
{
|
|
+#if ARM_NEON
|
|
+ return neon_slide_hash(s);
|
|
+#else
|
|
unsigned n, m;
|
|
Posf *p;
|
|
uInt wsize = s->w_size;
|
|
@@ -222,6 +228,7 @@ local void slide_hash(s)
|
|
*/
|
|
} while (--n);
|
|
#endif
|
|
+#endif
|
|
}
|
|
|
|
/* ========================================================================= */
|
|
--
|
|
2.19.0
|
|
|