158 lines
5.0 KiB
Diff
158 lines
5.0 KiB
Diff
|
From f849a23e0afc8b8a670fda64eec8b573fe62daa7 Mon Sep 17 00:00:00 2001
|
||
|
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||
|
Date: Mon, 9 Apr 2018 13:52:17 -0700
|
||
|
Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
|
||
|
|
||
|
This should help with compression of data, using NEON instructions
|
||
|
(therefore useful for ARMv7/ARMv8).
|
||
|
|
||
|
Original patch by Jun He.
|
||
|
---
|
||
|
CMakeLists.txt | 5 ++-
|
||
|
contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++++++++++
|
||
|
deflate.c | 7 ++++
|
||
|
3 files changed, 95 insertions(+), 1 deletion(-)
|
||
|
create mode 100644 contrib/arm/neon_slide_hash.h
|
||
|
|
||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||
|
index 98ee4dd..230ca6d 100644
|
||
|
--- a/CMakeLists.txt
|
||
|
+++ b/CMakeLists.txt
|
||
|
@@ -139,7 +139,10 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||
|
|
||
|
if(ARM_NEON)
|
||
|
list(REMOVE_ITEM ZLIB_SRCS inflate.c)
|
||
|
- set(ZLIB_ARM_NEON_HDRS contrib/arm/chunkcopy.h contrib/arm/inffast_chunk.h)
|
||
|
+ set(ZLIB_ARM_NEON_HDRS
|
||
|
+ contrib/arm/chunkcopy.h
|
||
|
+ contrib/arm/inffast_chunk.h
|
||
|
+ contrib/arm/neon_slide_hash.h)
|
||
|
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
|
||
|
add_definitions(-DARM_NEON)
|
||
|
set(COMPILER ${CMAKE_C_COMPILER})
|
||
|
diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
|
||
|
new file mode 100644
|
||
|
index 0000000..0daffa1
|
||
|
--- /dev/null
|
||
|
+++ b/contrib/arm/neon_slide_hash.h
|
||
|
@@ -0,0 +1,84 @@
|
||
|
+/* Copyright (C) 1995-2011, 2016 Mark Adler
|
||
|
+ * Copyright (C) 2017 ARM Holdings Inc.
|
||
|
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||
|
+ * Jun He <jun.he@arm.com>
|
||
|
+ * This software is provided 'as-is', without any express or implied
|
||
|
+ * warranty. In no event will the authors be held liable for any damages
|
||
|
+ * arising from the use of this software.
|
||
|
+ * Permission is granted to anyone to use this software for any purpose,
|
||
|
+ * including commercial applications, and to alter it and redistribute it
|
||
|
+ * freely, subject to the following restrictions:
|
||
|
+ * 1. The origin of this software must not be misrepresented; you must not
|
||
|
+ * claim that you wrote the original software. If you use this software
|
||
|
+ * in a product, an acknowledgment in the product documentation would be
|
||
|
+ * appreciated but is not required.
|
||
|
+ * 2. Altered source versions must be plainly marked as such, and must not be
|
||
|
+ * misrepresented as being the original software.
|
||
|
+ * 3. This notice may not be removed or altered from any source distribution.
|
||
|
+ */
|
||
|
+#ifndef __NEON_SLIDE_HASH__
|
||
|
+#define __NEON_SLIDE_HASH__
|
||
|
+
|
||
|
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
|
||
|
+#include "deflate.h"
|
||
|
+#include <arm_neon.h>
|
||
|
+
|
||
|
+inline static void neon_slide_hash(deflate_state *s)
|
||
|
+{
|
||
|
+ /*
|
||
|
+ * NEON (ASIMD) hash table rebase: subtracts w_size from every entry of
|
||
|
+ * s->head (and of s->prev unless FASTEST), saturating at zero. It assumes:
|
||
|
+ * 1. hash chain offset (Pos) is 2 bytes
|
||
|
+ * 2. each table size is a multiple of 128 bytes (8 vectors per iteration)
|
||
|
+ * #1 should be true as Pos is defined as "ush"
|
||
|
+ * #2 should be true as hash_bits are greater than 7
|
||
|
+ */
|
||
|
+ unsigned n;
|
||
|
+ unsigned short wsize = s->w_size;
|
||
|
+ uint16x8_t v, *p;
|
||
|
+ size_t size;
|
||
|
+
|
||
|
+ size = s->hash_size*sizeof(s->head[0]);
|
||
|
+ Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");
|
||
|
+
|
||
|
+ Assert(sizeof(Pos) == 2, "Wrong Pos size");
|
||
|
+
|
||
|
+ /* slide s->head, eight 16-byte vectors (128 bytes) per iteration */
|
||
|
+ v = vdupq_n_u16(wsize);
|
||
|
+ p = (uint16x8_t *)(s->head);
|
||
|
+ n = size / (sizeof(uint16x8_t) * 8);
|
||
|
+ do {
|
||
|
+ p[0] = vqsubq_u16(p[0], v);
|
||
|
+ p[1] = vqsubq_u16(p[1], v);
|
||
|
+ p[2] = vqsubq_u16(p[2], v);
|
||
|
+ p[3] = vqsubq_u16(p[3], v);
|
||
|
+ p[4] = vqsubq_u16(p[4], v);
|
||
|
+ p[5] = vqsubq_u16(p[5], v);
|
||
|
+ p[6] = vqsubq_u16(p[6], v);
|
||
|
+ p[7] = vqsubq_u16(p[7], v);
|
||
|
+ p += 8;
|
||
|
+ } while (--n);
|
||
|
+#ifndef FASTEST
|
||
|
+ /* slide s->prev */
|
||
|
+ size = wsize*sizeof(s->prev[0]);
|
||
|
+
|
||
|
+ Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");
|
||
|
+
|
||
|
+ p = (uint16x8_t *)(s->prev);
|
||
|
+ n = size / (sizeof(uint16x8_t) * 8);
|
||
|
+ do {
|
||
|
+ p[0] = vqsubq_u16(p[0], v);
|
||
|
+ p[1] = vqsubq_u16(p[1], v);
|
||
|
+ p[2] = vqsubq_u16(p[2], v);
|
||
|
+ p[3] = vqsubq_u16(p[3], v);
|
||
|
+ p[4] = vqsubq_u16(p[4], v);
|
||
|
+ p[5] = vqsubq_u16(p[5], v);
|
||
|
+ p[6] = vqsubq_u16(p[6], v);
|
||
|
+ p[7] = vqsubq_u16(p[7], v);
|
||
|
+ p += 8;
|
||
|
+ } while (--n);
|
||
|
+#endif
|
||
|
+}
|
||
|
+
|
||
|
+#endif
|
||
|
+#endif
|
||
|
diff --git a/deflate.c b/deflate.c
|
||
|
index 1ec7614..36f99ac 100644
|
||
|
--- a/deflate.c
|
||
|
+++ b/deflate.c
|
||
|
@@ -50,6 +50,9 @@
|
||
|
/* @(#) $Id$ */
|
||
|
|
||
|
#include "deflate.h"
|
||
|
+#if __ARM_NEON
|
||
|
+#include "contrib/arm/neon_slide_hash.h"
|
||
|
+#endif
|
||
|
|
||
|
const char deflate_copyright[] =
|
||
|
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
|
||
|
@@ -201,6 +204,9 @@ local const config configuration_table[10] = {
|
||
|
local void slide_hash(s)
|
||
|
deflate_state *s;
|
||
|
{
|
||
|
+#if ARM_NEON
|
||
|
+ return neon_slide_hash(s);
|
||
|
+#else
|
||
|
unsigned n, m;
|
||
|
Posf *p;
|
||
|
uInt wsize = s->w_size;
|
||
|
@@ -222,6 +228,7 @@ local void slide_hash(s)
|
||
|
*/
|
||
|
} while (--n);
|
||
|
#endif
|
||
|
+#endif
|
||
|
}
|
||
|
|
||
|
/* ========================================================================= */
|
||
|
--
|
||
|
2.14.3
|
||
|
|