219 lines
8.2 KiB
Diff
219 lines
8.2 KiB
Diff
|
From 0ad56061ade1afe2896af1acffa5e15fbe5c98ed Mon Sep 17 00:00:00 2001
|
||
|
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||
|
Date: Mon, 9 Apr 2018 15:14:19 -0700
|
||
|
Subject: [PATCH 2/3] Porting optimized longest_match
|
||
|
|
||
|
This patch was contributed to zlib-ng and features an improved longest_match
|
||
|
function using the most distant hash code to reduce number of checks
|
||
|
(see: http://www.gildor.org/en/projects/zlib).
|
||
|
|
||
|
Original patch by Jun He.
|
||
|
---
|
||
|
CMakeLists.txt | 3 +-
|
||
|
contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++++++++++
|
||
|
deflate.c | 11 +++-
|
||
|
3 files changed, 152 insertions(+), 4 deletions(-)
|
||
|
create mode 100644 contrib/arm/arm_longest_match.h
|
||
|
|
||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||
|
index 230ca6d..c330093 100644
|
||
|
--- a/CMakeLists.txt
|
||
|
+++ b/CMakeLists.txt
|
||
|
@@ -142,7 +142,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||
|
set(ZLIB_ARM_NEON_HDRS
|
||
|
contrib/arm/chunkcopy.h
|
||
|
contrib/arm/inffast_chunk.h
|
||
|
- contrib/arm/neon_slide_hash.h)
|
||
|
+ contrib/arm/neon_slide_hash.h
|
||
|
+ contrib/arm/arm_longest_match.h)
|
||
|
set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
|
||
|
add_definitions(-DARM_NEON)
|
||
|
set(COMPILER ${CMAKE_C_COMPILER})
|
||
|
diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
|
||
|
new file mode 100644
|
||
|
index 0000000..9e7083f
|
||
|
--- /dev/null
|
||
|
+++ b/contrib/arm/arm_longest_match.h
|
||
|
@@ -0,0 +1,142 @@
|
||
|
+/* Copyright (C) 1995-2011, 2016 Mark Adler
|
||
|
+ * Copyright (C) 2017 ARM Holdings Inc.
|
||
|
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||
|
+ * Jun He <jun.he@arm.com>
|
||
|
+ * This software is provided 'as-is', without any express or implied
|
||
|
+ * warranty. In no event will the authors be held liable for any damages
|
||
|
+ * arising from the use of this software.
|
||
|
+ * Permission is granted to anyone to use this software for any purpose,
|
||
|
+ * including commercial applications, and to alter it and redistribute it
|
||
|
+ * freely, subject to the following restrictions:
|
||
|
+ * 1. The origin of this software must not be misrepresented; you must not
|
||
|
+ * claim that you wrote the original software. If you use this software
|
||
|
+ * in a product, an acknowledgment in the product documentation would be
|
||
|
+ * appreciated but is not required.
|
||
|
+ * 2. Altered source versions must be plainly marked as such, and must not be
|
||
|
+ * misrepresented as being the original software.
|
||
|
+ * 3. This notice may not be removed or altered from any source distribution.
|
||
|
+ */
|
||
|
+#ifndef __ARM_LONGEST__MATCH__
|
||
|
+#define __ARM_LONGEST__MATCH__
|
||
|
+
|
||
|
+#if defined(ARM_NEON)
|
||
|
+#include "deflate.h"
|
||
|
+#include <stdint.h>
|
||
|
+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max) /* length of the common prefix of a and b, examining at most max bytes of each */
|
||
|
+{
|
||
|
+ register int len = 0; /* number of leading bytes known to be equal so far */
|
||
|
+ register unsigned long xor = 0; /* XOR of the current word pair; non-zero means the two words differ */
|
||
|
+ register int check_loops = max/sizeof(unsigned long); /* number of whole words that fit in max bytes */
|
||
|
+ while(check_loops-- > 0) {
|
||
|
+ xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len)); /* NOTE(review): word loads through cast byte pointers; a+len and b+len are not checked for alignment */
|
||
|
+ if (xor) break; /* first differing word found; the exact byte is located after the loop */
|
||
|
+ len += sizeof(unsigned long);
|
||
|
+ }
|
||
|
+ if (0 == xor) { /* every whole word matched: compare the remaining bytes one at a time */
|
||
|
+ while (len < max) {
|
||
|
+ if (a[len] != b[len]) break;
|
||
|
+ len++;
|
||
|
+ }
|
||
|
+ return len;
|
||
|
+ }
|
||
|
+ xor = __builtin_ctzl(xor)>>3; /* index of the lowest set bit, divided by 8: count of equal low-order bytes in the differing word. NOTE(review): that is the count of equal leading bytes in memory only on a little-endian target - confirm */
|
||
|
+ return len + xor; /* equal whole words plus equal bytes inside the differing word */
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * This implementation is based on algorithm described at:
|
||
|
+ * http://www.gildor.org/en/projects/zlib
|
||
|
+ * It uses the hash chain indexed by the most distant hash code to
|
||
|
+ * reduce number of checks.
|
||
|
+ * This also eliminates those unnecessary check loops in legacy
|
||
|
+ * longest_match's do..while loop if the "most distant code" is out
|
||
|
+ * of search buffer
|
||
|
+ *
|
||
|
+ */
|
||
|
+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) { /* returns the best match length found, capped at s->lookahead on the normal exit; stores the match position in s->match_start whenever a longer match is found */
|
||
|
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
|
||
|
+ unsigned char *scan = s->window + s->strstart; /* current string */
|
||
|
+ unsigned char *match; /* matched string */
|
||
|
+ unsigned int len; /* length of current match */
|
||
|
+ unsigned int best_len = s->prev_length; /* best match length so far */
|
||
|
+ unsigned int nice_match = s->nice_match; /* stop if match long enough */
|
||
|
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
|
||
|
+ s->strstart - (IPos)MAX_DIST(s) : 0;
|
||
|
+ /* Stop when cur_match becomes <= limit. To simplify the code,
|
||
|
+ * we prevent matches with the string of window index 0.
|
||
|
+ */
|
||
|
+ int offset = 0; /* distance from scan to the substring whose hash head is the most distant one found; 0 means the incoming cur_match is used as is */
|
||
|
+ Pos *prev = s->prev; /* hash chain links: prev[pos & wmask] is the next candidate after pos */
|
||
|
+ unsigned int wmask = s->w_mask; /* mask applied to a position before indexing prev[] */
|
||
|
+ unsigned char *scan_buf_base = s->window; /* window base; later biased by -offset so that scan_buf_base + cur_match is the start of the candidate string */
|
||
|
+
|
||
|
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
|
||
|
+ * It is easy to get rid of this optimization if necessary.
|
||
|
+ */
|
||
|
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
|
||
|
+
|
||
|
+ /* Do not look for matches beyond the end of the input. This is necessary
|
||
|
+ * to make deflate deterministic.
|
||
|
+ */
|
||
|
+ if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
|
||
|
+
|
||
|
+ Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
|
||
|
+
|
||
|
+ /* find most distant hash code for lazy_match */
|
||
|
+ if (best_len > MIN_MATCH) { /* this condition also keeps the divisor used at the end of this branch >= 1 */
|
||
|
+ /* search for most distant hash code */
|
||
|
+ int i; /* index into scan of the last byte fed to the hash */
|
||
|
+ uint16_t hash = 0; /* running hash, updated one byte at a time by UPDATE_HASH */
|
||
|
+ IPos pos; /* head position stored for the current hash */
|
||
|
+
|
||
|
+ UPDATE_HASH(s, hash, scan[1]); /* prime the hash with scan[1] and scan[2] before the loop adds scan[3] */
|
||
|
+ UPDATE_HASH(s, hash, scan[2]);
|
||
|
+ for (i = 3; i <= best_len; i++) { /* NOTE(review): i is int while best_len is unsigned, so the comparison is done in unsigned arithmetic */
|
||
|
+ UPDATE_HASH(s, hash, scan[i]);
|
||
|
+ /* get head IPos of the hash calculated from scan[i-2..i] */
|
||
|
+ pos = s->head[hash];
|
||
|
+ /* compare it to current "farthest hash" IPos */
|
||
|
+ if (pos <= cur_match) {
|
||
|
+ /* we have a new "farthest hash" now */
|
||
|
+ offset = i - 2;
|
||
|
+ cur_match = pos;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ /* shift the search limit by offset, since cur_match now refers to scan + offset */
|
||
|
+ limit += offset;
|
||
|
+ /*
|
||
|
+ * Check whether the most distant code's position is outside the search
|
||
|
+ * buffer. If it is, scan[offset..offset+2] is not present in the
|
||
|
+ * search buffer, so we just return the best_len we already have
|
||
|
+ * (s->match_start is left untouched on this path).
|
||
|
+ */
|
||
|
+ if (cur_match < limit) return best_len;
|
||
|
+
|
||
|
+ scan_buf_base -= offset; /* candidates are now positions of scan + offset; step back so comparisons start at the string start */
|
||
|
+ /* reduce hash search depth based on best_len */
|
||
|
+ chain_length /= best_len - MIN_MATCH; /* NOTE(review): the quotient is 0 when max_chain_length < best_len - MIN_MATCH; the --chain_length in the loop condition below then wraps instead of ending the search - confirm this is intended */
|
||
|
+ }
|
||
|
+
|
||
|
+ do {
|
||
|
+ Assert(cur_match < s->strstart, "no future");
|
||
|
+
|
||
|
+ /* Determine matched length at current pos */
|
||
|
+ match = scan_buf_base + cur_match; /* start of the candidate, already corrected for offset */
|
||
|
+ len = get_match_len(match, scan, MAX_MATCH);
|
||
|
+
|
||
|
+ if (len > best_len) {
|
||
|
+ /* found longer string */
|
||
|
+ s->match_start = cur_match - offset; /* convert the chain position back to the start of the match */
|
||
|
+ best_len = len;
|
||
|
+ /* good enough? */
|
||
|
+ if (len >= nice_match) break;
|
||
|
+ }
|
||
|
+ /* move to prev pos in this hash chain */
|
||
|
+ } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
|
||
|
+
|
||
|
+ return (best_len <= s->lookahead)? best_len : s->lookahead; /* never report a match longer than the available lookahead */
|
||
|
+}
|
||
|
+
|
||
|
+#endif
|
||
|
+#endif
|
||
|
diff --git a/deflate.c b/deflate.c
|
||
|
index 36f99ac..4c42259 100644
|
||
|
--- a/deflate.c
|
||
|
+++ b/deflate.c
|
||
|
@@ -50,9 +50,6 @@
|
||
|
/* @(#) $Id$ */
|
||
|
|
||
|
#include "deflate.h"
|
||
|
-#if __ARM_NEON
|
||
|
-#include "contrib/arm/neon_slide_hash.h"
|
||
|
-#endif
|
||
|
|
||
|
const char deflate_copyright[] =
|
||
|
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
|
||
|
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
|
||
|
s->head[s->hash_size-1] = NIL; \
|
||
|
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
|
||
|
|
||
|
+#if defined(ARM_NEON)
|
||
|
+#include "contrib/arm/arm_longest_match.h"
|
||
|
+#include "contrib/arm/neon_slide_hash.h"
|
||
|
+#endif
|
||
|
+
|
||
|
/* ===========================================================================
|
||
|
* Slide the hash table when sliding the window down (could be avoided with 32
|
||
|
* bit values at the expense of memory usage). We slide even when level == 0 to
|
||
|
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
|
||
|
deflate_state *s;
|
||
|
IPos cur_match; /* current match */
|
||
|
{
|
||
|
+#if defined(ARM_NEON)
|
||
|
+ return arm_longest_match(s, cur_match);
|
||
|
+#endif
|
||
|
unsigned chain_length = s->max_chain_length;/* max hash chain length */
|
||
|
register Bytef *scan = s->window + s->strstart; /* current string */
|
||
|
register Bytef *match; /* matched string */
|
||
|
--
|
||
|
2.14.3
|
||
|
|