The content of this branch was automatically imported from Fedora ELN with the following as its source: https://src.fedoraproject.org/rpms/zlib#4838139649437159df1a95cf2b73d6358bc7770d
		
			
				
	
	
		
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001
 | |
| From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 | |
| Date: Mon, 9 Apr 2018 13:52:17 -0700
 | |
| Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
 | |
| 
 | |
| This should help with compression of data, using NEON instructions
 | |
| (therefore useful for ARMv7/ARMv8).
 | |
| 
 | |
| Original patch by Jun He.
 | |
| ---
 | |
|  CMakeLists.txt                | 18 ++++++++
 | |
|  contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++
 | |
|  deflate.c                     |  7 +++
 | |
|  3 files changed, 109 insertions(+)
 | |
|  create mode 100644 contrib/arm/neon_slide_hash.h
 | |
| 
 | |
| diff --git a/CMakeLists.txt b/CMakeLists.txt
 | |
| index 0fe939d..e9a74e9 100644
 | |
| --- a/CMakeLists.txt
 | |
| +++ b/CMakeLists.txt
 | |
| @@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC)
 | |
|          set(ZLIB_ASMS contrib/amd64/amd64-match.S)
 | |
|      endif ()
 | |
|  
 | |
| +    if(ARM_NEON)
 | |
| +        list(REMOVE_ITEM ZLIB_SRCS inflate.c)
 | |
| +        set(ZLIB_ARM_NEON_HDRS
 | |
| +            contrib/arm/chunkcopy.h
 | |
| +            contrib/arm/inffast_chunk.h
 | |
| +            contrib/arm/neon_slide_hash.h)
 | |
| +        set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
 | |
| +        add_definitions(-DARM_NEON)
 | |
| +        set(COMPILER ${CMAKE_C_COMPILER})
 | |
| +        # NEON is mandatory in ARMv8.
 | |
| +        if(${COMPILER} MATCHES "aarch64")
 | |
| +          set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
 | |
| +          # But it was optional for ARMv7.
 | |
| +        elseif(${COMPILER} MATCHES "arm")
 | |
| +          set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
 | |
| +        endif()
 | |
| +    endif()
 | |
| +
 | |
|  	if(ZLIB_ASMS)
 | |
|  		add_definitions(-DASMV)
 | |
|  		set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
 | |
| diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
 | |
| new file mode 100644
 | |
| index 0000000..0daffa1
 | |
| --- /dev/null
 | |
| +++ b/contrib/arm/neon_slide_hash.h
 | |
| @@ -0,0 +1,84 @@
 | |
| +/* Copyright (C) 1995-2011, 2016 Mark Adler
 | |
| + * Copyright (C) 2017 ARM Holdings Inc.
 | |
| + * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 | |
| + *          Jun He <jun.he@arm.com>
 | |
| + * This software is provided 'as-is', without any express or implied
 | |
| + * warranty.  In no event will the authors be held liable for any damages
 | |
| + * arising from the use of this software.
 | |
| + * Permission is granted to anyone to use this software for any purpose,
 | |
| + * including commercial applications, and to alter it and redistribute it
 | |
| + * freely, subject to the following restrictions:
 | |
| + * 1. The origin of this software must not be misrepresented; you must not
 | |
| + *  claim that you wrote the original software. If you use this software
 | |
| + *    in a product, an acknowledgment in the product documentation would be
 | |
| + *    appreciated but is not required.
 | |
| + * 2. Altered source versions must be plainly marked as such, and must not be
 | |
| + *    misrepresented as being the original software.
 | |
| + * 3. This notice may not be removed or altered from any source distribution.
 | |
| + */
 | |
| +#ifndef __NEON_SLIDE_HASH__
 | |
| +#define __NEON_SLIDE_HASH__
 | |
| +
 | |
| +#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
 | |
| +#include "deflate.h"
 | |
| +#include <arm_neon.h>
 | |
| +
 | |
| +static inline void neon_slide_hash(deflate_state *s)
 | |
| +{
 | |
| +    /*
 | |
| +     * NEON (ASIMD) hash table rebase: subtracts w_size from every entry of
 | |
| +     * s->head (and of s->prev unless FASTEST is defined), saturating at 0.
 | |
| +     * Assumes:
 | |
| +     * 1. a hash chain offset (Pos) is 2 bytes, since Pos is defined as "ush";
 | |
| +     * 2. each table's byte size is a non-zero multiple of 128, since
 | |
| +     *    hash_bits is greater than 7 (each loop pass handles 128 bytes).
 | |
| +     */
 | |
| +    unsigned n;
 | |
| +    unsigned short wsize = s->w_size;
 | |
| +    uint16x8_t v, *p;
 | |
| +    size_t size;
 | |
| +
 | |
| +    size = s->hash_size*sizeof(s->head[0]);
 | |
| +    Assert(size % (sizeof(uint16x8_t) * 8) == 0, "hash table size err");
 | |
| +
 | |
| +    Assert(sizeof(Pos) == 2, "Wrong Pos size");
 | |
| +
 | |
| +    /* slide s->head: 8 vectors of 8 x 16-bit entries per iteration */
 | |
| +    v = vdupq_n_u16(wsize);
 | |
| +    p = (uint16x8_t *)(s->head);
 | |
| +    n = size / (sizeof(uint16x8_t) * 8);
 | |
| +    do {
 | |
| +        p[0] = vqsubq_u16(p[0], v);
 | |
| +        p[1] = vqsubq_u16(p[1], v);
 | |
| +        p[2] = vqsubq_u16(p[2], v);
 | |
| +        p[3] = vqsubq_u16(p[3], v);
 | |
| +        p[4] = vqsubq_u16(p[4], v);
 | |
| +        p[5] = vqsubq_u16(p[5], v);
 | |
| +        p[6] = vqsubq_u16(p[6], v);
 | |
| +        p[7] = vqsubq_u16(p[7], v);
 | |
| +        p += 8;
 | |
| +    } while (--n);
 | |
| +#ifndef FASTEST
 | |
| +    /* slide s->prev the same way; it holds wsize entries */
 | |
| +    size = wsize*sizeof(s->prev[0]);
 | |
| +
 | |
| +    Assert(size % (sizeof(uint16x8_t) * 8) == 0, "hash table size err");
 | |
| +
 | |
| +    p = (uint16x8_t *)(s->prev);
 | |
| +    n = size / (sizeof(uint16x8_t) * 8);
 | |
| +    do {
 | |
| +        p[0] = vqsubq_u16(p[0], v);
 | |
| +        p[1] = vqsubq_u16(p[1], v);
 | |
| +        p[2] = vqsubq_u16(p[2], v);
 | |
| +        p[3] = vqsubq_u16(p[3], v);
 | |
| +        p[4] = vqsubq_u16(p[4], v);
 | |
| +        p[5] = vqsubq_u16(p[5], v);
 | |
| +        p[6] = vqsubq_u16(p[6], v);
 | |
| +        p[7] = vqsubq_u16(p[7], v);
 | |
| +        p += 8;
 | |
| +    } while (--n);
 | |
| +#endif
 | |
| +}
 | |
| +
 | |
| +#endif
 | |
| +#endif
 | |
| diff --git a/deflate.c b/deflate.c
 | |
| index 1ec7614..36f99ac 100644
 | |
| --- a/deflate.c
 | |
| +++ b/deflate.c
 | |
| @@ -50,6 +50,9 @@
 | |
|  /* @(#) $Id$ */
 | |
|  
 | |
|  #include "deflate.h"
 | |
| +#if __ARM_NEON
 | |
| +#include "contrib/arm/neon_slide_hash.h"
 | |
| +#endif
 | |
|  
 | |
|  const char deflate_copyright[] =
 | |
|     " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
 | |
| @@ -201,6 +204,9 @@ local const config configuration_table[10] = {
 | |
|  local void slide_hash(s)
 | |
|      deflate_state *s;
 | |
|  {
 | |
| +#if ARM_NEON
 | |
| +    return neon_slide_hash(s);
 | |
| +#else
 | |
|      unsigned n, m;
 | |
|      Posf *p;
 | |
|      uInt wsize = s->w_size;
 | |
| @@ -222,6 +228,7 @@ local void slide_hash(s)
 | |
|           */
 | |
|      } while (--n);
 | |
|  #endif
 | |
| +#endif
 | |
|  }
 | |
|  
 | |
|  /* ========================================================================= */
 | |
| -- 
 | |
| 2.19.0
 | |
| 
 |