// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2024, SUSE LLC
 *
 * Authors: Enzo Matsumiya <ematsumiya@suse.de>
 *
 * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
 */
#include <linux/slab.h>
#include <linux/sizes.h>
#include <linux/count_zeros.h>
#include <linux/unaligned.h>

#include "lz77.h"

/*
 * Compression parameters.
 */
#define LZ77_MATCH_MIN_LEN	4
#define LZ77_MATCH_MIN_DIST	1
#define LZ77_MATCH_MAX_DIST	SZ_1K
#define LZ77_HASH_LOG		15
#define LZ77_HASH_SIZE		(1 << LZ77_HASH_LOG)
#define LZ77_STEP_SIZE		sizeof(u64)

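/*
 * Unaligned access helpers.  The 16- and 32-bit stores are little-endian, matching the
 * compressed output format; the 64-bit load is only used for match comparison and hashing.
 */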
static __always_inline u8 lz77_read8(const u8 *ptr)
{
	return get_unaligned(ptr);
}

static __always_inline u64 lz77_read64(const u64 *ptr)
{
	return get_unaligned(ptr);
}

static __always_inline void lz77_write8(u8 *ptr, u8 v)
{
	put_unaligned(v, ptr);
}

static __always_inline void lz77_write16(u16 *ptr, u16 v)
{
	put_unaligned_le16(v, ptr);
}

static __always_inline void lz77_write32(u32 *ptr, u32 v)
{
	put_unaligned_le32(v, ptr);
}

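/*
 * Return the number of bytes at @cur matching the earlier window @wnd, stopping before
 * @end.  The scan compares LZ77_STEP_SIZE (8) bytes at a time and uses the trailing-zero
 * count of the XOR to count the common bytes of the first differing word; a byte-wise
 * loop handles whatever is left once fewer than 8 bytes remain.
 */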
static __always_inline u32 lz77_match_len(const void *wnd, const void *cur, const void *end)
{
	const void *start = cur;
	u64 diff;

	/* Safe for a do/while because otherwise we wouldn't reach here from the main loop. */
	do {
		diff = lz77_read64(cur) ^ lz77_read64(wnd);
		if (!diff) {
			cur += LZ77_STEP_SIZE;
			wnd += LZ77_STEP_SIZE;

			continue;
		}

		/* This computes the number of common bytes in @diff. */
		cur += count_trailing_zeros(diff) >> 3;

		return (cur - start);
	} while (likely(cur + LZ77_STEP_SIZE < end));

	while (cur < end && lz77_read8(cur++) == lz77_read8(wnd++))
		;

	return (cur - start);
}

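/*
 * Emit a match of length @len at distance @dist, following the MS-XCA layout: a
 * little-endian u16 carries (dist - 1) in its upper 13 bits and min(len - 3, 7) in its
 * low 3 bits.  Longer lengths spill into a shared 4-bit nibble (@nib tracks the
 * half-filled byte so two matches can share it), then into an extra byte, and finally
 * into a u16 or u32 field for very long matches.
 */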
static __always_inline void *lz77_write_match(void *dst, void **nib, u32 dist, u32 len)
{
	len -= 3;
	dist--;
	dist <<= 3;

	if (len < 7) {
		lz77_write16(dst, dist + len);

		return dst + 2;
	}

	dist |= 7;
	lz77_write16(dst, dist);
	dst += 2;
	len -= 7;

	if (!*nib) {
		lz77_write8(dst, umin(len, 15));
		*nib = dst;
		dst++;
	} else {
		u8 *b = *nib;

		lz77_write8(b, *b | umin(len, 15) << 4);
		*nib = NULL;
	}

	if (len < 15)
		return dst;

	len -= 15;
	if (len < 255) {
		lz77_write8(dst, len);

		return dst + 1;
	}

	lz77_write8(dst, 0xff);
	dst++;
	len += 7 + 15;
	if (len <= 0xffff) {
		lz77_write16(dst, len);

		return dst + 2;
	}

	lz77_write16(dst, 0);
	dst += 2;
	lz77_write32(dst, len);

	return dst + 4;
}

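/*
 * Compress @slen bytes from @src into @dst, storing the compressed size in @dlen.
 *
 * The output interleaves 32-bit flag groups with the elements they describe: one bit per
 * element, most significant bit first, where 1 marks a match token and 0 a literal byte.
 * Space for each group is reserved at @flag_pos before its elements are written, and the
 * group is stored once 32 flag bits have accumulated.
 *
 * Returns 0 on success, -ENOMEM if the hash table cannot be allocated, or -EMSGSIZE if
 * the data does not compress.
 */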
noinline int lz77_compress(const void *src, u32 slen, void *dst, u32 *dlen)
{
	const void *srcp, *end;
	void *dstp, *nib, *flag_pos;
	u32 flag_count = 0;
	long flag = 0;
	u64 *htable;

	srcp = src;
	end = src + slen;
	dstp = dst;
	nib = NULL;
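	/* Reserve room for the first 32-bit flag group. */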
	flag_pos = dstp;
	dstp += 4;

	htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL);
	if (!htable)
		return -ENOMEM;

	/* Main loop. */
	do {
		u32 dist, len = 0;
		const void *wnd;
		u64 hash;

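		/*
		 * Multiplicative hash of the bytes ahead of @srcp; each hash bucket remembers
		 * the offset of the most recent position that hashed to it, which is the one
		 * match candidate considered for this position.
		 */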
		hash = ((lz77_read64(srcp) << 24) * 889523592379ULL) >> (64 - LZ77_HASH_LOG);
		wnd = src + htable[hash];
		htable[hash] = srcp - src;
		dist = srcp - wnd;

		if (dist && dist < LZ77_MATCH_MAX_DIST)
			len = lz77_match_len(wnd, srcp, end);

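		/*
		 * No usable match: emit a literal byte and a 0 flag bit.  Once 32 flag bits
		 * have accumulated, flush the group and reserve the next 4-byte slot.
		 */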
		if (len < LZ77_MATCH_MIN_LEN) {
			lz77_write8(dstp, lz77_read8(srcp));

			dstp++;
			srcp++;

			flag <<= 1;
			flag_count++;
			if (flag_count == 32) {
				lz77_write32(flag_pos, flag);
				flag_count = 0;
				flag_pos = dstp;
				dstp += 4;
			}

			continue;
		}

		/*
		 * Bail out if @dstp reached >= 7/8 of @slen -- already compressed badly, not worth
		 * going further.
		 */
		if (unlikely(dstp - dst >= slen - (slen >> 3))) {
			*dlen = slen;
			goto out;
		}

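		/* Emit the match token and a 1 flag bit, then skip over the matched bytes. */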
		dstp = lz77_write_match(dstp, &nib, dist, len);
		srcp += len;

		flag = (flag << 1) | 1;
		flag_count++;
		if (flag_count == 32) {
			lz77_write32(flag_pos, flag);
			flag_count = 0;
			flag_pos = dstp;
			dstp += 4;
		}
	} while (likely(srcp + LZ77_STEP_SIZE < end));

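	/*
	 * The main loop needs LZ77_STEP_SIZE bytes of lookahead; copy whatever tail is left
	 * verbatim as literals, at most a flag group's worth per iteration so that every
	 * byte gets its 0 flag bit.
	 */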
	while (srcp < end) {
		u32 c = umin(end - srcp, 32 - flag_count);

		memcpy(dstp, srcp, c);

		dstp += c;
		srcp += c;

		flag <<= c;
		flag_count += c;
		if (flag_count == 32) {
			lz77_write32(flag_pos, flag);
			flag_count = 0;
			flag_pos = dstp;
			dstp += 4;
		}
	}

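	/*
	 * Flush the last, possibly partial flag group: shift the accumulated bits up to
	 * their final positions and set the unused low-order bits.
	 */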
	flag <<= (32 - flag_count);
	flag |= (1 << (32 - flag_count)) - 1;
	lz77_write32(flag_pos, flag);

	*dlen = dstp - dst;
out:
	kvfree(htable);

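	/* Only report success if the output is actually smaller than the input. */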
	if (*dlen < slen)
		return 0;

	return -EMSGSIZE;
}