xref: /linux/fs/smb/client/compress/lz77.h (revision d14bbfff259cadb5af84413658699159556da156)
1*d14bbfffSSteve French /* SPDX-License-Identifier: GPL-2.0-only */
2*d14bbfffSSteve French /*
3*d14bbfffSSteve French  * Copyright (C) 2024, SUSE LLC
4*d14bbfffSSteve French  *
5*d14bbfffSSteve French  * Authors: Enzo Matsumiya <ematsumiya@suse.de>
6*d14bbfffSSteve French  *
7*d14bbfffSSteve French  * Definitions and optmized helpers for LZ77 compression.
8*d14bbfffSSteve French  */
9*d14bbfffSSteve French #ifndef _SMB_COMPRESS_LZ77_H
10*d14bbfffSSteve French #define _SMB_COMPRESS_LZ77_H
11*d14bbfffSSteve French 
12*d14bbfffSSteve French #include <linux/uaccess.h>
13*d14bbfffSSteve French #ifdef CONFIG_CIFS_COMPRESSION
14*d14bbfffSSteve French #include <asm/ptrace.h>
15*d14bbfffSSteve French #include <linux/kernel.h>
16*d14bbfffSSteve French #include <linux/string.h>
17*d14bbfffSSteve French #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
18*d14bbfffSSteve French #include <asm-generic/unaligned.h>
19*d14bbfffSSteve French #endif
20*d14bbfffSSteve French 
21*d14bbfffSSteve French #define LZ77_HASH_LOG		13
22*d14bbfffSSteve French #define LZ77_HASH_SIZE		(1 << LZ77_HASH_LOG)
23*d14bbfffSSteve French #define LZ77_HASH_MASK		lz77_hash_mask(LZ77_HASH_LOG)
24*d14bbfffSSteve French 
25*d14bbfffSSteve French /* We can increase this for better compression (but worse performance). */
26*d14bbfffSSteve French #define LZ77_MATCH_MIN_LEN	3
27*d14bbfffSSteve French /* From MS-XCA, but it's arbitrarily chosen. */
28*d14bbfffSSteve French #define LZ77_MATCH_MAX_LEN	S32_MAX
29*d14bbfffSSteve French /*
30*d14bbfffSSteve French  * Check this to ensure we don't match the current position, which would
31*d14bbfffSSteve French  * end up doing a verbatim copy of the input, and actually overflowing
32*d14bbfffSSteve French  * the output buffer because of the encoded metadata.
33*d14bbfffSSteve French  */
34*d14bbfffSSteve French #define LZ77_MATCH_MIN_DIST	1
35*d14bbfffSSteve French /* How far back in the buffer can we try to find a match (i.e. window size) */
36*d14bbfffSSteve French #define LZ77_MATCH_MAX_DIST	8192
37*d14bbfffSSteve French 
38*d14bbfffSSteve French #define LZ77_STEPSIZE_16	sizeof(u16)
39*d14bbfffSSteve French #define LZ77_STEPSIZE_32	sizeof(u32)
40*d14bbfffSSteve French #define LZ77_STEPSIZE_64	sizeof(u64)
41*d14bbfffSSteve French 
42*d14bbfffSSteve French struct lz77_flags {
43*d14bbfffSSteve French 	u8 *pos;
44*d14bbfffSSteve French 	size_t count;
45*d14bbfffSSteve French 	long val;
46*d14bbfffSSteve French };
47*d14bbfffSSteve French 
48*d14bbfffSSteve French static __always_inline u32 lz77_hash_mask(const unsigned int log2)
49*d14bbfffSSteve French {
50*d14bbfffSSteve French 	return ((1 << log2) - 1);
51*d14bbfffSSteve French }
52*d14bbfffSSteve French 
53*d14bbfffSSteve French static __always_inline u32 lz77_hash64(const u64 v, const unsigned int log2)
54*d14bbfffSSteve French {
55*d14bbfffSSteve French 	const u64 prime5bytes = 889523592379ULL;
56*d14bbfffSSteve French 
57*d14bbfffSSteve French 	return (u32)(((v << 24) * prime5bytes) >> (64 - log2));
58*d14bbfffSSteve French }
59*d14bbfffSSteve French 
60*d14bbfffSSteve French static __always_inline u32 lz77_hash32(const u32 v, const unsigned int log2)
61*d14bbfffSSteve French {
62*d14bbfffSSteve French 	return ((v * 2654435769LL) >> (32 - log2)) & lz77_hash_mask(log2);
63*d14bbfffSSteve French }
64*d14bbfffSSteve French 
65*d14bbfffSSteve French static __always_inline u32 lz77_log2(unsigned int x)
66*d14bbfffSSteve French {
67*d14bbfffSSteve French 	return x ? ((u32)(31 - __builtin_clz(x))) : 0;
68*d14bbfffSSteve French }
69*d14bbfffSSteve French 
70*d14bbfffSSteve French #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
71*d14bbfffSSteve French static __always_inline u8 lz77_read8(const void *ptr)
72*d14bbfffSSteve French {
73*d14bbfffSSteve French 	return *(u8 *)ptr;
74*d14bbfffSSteve French }
75*d14bbfffSSteve French 
76*d14bbfffSSteve French static __always_inline u16 lz77_read16(const void *ptr)
77*d14bbfffSSteve French {
78*d14bbfffSSteve French 	return *(u16 *)ptr;
79*d14bbfffSSteve French }
80*d14bbfffSSteve French 
81*d14bbfffSSteve French static __always_inline u32 lz77_read32(const void *ptr)
82*d14bbfffSSteve French {
83*d14bbfffSSteve French 	return *(u32 *)ptr;
84*d14bbfffSSteve French }
85*d14bbfffSSteve French 
86*d14bbfffSSteve French static __always_inline u64 lz77_read64(const void *ptr)
87*d14bbfffSSteve French {
88*d14bbfffSSteve French 	return *(u64 *)ptr;
89*d14bbfffSSteve French }
90*d14bbfffSSteve French 
91*d14bbfffSSteve French static __always_inline void lz77_write8(void *ptr, const u8 v)
92*d14bbfffSSteve French {
93*d14bbfffSSteve French 	*(u8 *)ptr = v;
94*d14bbfffSSteve French }
95*d14bbfffSSteve French 
96*d14bbfffSSteve French static __always_inline void lz77_write16(void *ptr, const u16 v)
97*d14bbfffSSteve French {
98*d14bbfffSSteve French 	*(u16 *)ptr = v;
99*d14bbfffSSteve French }
100*d14bbfffSSteve French 
101*d14bbfffSSteve French static __always_inline void lz77_write32(void *ptr, const u32 v)
102*d14bbfffSSteve French {
103*d14bbfffSSteve French 	*(u32 *)ptr = v;
104*d14bbfffSSteve French }
105*d14bbfffSSteve French 
106*d14bbfffSSteve French static __always_inline void lz77_write64(void *ptr, const u64 v)
107*d14bbfffSSteve French {
108*d14bbfffSSteve French 	*(u64 *)ptr = v;
109*d14bbfffSSteve French }
110*d14bbfffSSteve French 
111*d14bbfffSSteve French static __always_inline void lz77_write_ptr16(void *ptr, const void *vp)
112*d14bbfffSSteve French {
113*d14bbfffSSteve French 	*(u16 *)ptr = *(const u16 *)vp;
114*d14bbfffSSteve French }
115*d14bbfffSSteve French 
116*d14bbfffSSteve French static __always_inline void lz77_write_ptr32(void *ptr, const void *vp)
117*d14bbfffSSteve French {
118*d14bbfffSSteve French 	*(u32 *)ptr = *(const u32 *)vp;
119*d14bbfffSSteve French }
120*d14bbfffSSteve French 
121*d14bbfffSSteve French static __always_inline void lz77_write_ptr64(void *ptr, const void *vp)
122*d14bbfffSSteve French {
123*d14bbfffSSteve French 	*(u64 *)ptr = *(const u64 *)vp;
124*d14bbfffSSteve French }
125*d14bbfffSSteve French 
126*d14bbfffSSteve French static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count)
127*d14bbfffSSteve French {
128*d14bbfffSSteve French 	return copy_from_kernel_nofault(dst, src, count);
129*d14bbfffSSteve French }
130*d14bbfffSSteve French #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
131*d14bbfffSSteve French static __always_inline u8 lz77_read8(const void *ptr)
132*d14bbfffSSteve French {
133*d14bbfffSSteve French 	return get_unaligned((u8 *)ptr);
134*d14bbfffSSteve French }
135*d14bbfffSSteve French 
136*d14bbfffSSteve French static __always_inline u16 lz77_read16(const void *ptr)
137*d14bbfffSSteve French {
138*d14bbfffSSteve French 	return lz77_read8(ptr) | (lz77_read8(ptr + 1) << 8);
139*d14bbfffSSteve French }
140*d14bbfffSSteve French 
141*d14bbfffSSteve French static __always_inline u32 lz77_read32(const void *ptr)
142*d14bbfffSSteve French {
143*d14bbfffSSteve French 	return lz77_read16(ptr) | (lz77_read16(ptr + 2) << 16);
144*d14bbfffSSteve French }
145*d14bbfffSSteve French 
146*d14bbfffSSteve French static __always_inline u64 lz77_read64(const void *ptr)
147*d14bbfffSSteve French {
148*d14bbfffSSteve French 	return lz77_read32(ptr) | ((u64)lz77_read32(ptr + 4) << 32);
149*d14bbfffSSteve French }
150*d14bbfffSSteve French 
151*d14bbfffSSteve French static __always_inline void lz77_write8(void *ptr, const u8 v)
152*d14bbfffSSteve French {
153*d14bbfffSSteve French 	put_unaligned(v, (u8 *)ptr);
154*d14bbfffSSteve French }
155*d14bbfffSSteve French 
156*d14bbfffSSteve French static __always_inline void lz77_write16(void *ptr, const u16 v)
157*d14bbfffSSteve French {
158*d14bbfffSSteve French 	lz77_write8(ptr, v & 0xff);
159*d14bbfffSSteve French 	lz77_write8(ptr + 1, (v >> 8) & 0xff);
160*d14bbfffSSteve French }
161*d14bbfffSSteve French 
162*d14bbfffSSteve French static __always_inline void lz77_write32(void *ptr, const u32 v)
163*d14bbfffSSteve French {
164*d14bbfffSSteve French 	lz77_write16(ptr, v & 0xffff);
165*d14bbfffSSteve French 	lz77_write16(ptr + 2, (v >> 16) & 0xffff);
166*d14bbfffSSteve French }
167*d14bbfffSSteve French 
168*d14bbfffSSteve French static __always_inline void lz77_write64(void *ptr, const u64 v)
169*d14bbfffSSteve French {
170*d14bbfffSSteve French 	lz77_write32(ptr, v & 0xffffffff);
171*d14bbfffSSteve French 	lz77_write32(ptr + 4, (v >> 32) & 0xffffffff);
172*d14bbfffSSteve French }
173*d14bbfffSSteve French 
174*d14bbfffSSteve French static __always_inline void lz77_write_ptr16(void *ptr, const void *vp)
175*d14bbfffSSteve French {
176*d14bbfffSSteve French 	const u16 v = lz77_read16(vp);
177*d14bbfffSSteve French 
178*d14bbfffSSteve French 	lz77_write16(ptr, v);
179*d14bbfffSSteve French }
180*d14bbfffSSteve French 
181*d14bbfffSSteve French static __always_inline void lz77_write_ptr32(void *ptr, const void *vp)
182*d14bbfffSSteve French {
183*d14bbfffSSteve French 	const u32 v = lz77_read32(vp);
184*d14bbfffSSteve French 
185*d14bbfffSSteve French 	lz77_write32(ptr, v);
186*d14bbfffSSteve French }
187*d14bbfffSSteve French 
188*d14bbfffSSteve French static __always_inline void lz77_write_ptr64(void *ptr, const void *vp)
189*d14bbfffSSteve French {
190*d14bbfffSSteve French 	const u64 v = lz77_read64(vp);
191*d14bbfffSSteve French 
192*d14bbfffSSteve French 	lz77_write64(ptr, v);
193*d14bbfffSSteve French }
194*d14bbfffSSteve French static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count)
195*d14bbfffSSteve French {
196*d14bbfffSSteve French 	memcpy(dst, src, count);
197*d14bbfffSSteve French 	return 0;
198*d14bbfffSSteve French }
199*d14bbfffSSteve French #endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
200*d14bbfffSSteve French 
201*d14bbfffSSteve French static __always_inline unsigned int __count_common_bytes(const unsigned long diff)
202*d14bbfffSSteve French {
203*d14bbfffSSteve French #ifdef __has_builtin
204*d14bbfffSSteve French #  if __has_builtin(__builtin_ctzll)
205*d14bbfffSSteve French 	return (unsigned int)__builtin_ctzll(diff) >> 3;
206*d14bbfffSSteve French #  endif
207*d14bbfffSSteve French #else
208*d14bbfffSSteve French 	/* count trailing zeroes */
209*d14bbfffSSteve French 	unsigned long bits = 0, i, z = 0;
210*d14bbfffSSteve French 
211*d14bbfffSSteve French 	bits |= diff;
212*d14bbfffSSteve French 	for (i = 0; i < 64; i++) {
213*d14bbfffSSteve French 		if (bits[i])
214*d14bbfffSSteve French 			break;
215*d14bbfffSSteve French 		z++;
216*d14bbfffSSteve French 	}
217*d14bbfffSSteve French 
218*d14bbfffSSteve French 	return (unsigned int)z >> 3;
219*d14bbfffSSteve French #endif
220*d14bbfffSSteve French }
221*d14bbfffSSteve French 
222*d14bbfffSSteve French static __always_inline size_t lz77_match(const u8 *match, const u8 *cur, const u8 *end)
223*d14bbfffSSteve French {
224*d14bbfffSSteve French 	const u8 *start = cur;
225*d14bbfffSSteve French 
226*d14bbfffSSteve French 	if (cur == match)
227*d14bbfffSSteve French 		return 0;
228*d14bbfffSSteve French 
229*d14bbfffSSteve French 	if (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) {
230*d14bbfffSSteve French 		u64 const diff = lz77_read64(cur) ^ lz77_read64(match);
231*d14bbfffSSteve French 
232*d14bbfffSSteve French 		if (!diff) {
233*d14bbfffSSteve French 			cur += LZ77_STEPSIZE_64;
234*d14bbfffSSteve French 			match += LZ77_STEPSIZE_64;
235*d14bbfffSSteve French 		} else {
236*d14bbfffSSteve French 			return __count_common_bytes(diff);
237*d14bbfffSSteve French 		}
238*d14bbfffSSteve French 	}
239*d14bbfffSSteve French 
240*d14bbfffSSteve French 	while (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) {
241*d14bbfffSSteve French 		u64 const diff = lz77_read64(cur) ^ lz77_read64(match);
242*d14bbfffSSteve French 
243*d14bbfffSSteve French 		if (!diff) {
244*d14bbfffSSteve French 			cur += LZ77_STEPSIZE_64;
245*d14bbfffSSteve French 			match += LZ77_STEPSIZE_64;
246*d14bbfffSSteve French 			continue;
247*d14bbfffSSteve French 		}
248*d14bbfffSSteve French 
249*d14bbfffSSteve French 		cur += __count_common_bytes(diff);
250*d14bbfffSSteve French 		return (size_t)(cur - start);
251*d14bbfffSSteve French 	}
252*d14bbfffSSteve French 
253*d14bbfffSSteve French 	if (cur < end - 3 && !(lz77_read32(cur) ^ lz77_read32(match))) {
254*d14bbfffSSteve French 		cur += LZ77_STEPSIZE_32;
255*d14bbfffSSteve French 		match += LZ77_STEPSIZE_32;
256*d14bbfffSSteve French 	}
257*d14bbfffSSteve French 
258*d14bbfffSSteve French 	if (cur < end - 1 && lz77_read16(cur) == lz77_read16(match)) {
259*d14bbfffSSteve French 		cur += LZ77_STEPSIZE_16;
260*d14bbfffSSteve French 		match += LZ77_STEPSIZE_16;
261*d14bbfffSSteve French 	}
262*d14bbfffSSteve French 
263*d14bbfffSSteve French 	if (cur < end && *cur == *match)
264*d14bbfffSSteve French 		cur++;
265*d14bbfffSSteve French 
266*d14bbfffSSteve French 	return (size_t)(cur - start);
267*d14bbfffSSteve French }
268*d14bbfffSSteve French 
269*d14bbfffSSteve French static __always_inline unsigned long lz77_max(unsigned long a, unsigned long b)
270*d14bbfffSSteve French {
271*d14bbfffSSteve French 	int m = (a < b) - 1;
272*d14bbfffSSteve French 
273*d14bbfffSSteve French 	return (a & m) | (b & ~m);
274*d14bbfffSSteve French }
275*d14bbfffSSteve French 
276*d14bbfffSSteve French static __always_inline unsigned long lz77_min(unsigned long a, unsigned long b)
277*d14bbfffSSteve French {
278*d14bbfffSSteve French 	int m = (a > b) - 1;
279*d14bbfffSSteve French 
280*d14bbfffSSteve French 	return (a & m) | (b & ~m);
281*d14bbfffSSteve French }
282*d14bbfffSSteve French 
283*d14bbfffSSteve French int lz77_compress(const u8 *src, size_t src_len, u8 *dst, size_t *dst_len);
284*d14bbfffSSteve French /* when CONFIG_CIFS_COMPRESSION not set lz77_compress() is not called */
285*d14bbfffSSteve French #endif /* !CONFIG_CIFS_COMPRESSION */
286*d14bbfffSSteve French #endif /* _SMB_COMPRESS_LZ77_H */
287