1*d14bbfffSSteve French /* SPDX-License-Identifier: GPL-2.0-only */ 2*d14bbfffSSteve French /* 3*d14bbfffSSteve French * Copyright (C) 2024, SUSE LLC 4*d14bbfffSSteve French * 5*d14bbfffSSteve French * Authors: Enzo Matsumiya <ematsumiya@suse.de> 6*d14bbfffSSteve French * 7*d14bbfffSSteve French * Definitions and optmized helpers for LZ77 compression. 8*d14bbfffSSteve French */ 9*d14bbfffSSteve French #ifndef _SMB_COMPRESS_LZ77_H 10*d14bbfffSSteve French #define _SMB_COMPRESS_LZ77_H 11*d14bbfffSSteve French 12*d14bbfffSSteve French #include <linux/uaccess.h> 13*d14bbfffSSteve French #ifdef CONFIG_CIFS_COMPRESSION 14*d14bbfffSSteve French #include <asm/ptrace.h> 15*d14bbfffSSteve French #include <linux/kernel.h> 16*d14bbfffSSteve French #include <linux/string.h> 17*d14bbfffSSteve French #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 18*d14bbfffSSteve French #include <asm-generic/unaligned.h> 19*d14bbfffSSteve French #endif 20*d14bbfffSSteve French 21*d14bbfffSSteve French #define LZ77_HASH_LOG 13 22*d14bbfffSSteve French #define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG) 23*d14bbfffSSteve French #define LZ77_HASH_MASK lz77_hash_mask(LZ77_HASH_LOG) 24*d14bbfffSSteve French 25*d14bbfffSSteve French /* We can increase this for better compression (but worse performance). */ 26*d14bbfffSSteve French #define LZ77_MATCH_MIN_LEN 3 27*d14bbfffSSteve French /* From MS-XCA, but it's arbitrarily chosen. */ 28*d14bbfffSSteve French #define LZ77_MATCH_MAX_LEN S32_MAX 29*d14bbfffSSteve French /* 30*d14bbfffSSteve French * Check this to ensure we don't match the current position, which would 31*d14bbfffSSteve French * end up doing a verbatim copy of the input, and actually overflowing 32*d14bbfffSSteve French * the output buffer because of the encoded metadata. 33*d14bbfffSSteve French */ 34*d14bbfffSSteve French #define LZ77_MATCH_MIN_DIST 1 35*d14bbfffSSteve French /* How far back in the buffer can we try to find a match (i.e. window size) */ 36*d14bbfffSSteve French #define LZ77_MATCH_MAX_DIST 8192 37*d14bbfffSSteve French 38*d14bbfffSSteve French #define LZ77_STEPSIZE_16 sizeof(u16) 39*d14bbfffSSteve French #define LZ77_STEPSIZE_32 sizeof(u32) 40*d14bbfffSSteve French #define LZ77_STEPSIZE_64 sizeof(u64) 41*d14bbfffSSteve French 42*d14bbfffSSteve French struct lz77_flags { 43*d14bbfffSSteve French u8 *pos; 44*d14bbfffSSteve French size_t count; 45*d14bbfffSSteve French long val; 46*d14bbfffSSteve French }; 47*d14bbfffSSteve French 48*d14bbfffSSteve French static __always_inline u32 lz77_hash_mask(const unsigned int log2) 49*d14bbfffSSteve French { 50*d14bbfffSSteve French return ((1 << log2) - 1); 51*d14bbfffSSteve French } 52*d14bbfffSSteve French 53*d14bbfffSSteve French static __always_inline u32 lz77_hash64(const u64 v, const unsigned int log2) 54*d14bbfffSSteve French { 55*d14bbfffSSteve French const u64 prime5bytes = 889523592379ULL; 56*d14bbfffSSteve French 57*d14bbfffSSteve French return (u32)(((v << 24) * prime5bytes) >> (64 - log2)); 58*d14bbfffSSteve French } 59*d14bbfffSSteve French 60*d14bbfffSSteve French static __always_inline u32 lz77_hash32(const u32 v, const unsigned int log2) 61*d14bbfffSSteve French { 62*d14bbfffSSteve French return ((v * 2654435769LL) >> (32 - log2)) & lz77_hash_mask(log2); 63*d14bbfffSSteve French } 64*d14bbfffSSteve French 65*d14bbfffSSteve French static __always_inline u32 lz77_log2(unsigned int x) 66*d14bbfffSSteve French { 67*d14bbfffSSteve French return x ? ((u32)(31 - __builtin_clz(x))) : 0; 68*d14bbfffSSteve French } 69*d14bbfffSSteve French 70*d14bbfffSSteve French #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 71*d14bbfffSSteve French static __always_inline u8 lz77_read8(const void *ptr) 72*d14bbfffSSteve French { 73*d14bbfffSSteve French return *(u8 *)ptr; 74*d14bbfffSSteve French } 75*d14bbfffSSteve French 76*d14bbfffSSteve French static __always_inline u16 lz77_read16(const void *ptr) 77*d14bbfffSSteve French { 78*d14bbfffSSteve French return *(u16 *)ptr; 79*d14bbfffSSteve French } 80*d14bbfffSSteve French 81*d14bbfffSSteve French static __always_inline u32 lz77_read32(const void *ptr) 82*d14bbfffSSteve French { 83*d14bbfffSSteve French return *(u32 *)ptr; 84*d14bbfffSSteve French } 85*d14bbfffSSteve French 86*d14bbfffSSteve French static __always_inline u64 lz77_read64(const void *ptr) 87*d14bbfffSSteve French { 88*d14bbfffSSteve French return *(u64 *)ptr; 89*d14bbfffSSteve French } 90*d14bbfffSSteve French 91*d14bbfffSSteve French static __always_inline void lz77_write8(void *ptr, const u8 v) 92*d14bbfffSSteve French { 93*d14bbfffSSteve French *(u8 *)ptr = v; 94*d14bbfffSSteve French } 95*d14bbfffSSteve French 96*d14bbfffSSteve French static __always_inline void lz77_write16(void *ptr, const u16 v) 97*d14bbfffSSteve French { 98*d14bbfffSSteve French *(u16 *)ptr = v; 99*d14bbfffSSteve French } 100*d14bbfffSSteve French 101*d14bbfffSSteve French static __always_inline void lz77_write32(void *ptr, const u32 v) 102*d14bbfffSSteve French { 103*d14bbfffSSteve French *(u32 *)ptr = v; 104*d14bbfffSSteve French } 105*d14bbfffSSteve French 106*d14bbfffSSteve French static __always_inline void lz77_write64(void *ptr, const u64 v) 107*d14bbfffSSteve French { 108*d14bbfffSSteve French *(u64 *)ptr = v; 109*d14bbfffSSteve French } 110*d14bbfffSSteve French 111*d14bbfffSSteve French static __always_inline void lz77_write_ptr16(void *ptr, const void *vp) 112*d14bbfffSSteve French { 113*d14bbfffSSteve French *(u16 *)ptr = *(const u16 *)vp; 114*d14bbfffSSteve French } 115*d14bbfffSSteve French 116*d14bbfffSSteve French static __always_inline void lz77_write_ptr32(void *ptr, const void *vp) 117*d14bbfffSSteve French { 118*d14bbfffSSteve French *(u32 *)ptr = *(const u32 *)vp; 119*d14bbfffSSteve French } 120*d14bbfffSSteve French 121*d14bbfffSSteve French static __always_inline void lz77_write_ptr64(void *ptr, const void *vp) 122*d14bbfffSSteve French { 123*d14bbfffSSteve French *(u64 *)ptr = *(const u64 *)vp; 124*d14bbfffSSteve French } 125*d14bbfffSSteve French 126*d14bbfffSSteve French static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count) 127*d14bbfffSSteve French { 128*d14bbfffSSteve French return copy_from_kernel_nofault(dst, src, count); 129*d14bbfffSSteve French } 130*d14bbfffSSteve French #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ 131*d14bbfffSSteve French static __always_inline u8 lz77_read8(const void *ptr) 132*d14bbfffSSteve French { 133*d14bbfffSSteve French return get_unaligned((u8 *)ptr); 134*d14bbfffSSteve French } 135*d14bbfffSSteve French 136*d14bbfffSSteve French static __always_inline u16 lz77_read16(const void *ptr) 137*d14bbfffSSteve French { 138*d14bbfffSSteve French return lz77_read8(ptr) | (lz77_read8(ptr + 1) << 8); 139*d14bbfffSSteve French } 140*d14bbfffSSteve French 141*d14bbfffSSteve French static __always_inline u32 lz77_read32(const void *ptr) 142*d14bbfffSSteve French { 143*d14bbfffSSteve French return lz77_read16(ptr) | (lz77_read16(ptr + 2) << 16); 144*d14bbfffSSteve French } 145*d14bbfffSSteve French 146*d14bbfffSSteve French static __always_inline u64 lz77_read64(const void *ptr) 147*d14bbfffSSteve French { 148*d14bbfffSSteve French return lz77_read32(ptr) | ((u64)lz77_read32(ptr + 4) << 32); 149*d14bbfffSSteve French } 150*d14bbfffSSteve French 151*d14bbfffSSteve French static __always_inline void lz77_write8(void *ptr, const u8 v) 152*d14bbfffSSteve French { 153*d14bbfffSSteve French put_unaligned(v, (u8 *)ptr); 154*d14bbfffSSteve French } 155*d14bbfffSSteve French 156*d14bbfffSSteve French static __always_inline void lz77_write16(void *ptr, const u16 v) 157*d14bbfffSSteve French { 158*d14bbfffSSteve French lz77_write8(ptr, v & 0xff); 159*d14bbfffSSteve French lz77_write8(ptr + 1, (v >> 8) & 0xff); 160*d14bbfffSSteve French } 161*d14bbfffSSteve French 162*d14bbfffSSteve French static __always_inline void lz77_write32(void *ptr, const u32 v) 163*d14bbfffSSteve French { 164*d14bbfffSSteve French lz77_write16(ptr, v & 0xffff); 165*d14bbfffSSteve French lz77_write16(ptr + 2, (v >> 16) & 0xffff); 166*d14bbfffSSteve French } 167*d14bbfffSSteve French 168*d14bbfffSSteve French static __always_inline void lz77_write64(void *ptr, const u64 v) 169*d14bbfffSSteve French { 170*d14bbfffSSteve French lz77_write32(ptr, v & 0xffffffff); 171*d14bbfffSSteve French lz77_write32(ptr + 4, (v >> 32) & 0xffffffff); 172*d14bbfffSSteve French } 173*d14bbfffSSteve French 174*d14bbfffSSteve French static __always_inline void lz77_write_ptr16(void *ptr, const void *vp) 175*d14bbfffSSteve French { 176*d14bbfffSSteve French const u16 v = lz77_read16(vp); 177*d14bbfffSSteve French 178*d14bbfffSSteve French lz77_write16(ptr, v); 179*d14bbfffSSteve French } 180*d14bbfffSSteve French 181*d14bbfffSSteve French static __always_inline void lz77_write_ptr32(void *ptr, const void *vp) 182*d14bbfffSSteve French { 183*d14bbfffSSteve French const u32 v = lz77_read32(vp); 184*d14bbfffSSteve French 185*d14bbfffSSteve French lz77_write32(ptr, v); 186*d14bbfffSSteve French } 187*d14bbfffSSteve French 188*d14bbfffSSteve French static __always_inline void lz77_write_ptr64(void *ptr, const void *vp) 189*d14bbfffSSteve French { 190*d14bbfffSSteve French const u64 v = lz77_read64(vp); 191*d14bbfffSSteve French 192*d14bbfffSSteve French lz77_write64(ptr, v); 193*d14bbfffSSteve French } 194*d14bbfffSSteve French static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count) 195*d14bbfffSSteve French { 196*d14bbfffSSteve French memcpy(dst, src, count); 197*d14bbfffSSteve French return 0; 198*d14bbfffSSteve French } 199*d14bbfffSSteve French #endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ 200*d14bbfffSSteve French 201*d14bbfffSSteve French static __always_inline unsigned int __count_common_bytes(const unsigned long diff) 202*d14bbfffSSteve French { 203*d14bbfffSSteve French #ifdef __has_builtin 204*d14bbfffSSteve French # if __has_builtin(__builtin_ctzll) 205*d14bbfffSSteve French return (unsigned int)__builtin_ctzll(diff) >> 3; 206*d14bbfffSSteve French # endif 207*d14bbfffSSteve French #else 208*d14bbfffSSteve French /* count trailing zeroes */ 209*d14bbfffSSteve French unsigned long bits = 0, i, z = 0; 210*d14bbfffSSteve French 211*d14bbfffSSteve French bits |= diff; 212*d14bbfffSSteve French for (i = 0; i < 64; i++) { 213*d14bbfffSSteve French if (bits[i]) 214*d14bbfffSSteve French break; 215*d14bbfffSSteve French z++; 216*d14bbfffSSteve French } 217*d14bbfffSSteve French 218*d14bbfffSSteve French return (unsigned int)z >> 3; 219*d14bbfffSSteve French #endif 220*d14bbfffSSteve French } 221*d14bbfffSSteve French 222*d14bbfffSSteve French static __always_inline size_t lz77_match(const u8 *match, const u8 *cur, const u8 *end) 223*d14bbfffSSteve French { 224*d14bbfffSSteve French const u8 *start = cur; 225*d14bbfffSSteve French 226*d14bbfffSSteve French if (cur == match) 227*d14bbfffSSteve French return 0; 228*d14bbfffSSteve French 229*d14bbfffSSteve French if (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) { 230*d14bbfffSSteve French u64 const diff = lz77_read64(cur) ^ lz77_read64(match); 231*d14bbfffSSteve French 232*d14bbfffSSteve French if (!diff) { 233*d14bbfffSSteve French cur += LZ77_STEPSIZE_64; 234*d14bbfffSSteve French match += LZ77_STEPSIZE_64; 235*d14bbfffSSteve French } else { 236*d14bbfffSSteve French return __count_common_bytes(diff); 237*d14bbfffSSteve French } 238*d14bbfffSSteve French } 239*d14bbfffSSteve French 240*d14bbfffSSteve French while (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) { 241*d14bbfffSSteve French u64 const diff = lz77_read64(cur) ^ lz77_read64(match); 242*d14bbfffSSteve French 243*d14bbfffSSteve French if (!diff) { 244*d14bbfffSSteve French cur += LZ77_STEPSIZE_64; 245*d14bbfffSSteve French match += LZ77_STEPSIZE_64; 246*d14bbfffSSteve French continue; 247*d14bbfffSSteve French } 248*d14bbfffSSteve French 249*d14bbfffSSteve French cur += __count_common_bytes(diff); 250*d14bbfffSSteve French return (size_t)(cur - start); 251*d14bbfffSSteve French } 252*d14bbfffSSteve French 253*d14bbfffSSteve French if (cur < end - 3 && !(lz77_read32(cur) ^ lz77_read32(match))) { 254*d14bbfffSSteve French cur += LZ77_STEPSIZE_32; 255*d14bbfffSSteve French match += LZ77_STEPSIZE_32; 256*d14bbfffSSteve French } 257*d14bbfffSSteve French 258*d14bbfffSSteve French if (cur < end - 1 && lz77_read16(cur) == lz77_read16(match)) { 259*d14bbfffSSteve French cur += LZ77_STEPSIZE_16; 260*d14bbfffSSteve French match += LZ77_STEPSIZE_16; 261*d14bbfffSSteve French } 262*d14bbfffSSteve French 263*d14bbfffSSteve French if (cur < end && *cur == *match) 264*d14bbfffSSteve French cur++; 265*d14bbfffSSteve French 266*d14bbfffSSteve French return (size_t)(cur - start); 267*d14bbfffSSteve French } 268*d14bbfffSSteve French 269*d14bbfffSSteve French static __always_inline unsigned long lz77_max(unsigned long a, unsigned long b) 270*d14bbfffSSteve French { 271*d14bbfffSSteve French int m = (a < b) - 1; 272*d14bbfffSSteve French 273*d14bbfffSSteve French return (a & m) | (b & ~m); 274*d14bbfffSSteve French } 275*d14bbfffSSteve French 276*d14bbfffSSteve French static __always_inline unsigned long lz77_min(unsigned long a, unsigned long b) 277*d14bbfffSSteve French { 278*d14bbfffSSteve French int m = (a > b) - 1; 279*d14bbfffSSteve French 280*d14bbfffSSteve French return (a & m) | (b & ~m); 281*d14bbfffSSteve French } 282*d14bbfffSSteve French 283*d14bbfffSSteve French int lz77_compress(const u8 *src, size_t src_len, u8 *dst, size_t *dst_len); 284*d14bbfffSSteve French /* when CONFIG_CIFS_COMPRESSION not set lz77_compress() is not called */ 285*d14bbfffSSteve French #endif /* !CONFIG_CIFS_COMPRESSION */ 286*d14bbfffSSteve French #endif /* _SMB_COMPRESS_LZ77_H */ 287