/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * decompress_common.h - Code shared by the XPRESS and LZX decompressors
 *
 * Copyright (C) 2015 Eric Biggers
 */

#ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
#define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H

#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/unaligned.h>


/* "Force inline" macro (not required, but helpful for performance) */
#define forceinline __always_inline

/* Enable whole-word match copying on selected architectures */
#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
#  define FAST_UNALIGNED_ACCESS
#endif

/* Size of a machine word */
#define WORDBYTES (sizeof(size_t))

static forceinline void
copy_unaligned_word(const void *src, void *dst)
{
	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
}


/* Generate a "word" with platform-dependent size whose bytes all contain the
 * value 'b'.
 */
static forceinline size_t repeat_byte(u8 b)
{
	size_t v;

	v = b;
	v |= v << 8;
	v |= v << 16;
	/* On 64-bit, propagate into the high half; on 32-bit the shift by 0
	 * is a no-op.
	 */
	v |= v << ((WORDBYTES == 8) ? 32 : 0);
	return v;
}

/* Structure that encapsulates a block of in-memory data being interpreted as a
 * stream of bits, optionally with interwoven literal bytes. Bits are assumed
 * to be stored in little endian 16-bit coding units, with the bits ordered
 * high to low.
 */
struct input_bitstream {

	/* Bits that have been read from the input buffer. The bits are
	 * left-justified; the next bit is always bit 31.
	 */
	u32 bitbuf;

	/* Number of bits currently held in @bitbuf. */
	u32 bitsleft;

	/* Pointer to the next byte to be retrieved from the input buffer. */
	const u8 *next;

	/* Pointer to just past the end of the input buffer. */
	const u8 *end;
};

/* Initialize a bitstream to read from the specified input buffer. */
static forceinline void init_input_bitstream(struct input_bitstream *is,
					     const void *buffer, u32 size)
{
	is->bitbuf = 0;
	is->bitsleft = 0;
	is->next = buffer;
	is->end = is->next + size;
}

/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
 * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
 * may be called on the bitstream to peek or remove up to @num_bits bits. Note
 * that @num_bits must be <= 16. If the input is exhausted, the stream behaves
 * as if 16 zero bits were appended.
 */
static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
					      u32 num_bits)
{
	if (is->bitsleft < num_bits) {
		if (is->end - is->next >= 2) {
			is->bitbuf |= (u32)get_unaligned_le16(is->next)
					<< (16 - is->bitsleft);
			is->next += 2;
		}
		is->bitsleft += 16;
	}
}

/* Return the next @num_bits bits from the bitstream, without removing them.
 * There must be at least @num_bits remaining in the buffer variable, from a
 * previous call to bitstream_ensure_bits(). The shift is split in two so that
 * @num_bits == 0 does not trigger an undefined shift by 32.
 */
static forceinline u32
bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
{
	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}
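/*
 * Illustrative calling pattern (a sketch only; the field widths and variable
 * names are made up, not taken from any real format): ensure bits once, then
 * peek and remove any combination of bits covered by the ensured amount.
 * bitstream_pop_bits() and bitstream_read_bits() below wrap this sequence.
 *
 *	struct input_bitstream is;
 *	u32 kind, extra;
 *
 *	init_input_bitstream(&is, compressed_buf, compressed_size);
 *	bitstream_ensure_bits(&is, 12);
 *	kind = bitstream_peek_bits(&is, 4);	// inspect 4 bits...
 *	bitstream_remove_bits(&is, 4);		// ...then consume them
 *	extra = bitstream_peek_bits(&is, 8);	// 4 + 8 <= 12, still covered
 *	bitstream_remove_bits(&is, 8);
 */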
/* Remove @num_bits from the bitstream. There must be at least @num_bits
 * remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline void
bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
{
	is->bitbuf <<= num_bits;
	is->bitsleft -= num_bits;
}

/* Remove and return @num_bits bits from the bitstream. There must be at least
 * @num_bits remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline u32
bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
{
	u32 bits = bitstream_peek_bits(is, num_bits);

	bitstream_remove_bits(is, num_bits);
	return bits;
}

/* Read and return the next @num_bits bits from the bitstream. */
static forceinline u32
bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
{
	bitstream_ensure_bits(is, num_bits);
	return bitstream_pop_bits(is, num_bits);
}

/* Read and return the next literal byte embedded in the bitstream. */
static forceinline u8
bitstream_read_byte(struct input_bitstream *is)
{
	if (unlikely(is->end == is->next))
		return 0;
	return *is->next++;
}

/* Read and return the next 16-bit integer embedded in the bitstream. */
static forceinline u16
bitstream_read_u16(struct input_bitstream *is)
{
	u16 v;

	if (unlikely(is->end - is->next < 2))
		return 0;
	v = get_unaligned_le16(is->next);
	is->next += 2;
	return v;
}

/* Read and return the next 32-bit integer embedded in the bitstream. */
static forceinline u32
bitstream_read_u32(struct input_bitstream *is)
{
	u32 v;

	if (unlikely(is->end - is->next < 4))
		return 0;
	v = get_unaligned_le32(is->next);
	is->next += 4;
	return v;
}

/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
 * Return a pointer just past the last byte written, or NULL if the read would
 * overflow the input buffer.
 */
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
					      void *dst_buffer, size_t count)
{
	if ((size_t)(is->end - is->next) < count)
		return NULL;
	memcpy(dst_buffer, is->next, count);
	is->next += count;
	return (u8 *)dst_buffer + count;
}

/* Align the input bitstream on a coding-unit boundary by discarding any
 * buffered bits.
 */
static forceinline void bitstream_align(struct input_bitstream *is)
{
	is->bitsleft = 0;
	is->bitbuf = 0;
}

/* Build the decode table used by read_huffsym(), given the codeword length of
 * each symbol in the code.
 */
extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
				     const u32 num_bits, const u8 lens[],
				     const u32 max_codeword_len,
				     u16 working_space[]);
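/*
 * Build-and-decode sketch (illustrative only; the constants are hypothetical,
 * and the table/scratch sizing and the nonzero-on-failure return convention
 * shown here are assumptions about the decompressors, not something this
 * header specifies):
 *
 *	#define NUM_SYMS	256	// hypothetical alphabet size
 *	#define TABLE_BITS	10	// direct-lookup index width
 *	#define MAX_CW_LEN	15	// longest allowed codeword, in bits
 *
 *	u8 lens[NUM_SYMS];		// codeword length per symbol, 0 = unused
 *	u16 table[(1 << TABLE_BITS) + 2 * NUM_SYMS];	// assumed sizing
 *	u16 work[2 * (MAX_CW_LEN + 1) + NUM_SYMS];	// assumed sizing
 *	u32 sym;
 *
 *	// ... fill lens[] from the compressed stream ...
 *	if (make_huffman_decode_table(table, NUM_SYMS, TABLE_BITS, lens,
 *				      MAX_CW_LEN, work))
 *		return -EINVAL;		// assumed: nonzero means invalid code
 *	sym = read_huffsym(&is, table, TABLE_BITS, MAX_CW_LEN);
 */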
/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
 * input data is exhausted, the Huffman symbol is decoded as if the missing
 * bits were all zeroes.
 */
static forceinline u32 read_huffsym(struct input_bitstream *istream,
				    const u16 decode_table[],
				    u32 table_bits,
				    u32 max_codeword_len)
{
	u32 entry;
	u32 key_bits;

	bitstream_ensure_bits(istream, max_codeword_len);

	/* Index the decode table by the next table_bits bits of the input. */
	key_bits = bitstream_peek_bits(istream, table_bits);
	entry = decode_table[key_bits];
	if (entry < 0xC000) {
		/* Fast case: The decode table directly provided the
		 * symbol and codeword length. The low 11 bits are the
		 * symbol, and the high 5 bits are the codeword length.
		 */
		bitstream_remove_bits(istream, entry >> 11);
		return entry & 0x7FF;
	}
	/* Slow case: The codeword for the symbol is longer than
	 * table_bits, so the symbol does not have an entry
	 * directly in the first (1 << table_bits) entries of the
	 * decode table. Traverse the appropriate binary tree
	 * bit-by-bit to decode the symbol.
	 */
	bitstream_remove_bits(istream, table_bits);
	do {
		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
	} while ((entry = decode_table[key_bits]) >= 0xC000);
	return entry;
}
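/*
 * Worked example for lz_copy() below (values made up): if the output so far
 * is "abcab" and the decoder produces a match with offset = 3 and length = 4,
 * the copy reads from (dst - 3) while writing, so the output grows one byte
 * at a time to "abcabcabc". Overlapping matches like this replicate recently
 * written data, which is why the copy must proceed in increasing address
 * order (or in whole-word steps only when offset >= WORDBYTES).
 */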
/*
 * Copy an LZ77 match at (dst - offset) to dst.
 *
 * The length and offset must be already validated --- that is, (dst - offset)
 * can't underrun the output buffer, and (dst + length) can't overrun the
 * output buffer. Also, the length cannot be 0.
 *
 * @bufend points to the byte past the end of the output buffer. This function
 * won't write any data beyond this position.
 *
 * @min_length is the minimum match length the compression format allows. At
 * the call sites it is a compile-time constant, which lets the compiler prune
 * the unrolled first iterations of the bytewise fallback below.
 *
 * Returns dst + length.
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
{
	const u8 *src = dst - offset;

	/*
	 * Try to copy one machine word at a time. On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths. Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 *
	 * Also note that we might copy more than the length of the match. For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes. This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;

	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap. */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled. Most matches are short and will
			 * fail the first check. But if that check passes, then
			 * it becomes increasingly likely that the match is
			 * long and we'll need to continue copying.
			 */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte. This case is common
			 * if the data contains many repeated bytes.
			 */
			size_t v = repeat_byte(*(dst - 1));

			do {
				put_unaligned(v, (size_t *)dst);
				src += WORDBYTES;
				dst += WORDBYTES;
			} while (dst < end);
			return end;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down. Actually, it's
		 * possible to handle all the 'offset < WORDBYTES' cases using
		 * the same code, but it still becomes more complicated and
		 * doesn't seem any faster overall; it definitely slows down
		 * the more common 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy. The first one or two bytes are copied
	 * unconditionally when @min_length guarantees they exist; since
	 * min_length is a compile-time constant at the call sites, these
	 * branches fold away.
	 */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
}

#endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */