1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 /* zstd_decompress_block :
13 * this module takes care of decompressing _compressed_ blocks */
14
15 /*-*******************************************************
16 * Dependencies
17 *********************************************************/
18 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19 #include "../common/compiler.h" /* prefetch */
20 #include "../common/cpu.h" /* bmi2 */
21 #include "../common/mem.h" /* low level memory routines */
22 #define FSE_STATIC_LINKING_ONLY
23 #include "../common/fse.h"
24 #include "../common/huf.h"
25 #include "../common/zstd_internal.h"
26 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28 #include "zstd_decompress_block.h"
29 #include "../common/bits.h" /* ZSTD_highbit32 */
30
31 /*_*******************************************************
32 * Macros
33 **********************************************************/
34
35 /* These two optional macros each force the use of one of the two
36 * ZSTD_decompressSequences implementations. They cannot both be defined
37 * at the same time.
38 */
39 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
40 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
41 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
42 #endif
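/* Example (illustrative) : compiling with
 *   cc -c -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT zstd_decompress_block.c
 * always selects the "short" variant, bypassing the runtime heuristic. */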
43
44
45 /*_*******************************************************
46 * Memory operations
47 **********************************************************/
48 static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
49
50
51 /*-*************************************************************
52 * Block decoding
53 ***************************************************************/
54
55 static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
56 {
57 size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
58 assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
59 return blockSizeMax;
60 }
61
62 /*! ZSTD_getcBlockSize() :
63 * Provides the size of compressed block from block header `src` */
64 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
65 blockProperties_t* bpPtr)
66 {
67 RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
68
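/* Block_Header is 3 bytes, little-endian :
 * bit 0 = Last_Block flag, bits 1-2 = Block_Type, bits 3-23 = Block_Size.
 * For RLE blocks, the size field carries the regenerated size, and the compressed payload is a single byte. */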
69 { U32 const cBlockHeader = MEM_readLE24(src);
70 U32 const cSize = cBlockHeader >> 3;
71 bpPtr->lastBlock = cBlockHeader & 1;
72 bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
73 bpPtr->origSize = cSize; /* only useful for RLE */
74 if (bpPtr->blockType == bt_rle) return 1;
75 RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
76 return cSize;
77 }
78 }
79
80 /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
81 static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
82 const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
83 {
84 size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
85 assert(litSize <= blockSizeMax);
86 assert(dctx->isFrameDecompression || streaming == not_streaming);
87 assert(expectedWriteSize <= blockSizeMax);
88 if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
89 /* If we aren't streaming, we can just put the literals after the output
90 * of the current block. We don't need to worry about overwriting the
91 * extDict of our window, because it doesn't exist.
92 * So if we have space after the end of the block, just put it there.
93 */
94 dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
95 dctx->litBufferEnd = dctx->litBuffer + litSize;
96 dctx->litBufferLocation = ZSTD_in_dst;
97 } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
98 /* Literals fit entirely within the extra buffer, put them there to avoid
99 * having to split the literals.
100 */
101 dctx->litBuffer = dctx->litExtraBuffer;
102 dctx->litBufferEnd = dctx->litBuffer + litSize;
103 dctx->litBufferLocation = ZSTD_not_in_dst;
104 } else {
105 assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
106 /* Literals must be split between the output block and the extra lit
107 * buffer. We fill the extra lit buffer with the tail of the literals,
108 * and put the rest of the literals at the end of the block, with
109 * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
110 * This MUST not write more than our maxBlockSize beyond dst, because in
111 * streaming mode, that could overwrite part of our extDict window.
112 */
113 if (splitImmediately) {
114 /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
115 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
116 dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
117 } else {
118 /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
119 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
120 dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
121 }
122 dctx->litBufferLocation = ZSTD_split;
123 assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
124 }
125 }
126
127 /*! ZSTD_decodeLiteralsBlock() :
128 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
129 * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
130 * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
131 * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
132 *
133 * @return : nb of bytes read from src (< srcSize )
134 * note : symbol not declared but exposed for fullbench */
135 static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
136 const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
137 void* dst, size_t dstCapacity, const streaming_operation streaming)
138 {
139 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
140 RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
141
142 { const BYTE* const istart = (const BYTE*) src;
143 SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3);
144 size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
145
146 switch(litEncType)
147 {
148 case set_repeat:
149 DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
150 RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
151 ZSTD_FALLTHROUGH;
152
153 case set_compressed:
154 RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
155 { size_t lhSize, litSize, litCSize;
156 U32 singleStream=0;
157 U32 const lhlCode = (istart[0] >> 2) & 3;
158 U32 const lhc = MEM_readLE32(istart);
159 size_t hufSuccess;
160 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
161 int const flags = 0
162 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
163 | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
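/* Literals_Section_Header for compressed literals :
 * the "2 - 2 - n - n" comments below give the bit widths of
 * Literals_Block_Type, Size_Format, Regenerated_Size and Compressed_Size,
 * packed little-endian starting at istart[0]. */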
164 switch(lhlCode)
165 {
166 case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
167 /* 2 - 2 - 10 - 10 */
168 singleStream = !lhlCode;
169 lhSize = 3;
170 litSize = (lhc >> 4) & 0x3FF;
171 litCSize = (lhc >> 14) & 0x3FF;
172 break;
173 case 2:
174 /* 2 - 2 - 14 - 14 */
175 lhSize = 4;
176 litSize = (lhc >> 4) & 0x3FFF;
177 litCSize = lhc >> 18;
178 break;
179 case 3:
180 /* 2 - 2 - 18 - 18 */
181 lhSize = 5;
182 litSize = (lhc >> 4) & 0x3FFFF;
183 litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
184 break;
185 }
186 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
187 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
188 if (!singleStream)
189 RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
190 "Not enough literals (%zu) for the 4-streams mode (min %u)",
191 litSize, MIN_LITERALS_FOR_4_STREAMS);
192 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
193 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
194 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
195
196 /* prefetch huffman table if cold */
197 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
198 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
199 }
200
201 if (litEncType==set_repeat) {
202 if (singleStream) {
203 hufSuccess = HUF_decompress1X_usingDTable(
204 dctx->litBuffer, litSize, istart+lhSize, litCSize,
205 dctx->HUFptr, flags);
206 } else {
207 assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
208 hufSuccess = HUF_decompress4X_usingDTable(
209 dctx->litBuffer, litSize, istart+lhSize, litCSize,
210 dctx->HUFptr, flags);
211 }
212 } else {
213 if (singleStream) {
214 #if defined(HUF_FORCE_DECOMPRESS_X2)
215 hufSuccess = HUF_decompress1X_DCtx_wksp(
216 dctx->entropy.hufTable, dctx->litBuffer, litSize,
217 istart+lhSize, litCSize, dctx->workspace,
218 sizeof(dctx->workspace), flags);
219 #else
220 hufSuccess = HUF_decompress1X1_DCtx_wksp(
221 dctx->entropy.hufTable, dctx->litBuffer, litSize,
222 istart+lhSize, litCSize, dctx->workspace,
223 sizeof(dctx->workspace), flags);
224 #endif
225 } else {
226 hufSuccess = HUF_decompress4X_hufOnly_wksp(
227 dctx->entropy.hufTable, dctx->litBuffer, litSize,
228 istart+lhSize, litCSize, dctx->workspace,
229 sizeof(dctx->workspace), flags);
230 }
231 }
232 if (dctx->litBufferLocation == ZSTD_split)
233 {
234 assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
235 ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
236 ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
237 dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
238 dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
239 assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
240 }
241
242 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
243
244 dctx->litPtr = dctx->litBuffer;
245 dctx->litSize = litSize;
246 dctx->litEntropy = 1;
247 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
248 return litCSize + lhSize;
249 }
250
251 case set_basic:
252 { size_t litSize, lhSize;
253 U32 const lhlCode = ((istart[0]) >> 2) & 3;
254 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
255 switch(lhlCode)
256 {
257 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
258 lhSize = 1;
259 litSize = istart[0] >> 3;
260 break;
261 case 1:
262 lhSize = 2;
263 litSize = MEM_readLE16(istart) >> 4;
264 break;
265 case 3:
266 lhSize = 3;
267 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
268 litSize = MEM_readLE24(istart) >> 4;
269 break;
270 }
271
272 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
273 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
274 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
275 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
276 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
277 RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
278 if (dctx->litBufferLocation == ZSTD_split)
279 {
280 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
281 ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
282 }
283 else
284 {
285 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
286 }
287 dctx->litPtr = dctx->litBuffer;
288 dctx->litSize = litSize;
289 return lhSize+litSize;
290 }
291 /* direct reference into compressed stream */
292 dctx->litPtr = istart+lhSize;
293 dctx->litSize = litSize;
294 dctx->litBufferEnd = dctx->litPtr + litSize;
295 dctx->litBufferLocation = ZSTD_not_in_dst;
296 return lhSize+litSize;
297 }
298
299 case set_rle:
300 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
301 size_t litSize, lhSize;
302 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
303 switch(lhlCode)
304 {
305 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
306 lhSize = 1;
307 litSize = istart[0] >> 3;
308 break;
309 case 1:
310 lhSize = 2;
311 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
312 litSize = MEM_readLE16(istart) >> 4;
313 break;
314 case 3:
315 lhSize = 3;
316 RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
317 litSize = MEM_readLE24(istart) >> 4;
318 break;
319 }
320 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
321 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
322 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
323 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
324 if (dctx->litBufferLocation == ZSTD_split)
325 {
326 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
327 ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
328 }
329 else
330 {
331 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
332 }
333 dctx->litPtr = dctx->litBuffer;
334 dctx->litSize = litSize;
335 return lhSize+1;
336 }
337 default:
338 RETURN_ERROR(corruption_detected, "impossible");
339 }
340 }
341 }
342
343 /* Hidden declaration for fullbench */
344 size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
345 const void* src, size_t srcSize,
346 void* dst, size_t dstCapacity);
347 size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
348 const void* src, size_t srcSize,
349 void* dst, size_t dstCapacity)
350 {
351 dctx->isFrameDecompression = 0;
352 return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
353 }
354
355 /* Default FSE distribution tables.
356 * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
357 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
358 * They were generated programmatically with the following method :
359 * - start from the default distributions, present in /lib/common/zstd_internal.h
360 * - generate the tables normally, using ZSTD_buildFSETable()
361 * - print out the content of the tables
362 * - prettify the output (reported below) and test with the fuzzer to ensure it's correct */
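/* Illustrative sketch (not compiled) of how such a table can be regenerated,
 * assuming the default normalized counters declared in zstd_internal.h :
 *
 *   ZSTD_seqSymbol dt[(1<<LL_DEFAULTNORMLOG)+1];
 *   U32 wksp[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE / sizeof(U32)];
 *   ZSTD_buildFSETable(dt, LL_defaultNorm, MaxLL,
 *                      LL_base, LL_bits, LL_DEFAULTNORMLOG,
 *                      wksp, sizeof(wksp), 0);   <- last argument selects bmi2
 *   the content of dt[] is then printed and pasted (prettified) below.
 */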
363
364 /* Default FSE distribution table for Literal Lengths */
365 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
366 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
367 /* nextState, nbAddBits, nbBits, baseVal */
368 { 0, 0, 4, 0}, { 16, 0, 4, 0},
369 { 32, 0, 5, 1}, { 0, 0, 5, 3},
370 { 0, 0, 5, 4}, { 0, 0, 5, 6},
371 { 0, 0, 5, 7}, { 0, 0, 5, 9},
372 { 0, 0, 5, 10}, { 0, 0, 5, 12},
373 { 0, 0, 6, 14}, { 0, 1, 5, 16},
374 { 0, 1, 5, 20}, { 0, 1, 5, 22},
375 { 0, 2, 5, 28}, { 0, 3, 5, 32},
376 { 0, 4, 5, 48}, { 32, 6, 5, 64},
377 { 0, 7, 5, 128}, { 0, 8, 6, 256},
378 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
379 { 32, 0, 4, 0}, { 0, 0, 4, 1},
380 { 0, 0, 5, 2}, { 32, 0, 5, 4},
381 { 0, 0, 5, 5}, { 32, 0, 5, 7},
382 { 0, 0, 5, 8}, { 32, 0, 5, 10},
383 { 0, 0, 5, 11}, { 0, 0, 6, 13},
384 { 32, 1, 5, 16}, { 0, 1, 5, 18},
385 { 32, 1, 5, 22}, { 0, 2, 5, 24},
386 { 32, 3, 5, 32}, { 0, 3, 5, 40},
387 { 0, 6, 4, 64}, { 16, 6, 4, 64},
388 { 32, 7, 5, 128}, { 0, 9, 6, 512},
389 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
390 { 16, 0, 4, 1}, { 32, 0, 5, 2},
391 { 32, 0, 5, 3}, { 32, 0, 5, 5},
392 { 32, 0, 5, 6}, { 32, 0, 5, 8},
393 { 32, 0, 5, 9}, { 32, 0, 5, 11},
394 { 32, 0, 5, 12}, { 0, 0, 6, 15},
395 { 32, 1, 5, 18}, { 32, 1, 5, 20},
396 { 32, 2, 5, 24}, { 32, 2, 5, 28},
397 { 32, 3, 5, 40}, { 32, 4, 5, 48},
398 { 0, 16, 6,65536}, { 0, 15, 6,32768},
399 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
400 }; /* LL_defaultDTable */
401
402 /* Default FSE distribution table for Offset Codes */
403 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
404 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
405 /* nextState, nbAddBits, nbBits, baseVal */
406 { 0, 0, 5, 0}, { 0, 6, 4, 61},
407 { 0, 9, 5, 509}, { 0, 15, 5,32765},
408 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
409 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
410 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
411 { 0, 5, 5, 29}, { 0, 8, 4, 253},
412 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
413 { 0, 2, 5, 1}, { 16, 7, 4, 125},
414 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
415 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
416 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
417 { 0, 19, 5,524285}, { 0, 1, 5, 1},
418 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
419 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
420 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
421 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
422 }; /* OF_defaultDTable */
423
424
425 /* Default FSE distribution table for Match Lengths */
426 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
427 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
428 /* nextState, nbAddBits, nbBits, baseVal */
429 { 0, 0, 6, 3}, { 0, 0, 4, 4},
430 { 32, 0, 5, 5}, { 0, 0, 5, 6},
431 { 0, 0, 5, 8}, { 0, 0, 5, 9},
432 { 0, 0, 5, 11}, { 0, 0, 6, 13},
433 { 0, 0, 6, 16}, { 0, 0, 6, 19},
434 { 0, 0, 6, 22}, { 0, 0, 6, 25},
435 { 0, 0, 6, 28}, { 0, 0, 6, 31},
436 { 0, 0, 6, 34}, { 0, 1, 6, 37},
437 { 0, 1, 6, 41}, { 0, 2, 6, 47},
438 { 0, 3, 6, 59}, { 0, 4, 6, 83},
439 { 0, 7, 6, 131}, { 0, 9, 6, 515},
440 { 16, 0, 4, 4}, { 0, 0, 4, 5},
441 { 32, 0, 5, 6}, { 0, 0, 5, 7},
442 { 32, 0, 5, 9}, { 0, 0, 5, 10},
443 { 0, 0, 6, 12}, { 0, 0, 6, 15},
444 { 0, 0, 6, 18}, { 0, 0, 6, 21},
445 { 0, 0, 6, 24}, { 0, 0, 6, 27},
446 { 0, 0, 6, 30}, { 0, 0, 6, 33},
447 { 0, 1, 6, 35}, { 0, 1, 6, 39},
448 { 0, 2, 6, 43}, { 0, 3, 6, 51},
449 { 0, 4, 6, 67}, { 0, 5, 6, 99},
450 { 0, 8, 6, 259}, { 32, 0, 4, 4},
451 { 48, 0, 4, 4}, { 16, 0, 4, 5},
452 { 32, 0, 5, 7}, { 32, 0, 5, 8},
453 { 32, 0, 5, 10}, { 32, 0, 5, 11},
454 { 0, 0, 6, 14}, { 0, 0, 6, 17},
455 { 0, 0, 6, 20}, { 0, 0, 6, 23},
456 { 0, 0, 6, 26}, { 0, 0, 6, 29},
457 { 0, 0, 6, 32}, { 0, 16, 6,65539},
458 { 0, 15, 6,32771}, { 0, 14, 6,16387},
459 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
460 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
461 }; /* ML_defaultDTable */
462
463
464 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
465 {
466 void* ptr = dt;
467 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
468 ZSTD_seqSymbol* const cell = dt + 1;
469
470 DTableH->tableLog = 0;
471 DTableH->fastMode = 0;
472
473 cell->nbBits = 0;
474 cell->nextState = 0;
475 assert(nbAddBits < 255);
476 cell->nbAdditionalBits = nbAddBits;
477 cell->baseValue = baseValue;
478 }
479
480
481 /* ZSTD_buildFSETable() :
482 * generate FSE decoding table for one symbol (ll, ml or off)
483 * cannot fail if input is valid =>
484 * all inputs are presumed validated at this stage */
485 FORCE_INLINE_TEMPLATE
486 void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
487 const short* normalizedCounter, unsigned maxSymbolValue,
488 const U32* baseValue, const U8* nbAdditionalBits,
489 unsigned tableLog, void* wksp, size_t wkspSize)
490 {
491 ZSTD_seqSymbol* const tableDecode = dt+1;
492 U32 const maxSV1 = maxSymbolValue + 1;
493 U32 const tableSize = 1 << tableLog;
494
495 U16* symbolNext = (U16*)wksp;
496 BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
497 U32 highThreshold = tableSize - 1;
498
499
500 /* Sanity Checks */
501 assert(maxSymbolValue <= MaxSeq);
502 assert(tableLog <= MaxFSELog);
503 assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
504 (void)wkspSize;
505 /* Init, lay down lowprob symbols */
506 { ZSTD_seqSymbol_header DTableH;
507 DTableH.tableLog = tableLog;
508 DTableH.fastMode = 1;
509 { S16 const largeLimit= (S16)(1 << (tableLog-1));
510 U32 s;
511 for (s=0; s<maxSV1; s++) {
512 if (normalizedCounter[s]==-1) {
513 tableDecode[highThreshold--].baseValue = s;
514 symbolNext[s] = 1;
515 } else {
516 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
517 assert(normalizedCounter[s]>=0);
518 symbolNext[s] = (U16)normalizedCounter[s];
519 } } }
520 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
521 }
522
523 /* Spread symbols */
524 assert(tableSize <= 512);
525 /* Specialized symbol spreading for the case when there are
526 * no low probability (-1 count) symbols. When compressing
527 * small blocks, the encoder avoids low probability symbols so that
528 * this fast case is hit, since header decoding speed matters more.
529 */
530 if (highThreshold == tableSize - 1) {
531 size_t const tableMask = tableSize-1;
532 size_t const step = FSE_TABLESTEP(tableSize);
533 /* First lay down the symbols in order.
534 * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
535 * misses since small blocks generally have small table logs, so nearly
536 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
537 * our buffer to handle the over-write.
538 */
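/* Example : a symbol with normalizedCounter == 5 occupies 5 consecutive
 * slots of spread[]; the second stage below then scatters spread[] across
 * the table at stride `step`, visiting every cell exactly once. */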
539 {
540 U64 const add = 0x0101010101010101ull;
541 size_t pos = 0;
542 U64 sv = 0;
543 U32 s;
544 for (s=0; s<maxSV1; ++s, sv += add) {
545 int i;
546 int const n = normalizedCounter[s];
547 MEM_write64(spread + pos, sv);
548 for (i = 8; i < n; i += 8) {
549 MEM_write64(spread + pos + i, sv);
550 }
551 assert(n>=0);
552 pos += (size_t)n;
553 }
554 }
555 /* Now we spread those positions across the table.
556 * The benefit of doing it in two stages is that we avoid the
557 * variable size inner loop, which caused lots of branch misses.
558 * Now we can run through all the positions without any branch misses.
559 * We unroll the loop twice, since that is what empirically worked best.
560 */
561 {
562 size_t position = 0;
563 size_t s;
564 size_t const unroll = 2;
565 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
566 for (s = 0; s < (size_t)tableSize; s += unroll) {
567 size_t u;
568 for (u = 0; u < unroll; ++u) {
569 size_t const uPosition = (position + (u * step)) & tableMask;
570 tableDecode[uPosition].baseValue = spread[s + u];
571 }
572 position = (position + (unroll * step)) & tableMask;
573 }
574 assert(position == 0);
575 }
576 } else {
577 U32 const tableMask = tableSize-1;
578 U32 const step = FSE_TABLESTEP(tableSize);
579 U32 s, position = 0;
580 for (s=0; s<maxSV1; s++) {
581 int i;
582 int const n = normalizedCounter[s];
583 for (i=0; i<n; i++) {
584 tableDecode[position].baseValue = s;
585 position = (position + step) & tableMask;
586 while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
587 } }
588 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
589 }
590
591 /* Build Decoding table */
592 {
593 U32 u;
594 for (u=0; u<tableSize; u++) {
595 U32 const symbol = tableDecode[u].baseValue;
596 U32 const nextState = symbolNext[symbol]++;
597 tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
598 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
599 assert(nbAdditionalBits[symbol] < 255);
600 tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
601 tableDecode[u].baseValue = baseValue[symbol];
602 }
603 }
604 }
605
606 /* Avoids the FORCE_INLINE of the _body() function. */
607 static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
608 const short* normalizedCounter, unsigned maxSymbolValue,
609 const U32* baseValue, const U8* nbAdditionalBits,
610 unsigned tableLog, void* wksp, size_t wkspSize)
611 {
612 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
613 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
614 }
615
616 #if DYNAMIC_BMI2
617 BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
618 const short* normalizedCounter, unsigned maxSymbolValue,
619 const U32* baseValue, const U8* nbAdditionalBits,
620 unsigned tableLog, void* wksp, size_t wkspSize)
621 {
622 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
623 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
624 }
625 #endif
626
627 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
628 const short* normalizedCounter, unsigned maxSymbolValue,
629 const U32* baseValue, const U8* nbAdditionalBits,
630 unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
631 {
632 #if DYNAMIC_BMI2
633 if (bmi2) {
634 ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
635 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
636 return;
637 }
638 #endif
639 (void)bmi2;
640 ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
641 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
642 }
643
644
645 /*! ZSTD_buildSeqTable() :
646 * @return : nb bytes read from src,
647 * or an error code if it fails */
648 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
649 SymbolEncodingType_e type, unsigned max, U32 maxLog,
650 const void* src, size_t srcSize,
651 const U32* baseValue, const U8* nbAdditionalBits,
652 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
653 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
654 int bmi2)
655 {
656 switch(type)
657 {
658 case set_rle :
659 RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
660 RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
661 { U32 const symbol = *(const BYTE*)src;
662 U32 const baseline = baseValue[symbol];
663 U8 const nbBits = nbAdditionalBits[symbol];
664 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
665 }
666 *DTablePtr = DTableSpace;
667 return 1;
668 case set_basic :
669 *DTablePtr = defaultTable;
670 return 0;
671 case set_repeat:
672 RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
673 /* prefetch FSE table if used */
674 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
675 const void* const pStart = *DTablePtr;
676 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
677 PREFETCH_AREA(pStart, pSize);
678 }
679 return 0;
680 case set_compressed :
681 { unsigned tableLog;
682 S16 norm[MaxSeq+1];
683 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
684 RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
685 RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
686 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
687 *DTablePtr = DTableSpace;
688 return headerSize;
689 }
690 default :
691 assert(0);
692 RETURN_ERROR(GENERIC, "impossible");
693 }
694 }
695
696 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
697 const void* src, size_t srcSize)
698 {
699 const BYTE* const istart = (const BYTE*)src;
700 const BYTE* const iend = istart + srcSize;
701 const BYTE* ip = istart;
702 int nbSeq;
703 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
704
705 /* check */
706 RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
707
708 /* SeqHead */
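/* Number_of_Sequences is encoded in 1-3 bytes :
 * byte0 < 0x80  : nbSeq = byte0 (1 byte)
 * byte0 < 0xFF  : nbSeq = ((byte0 - 0x80) << 8) + byte1 (2 bytes)
 * byte0 == 0xFF : nbSeq = readLE16(byte1..byte2) + LONGNBSEQ (3 bytes) */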
709 nbSeq = *ip++;
710 if (nbSeq > 0x7F) {
711 if (nbSeq == 0xFF) {
712 RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
713 nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
714 ip+=2;
715 } else {
716 RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
717 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
718 }
719 }
720 *nbSeqPtr = nbSeq;
721
722 if (nbSeq == 0) {
723 /* No sequence : section ends immediately */
724 RETURN_ERROR_IF(ip != iend, corruption_detected,
725 "extraneous data present in the Sequences section");
726 return (size_t)(ip - istart);
727 }
728
729 /* FSE table descriptors */
730 RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
731 RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
732 { SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6);
733 SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3);
734 SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3);
735 ip++;
736
737 /* Build DTables */
738 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
739 LLtype, MaxLL, LLFSELog,
740 ip, iend-ip,
741 LL_base, LL_bits,
742 LL_defaultDTable, dctx->fseEntropy,
743 dctx->ddictIsCold, nbSeq,
744 dctx->workspace, sizeof(dctx->workspace),
745 ZSTD_DCtx_get_bmi2(dctx));
746 RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
747 ip += llhSize;
748 }
749
750 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
751 OFtype, MaxOff, OffFSELog,
752 ip, iend-ip,
753 OF_base, OF_bits,
754 OF_defaultDTable, dctx->fseEntropy,
755 dctx->ddictIsCold, nbSeq,
756 dctx->workspace, sizeof(dctx->workspace),
757 ZSTD_DCtx_get_bmi2(dctx));
758 RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
759 ip += ofhSize;
760 }
761
762 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
763 MLtype, MaxML, MLFSELog,
764 ip, iend-ip,
765 ML_base, ML_bits,
766 ML_defaultDTable, dctx->fseEntropy,
767 dctx->ddictIsCold, nbSeq,
768 dctx->workspace, sizeof(dctx->workspace),
769 ZSTD_DCtx_get_bmi2(dctx));
770 RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
771 ip += mlhSize;
772 }
773 }
774
775 return ip-istart;
776 }
777
778
779 typedef struct {
780 size_t litLength;
781 size_t matchLength;
782 size_t offset;
783 } seq_t;
784
785 typedef struct {
786 size_t state;
787 const ZSTD_seqSymbol* table;
788 } ZSTD_fseState;
789
790 typedef struct {
791 BIT_DStream_t DStream;
792 ZSTD_fseState stateLL;
793 ZSTD_fseState stateOffb;
794 ZSTD_fseState stateML;
795 size_t prevOffset[ZSTD_REP_NUM];
796 } seqState_t;
797
798 /*! ZSTD_overlapCopy8() :
799 * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
800 * If the offset is < 8 then the offset is spread to at least 8 bytes.
801 *
802 * Precondition: *ip <= *op
803 * Postcondition: *op - *ip >= 8
804 */
805 HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
806 assert(*ip <= *op);
807 if (offset < 8) {
808 /* close range match, overlap */
809 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
810 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
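/* Copy the first 4 bytes one at a time, then nudge ip forward by
 * dec32table[offset] so the next 4-byte copy repeats the pattern,
 * and finally pull ip back by dec64table[offset] so that (*op - *ip) >= 8
 * for any subsequent 8-byte copies. */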
811 int const sub2 = dec64table[offset];
812 (*op)[0] = (*ip)[0];
813 (*op)[1] = (*ip)[1];
814 (*op)[2] = (*ip)[2];
815 (*op)[3] = (*ip)[3];
816 *ip += dec32table[offset];
817 ZSTD_copy4(*op+4, *ip);
818 *ip -= sub2;
819 } else {
820 ZSTD_copy8(*op, *ip);
821 }
822 *ip += 8;
823 *op += 8;
824 assert(*op - *ip >= 8);
825 }
826
827 /*! ZSTD_safecopy() :
828 * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
829 * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
830 * This function is only called in the uncommon case where the sequence is near the end of the block. It
831 * should be fast for a single long sequence, but can be slow for several short sequences.
832 *
833 * @param ovtype controls the overlap detection
834 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
835 * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
836 * The src buffer must be before the dst buffer.
837 */
838 static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
839 ptrdiff_t const diff = op - ip;
840 BYTE* const oend = op + length;
841
842 assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
843 (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
844
845 if (length < 8) {
846 /* Handle short lengths. */
847 while (op < oend) *op++ = *ip++;
848 return;
849 }
850 if (ovtype == ZSTD_overlap_src_before_dst) {
851 /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
852 assert(length >= 8);
853 ZSTD_overlapCopy8(&op, &ip, diff);
854 length -= 8;
855 assert(op - ip >= 8);
856 assert(op <= oend);
857 }
858
859 if (oend <= oend_w) {
860 /* No risk of overwrite. */
861 ZSTD_wildcopy(op, ip, length, ovtype);
862 return;
863 }
864 if (op <= oend_w) {
865 /* Wildcopy until we get close to the end. */
866 assert(oend > oend_w);
867 ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
868 ip += oend_w - op;
869 op += oend_w - op;
870 }
871 /* Handle the leftovers. */
872 while (op < oend) *op++ = *ip++;
873 }
874
875 /* ZSTD_safecopyDstBeforeSrc():
876 * This version allows overlap with dst before src, and also handles the non-overlapping case with dst after src.
877 * Kept separate from the more common ZSTD_safecopy() case to avoid a performance impact on the common path. */
878 static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
879 ptrdiff_t const diff = op - ip;
880 BYTE* const oend = op + length;
881
882 if (length < 8 || diff > -8) {
883 /* Handle short lengths, close overlaps, and dst not before src. */
884 while (op < oend) *op++ = *ip++;
885 return;
886 }
887
888 if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
889 ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
890 ip += oend - WILDCOPY_OVERLENGTH - op;
891 op += oend - WILDCOPY_OVERLENGTH - op;
892 }
893
894 /* Handle the leftovers. */
895 while (op < oend) *op++ = *ip++;
896 }
897
898 /* ZSTD_execSequenceEnd():
899 * This version handles cases that are near the end of the output buffer. It requires
900 * more careful checks to make sure there is no overflow. By separating out these hard
901 * and unlikely cases, we can speed up the common cases.
902 *
903 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
904 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
905 */
906 FORCE_NOINLINE
907 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
908 size_t ZSTD_execSequenceEnd(BYTE* op,
909 BYTE* const oend, seq_t sequence,
910 const BYTE** litPtr, const BYTE* const litLimit,
911 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912 {
913 BYTE* const oLitEnd = op + sequence.litLength;
914 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916 const BYTE* match = oLitEnd - sequence.offset;
917 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
918
919 /* bounds checks : careful of address space overflow in 32-bit mode */
920 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922 assert(op < op + sequenceLength);
923 assert(oLitEnd < op + sequenceLength);
924
925 /* copy literals */
926 ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
927 op = oLitEnd;
928 *litPtr = iLitEnd;
929
930 /* copy Match */
931 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
932 /* offset beyond prefix */
933 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
934 match = dictEnd - (prefixStart - match);
935 if (match + sequence.matchLength <= dictEnd) {
936 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
937 return sequenceLength;
938 }
939 /* span extDict & currentPrefixSegment */
940 { size_t const length1 = dictEnd - match;
941 ZSTD_memmove(oLitEnd, match, length1);
942 op = oLitEnd + length1;
943 sequence.matchLength -= length1;
944 match = prefixStart;
945 }
946 }
947 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
948 return sequenceLength;
949 }
950
951 /* ZSTD_execSequenceEndSplitLitBuffer():
952 * This version is intended for cases where the litBuffer is still split. It is kept separate to avoid a performance impact in the common case.
953 */
954 FORCE_NOINLINE
955 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
956 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
957 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
958 const BYTE** litPtr, const BYTE* const litLimit,
959 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
960 {
961 BYTE* const oLitEnd = op + sequence.litLength;
962 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
963 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
964 const BYTE* match = oLitEnd - sequence.offset;
965
966
967 /* bounds checks : careful of address space overflow in 32-bit mode */
968 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
969 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
970 assert(op < op + sequenceLength);
971 assert(oLitEnd < op + sequenceLength);
972
973 /* copy literals */
974 RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
975 ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
976 op = oLitEnd;
977 *litPtr = iLitEnd;
978
979 /* copy Match */
980 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
981 /* offset beyond prefix */
982 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
983 match = dictEnd - (prefixStart - match);
984 if (match + sequence.matchLength <= dictEnd) {
985 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
986 return sequenceLength;
987 }
988 /* span extDict & currentPrefixSegment */
989 { size_t const length1 = dictEnd - match;
990 ZSTD_memmove(oLitEnd, match, length1);
991 op = oLitEnd + length1;
992 sequence.matchLength -= length1;
993 match = prefixStart;
994 }
995 }
996 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
997 return sequenceLength;
998 }
999
1000 HINT_INLINE
1001 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1002 size_t ZSTD_execSequence(BYTE* op,
1003 BYTE* const oend, seq_t sequence,
1004 const BYTE** litPtr, const BYTE* const litLimit,
1005 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1006 {
1007 BYTE* const oLitEnd = op + sequence.litLength;
1008 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1009 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1010 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
1011 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1012 const BYTE* match = oLitEnd - sequence.offset;
1013
1014 assert(op != NULL /* Precondition */);
1015 assert(oend_w < oend /* No underflow */);
1016
1017 #if defined(__aarch64__)
1018 /* prefetch sequence starting from match that will be used for copy later */
1019 PREFETCH_L1(match);
1020 #endif
1021 /* Handle edge cases in a slow path:
1022 * - Read beyond end of literals
1023 * - Match end is within WILDCOPY_OVERLENGTH of oend
1024 * - 32-bit mode and the match length overflows
1025 */
1026 if (UNLIKELY(
1027 iLitEnd > litLimit ||
1028 oMatchEnd > oend_w ||
1029 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1030 return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1031
1032 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1033 assert(op <= oLitEnd /* No overflow */);
1034 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1035 assert(oMatchEnd <= oend /* No underflow */);
1036 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1037 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1038 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1039
1040 /* Copy Literals:
1041 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1042 * We likely don't need the full 32-byte wildcopy.
1043 */
1044 assert(WILDCOPY_OVERLENGTH >= 16);
1045 ZSTD_copy16(op, (*litPtr));
1046 if (UNLIKELY(sequence.litLength > 16)) {
1047 ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
1048 }
1049 op = oLitEnd;
1050 *litPtr = iLitEnd; /* update for next sequence */
1051
1052 /* Copy Match */
1053 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1054 /* offset beyond prefix -> go into extDict */
1055 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1056 match = dictEnd + (match - prefixStart);
1057 if (match + sequence.matchLength <= dictEnd) {
1058 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1059 return sequenceLength;
1060 }
1061 /* span extDict & currentPrefixSegment */
1062 { size_t const length1 = dictEnd - match;
1063 ZSTD_memmove(oLitEnd, match, length1);
1064 op = oLitEnd + length1;
1065 sequence.matchLength -= length1;
1066 match = prefixStart;
1067 }
1068 }
1069 /* Match within prefix of 1 or more bytes */
1070 assert(op <= oMatchEnd);
1071 assert(oMatchEnd <= oend_w);
1072 assert(match >= prefixStart);
1073 assert(sequence.matchLength >= 1);
1074
1075 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1076 * without overlap checking.
1077 */
1078 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1079 /* We bet on a full wildcopy for matches, since we expect matches to be
1080 * longer than literals (in general). In silesia, ~10% of matches are longer
1081 * than 16 bytes.
1082 */
1083 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1084 return sequenceLength;
1085 }
1086 assert(sequence.offset < WILDCOPY_VECLEN);
1087
1088 /* Copy 8 bytes and spread the offset to be >= 8. */
1089 ZSTD_overlapCopy8(&op, &match, sequence.offset);
1090
1091 /* If the match length is > 8 bytes, then continue with the wildcopy. */
1092 if (sequence.matchLength > 8) {
1093 assert(op < oMatchEnd);
1094 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1095 }
1096 return sequenceLength;
1097 }
1098
1099 HINT_INLINE
1100 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1101 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1102 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1103 const BYTE** litPtr, const BYTE* const litLimit,
1104 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1105 {
1106 BYTE* const oLitEnd = op + sequence.litLength;
1107 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1108 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1109 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1110 const BYTE* match = oLitEnd - sequence.offset;
1111
1112 assert(op != NULL /* Precondition */);
1113 assert(oend_w < oend /* No underflow */);
1114 /* Handle edge cases in a slow path:
1115 * - Read beyond end of literals
1116 * - Match end is within WILDCOPY_OVERLENGTH of oend
1117 * - 32-bit mode and the match length overflows
1118 */
1119 if (UNLIKELY(
1120 iLitEnd > litLimit ||
1121 oMatchEnd > oend_w ||
1122 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1123 return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1124
1125 /* Assumptions (everything else goes into ZSTD_execSequenceEndSplitLitBuffer()) */
1126 assert(op <= oLitEnd /* No overflow */);
1127 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1128 assert(oMatchEnd <= oend /* No underflow */);
1129 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1130 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1131 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1132
1133 /* Copy Literals:
1134 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1135 * We likely don't need the full 32-byte wildcopy.
1136 */
1137 assert(WILDCOPY_OVERLENGTH >= 16);
1138 ZSTD_copy16(op, (*litPtr));
1139 if (UNLIKELY(sequence.litLength > 16)) {
1140 ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1141 }
1142 op = oLitEnd;
1143 *litPtr = iLitEnd; /* update for next sequence */
1144
1145 /* Copy Match */
1146 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1147 /* offset beyond prefix -> go into extDict */
1148 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1149 match = dictEnd + (match - prefixStart);
1150 if (match + sequence.matchLength <= dictEnd) {
1151 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1152 return sequenceLength;
1153 }
1154 /* span extDict & currentPrefixSegment */
1155 { size_t const length1 = dictEnd - match;
1156 ZSTD_memmove(oLitEnd, match, length1);
1157 op = oLitEnd + length1;
1158 sequence.matchLength -= length1;
1159 match = prefixStart;
1160 } }
1161 /* Match within prefix of 1 or more bytes */
1162 assert(op <= oMatchEnd);
1163 assert(oMatchEnd <= oend_w);
1164 assert(match >= prefixStart);
1165 assert(sequence.matchLength >= 1);
1166
1167 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1168 * without overlap checking.
1169 */
1170 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1171 /* We bet on a full wildcopy for matches, since we expect matches to be
1172 * longer than literals (in general). In silesia, ~10% of matches are longer
1173 * than 16 bytes.
1174 */
1175 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1176 return sequenceLength;
1177 }
1178 assert(sequence.offset < WILDCOPY_VECLEN);
1179
1180 /* Copy 8 bytes and spread the offset to be >= 8. */
1181 ZSTD_overlapCopy8(&op, &match, sequence.offset);
1182
1183 /* If the match length is > 8 bytes, then continue with the wildcopy. */
1184 if (sequence.matchLength > 8) {
1185 assert(op < oMatchEnd);
1186 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1187 }
1188 return sequenceLength;
1189 }
1190
1191
1192 static void
1193 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1194 {
1195 const void* ptr = dt;
1196 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1197 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1198 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1199 (U32)DStatePtr->state, DTableH->tableLog);
1200 BIT_reloadDStream(bitD);
1201 DStatePtr->table = dt + 1;
1202 }
1203
1204 FORCE_INLINE_TEMPLATE void
1205 ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1206 {
1207 size_t const lowBits = BIT_readBits(bitD, nbBits);
1208 DStatePtr->state = nextState + lowBits;
1209 }
1210
1211 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1212 * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1213 * bits before reloading. This value is the maximum number of bits we read
1214 * after reloading when we are decoding long offsets.
1215 */
1216 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1217 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1218 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1219 : 0)
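/* For example : with ZSTD_WINDOWLOG_MAX_32 == 30 and STREAM_ACCUMULATOR_MIN_32 == 25,
 * this evaluates to 5, which matches the static assert in ZSTD_decodeSequence(). */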
1220
1221 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1222
1223 /*
1224 * ZSTD_decodeSequence():
1225 * @p longOffsets : tells the decoder to reload more bits while decoding large offsets
1226 * only used in 32-bit mode
1227 * @return : Sequence (litL + matchL + offset)
1228 */
1229 FORCE_INLINE_TEMPLATE seq_t
1230 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
1231 {
1232 seq_t seq;
1233 /*
1234 * ZSTD_seqSymbol is a 64 bits wide structure.
1235 * It can be loaded in one operation
1236 * and its fields extracted by simply shifting or bit-extracting on aarch64.
1237 * GCC doesn't recognize this and generates unnecessary ldr/ldrb/ldrh
1238 * operations that cause a performance drop. This can be avoided by using this
1239 * ZSTD_memcpy hack.
1240 */
1241 #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1242 ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1243 ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1244 ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1245 ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1246 ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1247 ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1248 ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1249 #else
1250 const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1251 const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1252 const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1253 #endif
1254 seq.matchLength = mlDInfo->baseValue;
1255 seq.litLength = llDInfo->baseValue;
1256 { U32 const ofBase = ofDInfo->baseValue;
1257 BYTE const llBits = llDInfo->nbAdditionalBits;
1258 BYTE const mlBits = mlDInfo->nbAdditionalBits;
1259 BYTE const ofBits = ofDInfo->nbAdditionalBits;
1260 BYTE const totalBits = llBits+mlBits+ofBits;
1261
1262 U16 const llNext = llDInfo->nextState;
1263 U16 const mlNext = mlDInfo->nextState;
1264 U16 const ofNext = ofDInfo->nextState;
1265 U32 const llnbBits = llDInfo->nbBits;
1266 U32 const mlnbBits = mlDInfo->nbBits;
1267 U32 const ofnbBits = ofDInfo->nbBits;
1268
1269 assert(llBits <= MaxLLBits);
1270 assert(mlBits <= MaxMLBits);
1271 assert(ofBits <= MaxOff);
1272 /*
1273 * As gcc has better branch and block analyzers, it is sometimes only
1274 * valuable to mark likeliness hints for clang; there they give around
1275 * 3-4% of performance.
1276 */
1277
1278 /* sequence */
1279 { size_t offset;
1280 if (ofBits > 1) {
1281 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1282 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1283 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1284 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1285 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1286 /* Always read extra bits, this keeps the logic simple,
1287 * avoids branches, and avoids accidentally reading 0 bits.
1288 */
1289 U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1290 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1291 BIT_reloadDStream(&seqState->DStream);
1292 offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1293 } else {
1294 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1295 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1296 }
1297 seqState->prevOffset[2] = seqState->prevOffset[1];
1298 seqState->prevOffset[1] = seqState->prevOffset[0];
1299 seqState->prevOffset[0] = offset;
1300 } else {
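/* ofBits <= 1 : the offset is one of the 3 repeat offsets.
 * With litLength == 0 (ll0), the repcode meaning shifts by one :
 * rep1 -> prevOffset[1], rep2 -> prevOffset[2], rep3 -> prevOffset[0] - 1 ;
 * a resulting value of 0 is invalid and is flagged as corruption later. */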
1301 U32 const ll0 = (llDInfo->baseValue == 0);
1302 if (LIKELY((ofBits == 0))) {
1303 offset = seqState->prevOffset[ll0];
1304 seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1305 seqState->prevOffset[0] = offset;
1306 } else {
1307 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1308 { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1309 temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
1310 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1311 seqState->prevOffset[1] = seqState->prevOffset[0];
1312 seqState->prevOffset[0] = offset = temp;
1313 } } }
1314 seq.offset = offset;
1315 }
1316
1317 if (mlBits > 0)
1318 seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1319
1320 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321 BIT_reloadDStream(&seqState->DStream);
1322 if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323 BIT_reloadDStream(&seqState->DStream);
1324 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1325 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1326
1327 if (llBits > 0)
1328 seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1329
1330 if (MEM_32bits())
1331 BIT_reloadDStream(&seqState->DStream);
1332
1333 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1334 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1335
1336 if (!isLastSeq) {
1337 /* don't update FSE state for last Sequence */
1338 ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
1339 ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
1340 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1341 ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
1342 BIT_reloadDStream(&seqState->DStream);
1343 }
1344 }
1345
1346 return seq;
1347 }
1348
1349 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1350 #if DEBUGLEVEL >= 1
1351 static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1352 {
1353 size_t const windowSize = dctx->fParams.windowSize;
1354 /* No dictionary used. */
1355 if (dctx->dictContentEndForFuzzing == NULL) return 0;
1356 /* Dictionary is our prefix. */
1357 if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1358 /* Dictionary is not our ext-dict. */
1359 if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1360 /* Dictionary is not within our window size. */
1361 if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1362 /* Dictionary is active. */
1363 return 1;
1364 }
1365 #endif
1366
1367 static void ZSTD_assertValidSequence(
1368 ZSTD_DCtx const* dctx,
1369 BYTE const* op, BYTE const* oend,
1370 seq_t const seq,
1371 BYTE const* prefixStart, BYTE const* virtualStart)
1372 {
1373 #if DEBUGLEVEL >= 1
1374 if (dctx->isFrameDecompression) {
1375 size_t const windowSize = dctx->fParams.windowSize;
1376 size_t const sequenceSize = seq.litLength + seq.matchLength;
1377 BYTE const* const oLitEnd = op + seq.litLength;
1378 DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1379 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1380 assert(op <= oend);
1381 assert((size_t)(oend - op) >= sequenceSize);
1382 assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
1383 if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1384 size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1385 /* Offset must be within the dictionary. */
1386 assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1387 assert(seq.offset <= windowSize + dictSize);
1388 } else {
1389 /* Offset must be within our window. */
1390 assert(seq.offset <= windowSize);
1391 }
1392 }
1393 #else
1394 (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1395 #endif
1396 }
1397 #endif
1398
1399 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1400
1401
1402 FORCE_INLINE_TEMPLATE size_t
1403 DONT_VECTORIZE
1404 ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1405 void* dst, size_t maxDstSize,
1406 const void* seqStart, size_t seqSize, int nbSeq,
1407 const ZSTD_longOffset_e isLongOffset)
1408 {
1409 const BYTE* ip = (const BYTE*)seqStart;
1410 const BYTE* const iend = ip + seqSize;
1411 BYTE* const ostart = (BYTE*)dst;
1412 BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1413 BYTE* op = ostart;
1414 const BYTE* litPtr = dctx->litPtr;
1415 const BYTE* litBufferEnd = dctx->litBufferEnd;
1416 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1417 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1418 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1419 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
1420
1421 /* Literals are split between internal buffer & output buffer */
1422 if (nbSeq) {
1423 seqState_t seqState;
1424 dctx->fseEntropy = 1;
1425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1426 RETURN_ERROR_IF(
1427 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1428 corruption_detected, "");
1429 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1430 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1431 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1432 assert(dst != NULL);
1433
1434 ZSTD_STATIC_ASSERT(
1435 BIT_DStream_unfinished < BIT_DStream_completed &&
1436 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1437 BIT_DStream_completed < BIT_DStream_overflow);
1438
1439 /* decompress without overrunning litPtr begins */
1440 { seq_t sequence = {0,0,0}; /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */
1441 /* Align the decompression loop to 32 + 16 bytes.
1442 *
1443 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1444 * speed swings based on the alignment of the decompression loop. This
1445 * performance swing is caused by parts of the decompression loop falling
1446 * out of the DSB. The entire decompression loop should fit in the DSB,
1447 * when it can't, we get much worse performance. You can measure if you've
1448 * hit the good case or the bad case with this perf command for some
1449 * compressed file test.zst:
1450 *
1451 * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1452 * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1453 *
1454 * If you see most cycles served out of the MITE you've hit the bad case.
1455 * If you see most cycles served out of the DSB you've hit the good case.
1456 * If it is pretty even then you may be in an okay case.
1457 *
1458 * This issue has been reproduced on the following CPUs:
1459 * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1460 * Use Instruments->Counters to get DSB/MITE cycles.
1461 * I never got performance swings, but I was able to
1462 * go from the good case of mostly DSB to half of the
1463 * cycles served from MITE.
1464 * - Coffeelake: Intel i9-9900k
1465 * - Coffeelake: Intel i7-9700k
1466 *
1467 * I haven't been able to reproduce the instability or DSB misses on any
1468 * of the following CPUS:
1469 * - Haswell
1470 * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
1471 * - Skylake
1472 *
1473 * Alignment is done for each of the three major decompression loops:
1474 * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1475 * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1476 * - ZSTD_decompressSequences_body
1477 * Alignment choices are made to minimize large swings on bad cases and influence on performance
1478 * from changes external to this code, rather than to overoptimize on the current commit.
1479 *
1480 * If you are seeing performance instability, this script can help test.
1481 * It tests on 4 commits in zstd where I saw performance change.
1482 *
1483 * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1484 */
1485 #if defined(__x86_64__)
1486 __asm__(".p2align 6");
1487 # if __GNUC__ >= 7
1488 /* good for gcc-7, gcc-9, and gcc-11 */
1489 __asm__("nop");
1490 __asm__(".p2align 5");
1491 __asm__("nop");
1492 __asm__(".p2align 4");
1493 # if __GNUC__ == 8 || __GNUC__ == 10
1494 /* good for gcc-8 and gcc-10 */
1495 __asm__("nop");
1496 __asm__(".p2align 3");
1497 # endif
1498 # endif
1499 #endif
1500
1501 /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1502 for ( ; nbSeq; nbSeq--) {
1503 sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1504 if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
1505 { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1506 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1507 assert(!ZSTD_isError(oneSeqSize));
1508 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1509 #endif
1510 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1511 return oneSeqSize;
1512 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1513 op += oneSeqSize;
1514 } }
1515 DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
1516
1517 /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1518 if (nbSeq > 0) {
1519 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1520 DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
1521 if (leftoverLit) {
1522 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1523 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1524 sequence.litLength -= leftoverLit;
1525 op += leftoverLit;
1526 }
1527 litPtr = dctx->litExtraBuffer;
1528 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1529 dctx->litBufferLocation = ZSTD_not_in_dst;
1530 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1531 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1532 assert(!ZSTD_isError(oneSeqSize));
1533 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1534 #endif
1535 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1536 return oneSeqSize;
1537 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1538 op += oneSeqSize;
1539 }
1540 nbSeq--;
1541 }
1542 }
1543
1544 if (nbSeq > 0) {
1545 /* there is remaining lit from extra buffer */
1546
1547 #if defined(__x86_64__)
1548 __asm__(".p2align 6");
1549 __asm__("nop");
1550 # if __GNUC__ != 7
1551 /* worse for gcc-7, better for gcc-8, gcc-9, gcc-10, and clang */
1552 __asm__(".p2align 4");
1553 __asm__("nop");
1554 __asm__(".p2align 3");
1555 # elif __GNUC__ >= 11
1556 __asm__(".p2align 3");
1557 # else
1558 __asm__(".p2align 5");
1559 __asm__("nop");
1560 __asm__(".p2align 3");
1561 # endif
1562 #endif
1563
1564 for ( ; nbSeq ; nbSeq--) {
1565 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1566 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1567 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1568 assert(!ZSTD_isError(oneSeqSize));
1569 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1570 #endif
1571 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1572 return oneSeqSize;
1573 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1574 op += oneSeqSize;
1575 }
1576 }
1577
1578 /* check if reached exact end */
1579 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1580 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1581 DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
1582 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1583 /* save reps for next block */
1584 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1585 }
1586
1587 /* last literal segment */
1588 if (dctx->litBufferLocation == ZSTD_split) {
1589 /* the split point hasn't been reached yet : first flush the literals still sitting in dst, then switch to litExtraBuffer */
1590 size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1591 DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
1592 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1593 if (op != NULL) {
1594 ZSTD_memmove(op, litPtr, lastLLSize);
1595 op += lastLLSize;
1596 }
1597 litPtr = dctx->litExtraBuffer;
1598 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1599 dctx->litBufferLocation = ZSTD_not_in_dst;
1600 }
1601 /* copy last literals from internal buffer */
1602 { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1603 DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
1604 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1605 if (op != NULL) {
1606 ZSTD_memcpy(op, litPtr, lastLLSize);
1607 op += lastLLSize;
1608 } }
1609
1610 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1611 return (size_t)(op - ostart);
1612 }
1613
1614 FORCE_INLINE_TEMPLATE size_t
1615 DONT_VECTORIZE
1616 ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1617 void* dst, size_t maxDstSize,
1618 const void* seqStart, size_t seqSize, int nbSeq,
1619 const ZSTD_longOffset_e isLongOffset)
1620 {
1621 const BYTE* ip = (const BYTE*)seqStart;
1622 const BYTE* const iend = ip + seqSize;
1623 BYTE* const ostart = (BYTE*)dst;
1624 BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
1625 BYTE* op = ostart;
1626 const BYTE* litPtr = dctx->litPtr;
1627 const BYTE* const litEnd = litPtr + dctx->litSize;
1628 const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1629 const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1630 const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1631 DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1632
1633 /* Regen sequences */
1634 if (nbSeq) {
1635 seqState_t seqState;
1636 dctx->fseEntropy = 1;
1637 { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1638 RETURN_ERROR_IF(
1639 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1640 corruption_detected, "");
1641 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1642 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1643 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1644 assert(dst != NULL);
1645
1646 #if defined(__x86_64__)
1647 __asm__(".p2align 6");
1648 __asm__("nop");
1649 # if __GNUC__ >= 7
1650 __asm__(".p2align 5");
1651 __asm__("nop");
1652 __asm__(".p2align 3");
1653 # else
1654 __asm__(".p2align 4");
1655 __asm__("nop");
1656 __asm__(".p2align 3");
1657 # endif
1658 #endif
1659
1660 for ( ; nbSeq ; nbSeq--) {
1661 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1662 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1663 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1664 assert(!ZSTD_isError(oneSeqSize));
1665 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1666 #endif
1667 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1668 return oneSeqSize;
1669 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1670 op += oneSeqSize;
1671 }
1672
1673 /* check if reached exact end */
1674 assert(nbSeq == 0);
1675 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1676 /* save reps for next block */
1677 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1678 }
1679
1680 /* last literal segment */
1681 { size_t const lastLLSize = (size_t)(litEnd - litPtr);
1682 DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
1683 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1684 if (op != NULL) {
1685 ZSTD_memcpy(op, litPtr, lastLLSize);
1686 op += lastLLSize;
1687 } }
1688
1689 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1690 return (size_t)(op - ostart);
1691 }
1692
1693 static size_t
1694 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1695 void* dst, size_t maxDstSize,
1696 const void* seqStart, size_t seqSize, int nbSeq,
1697 const ZSTD_longOffset_e isLongOffset)
1698 {
1699 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1700 }
1701
1702 static size_t
1703 ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1704 void* dst, size_t maxDstSize,
1705 const void* seqStart, size_t seqSize, int nbSeq,
1706 const ZSTD_longOffset_e isLongOffset)
1707 {
1708 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1709 }
1710 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1711
1712 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1713
1714 FORCE_INLINE_TEMPLATE
1715
1716 size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1717 const BYTE* const prefixStart, const BYTE* const dictEnd)
1718 {
1719 prefetchPos += sequence.litLength;
1720 { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1721 /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1722 * No consequence though : memory address is only used for prefetching, not for dereferencing */
1723 const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
1724 PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1725 }
1726 return prefetchPos + sequence.matchLength;
1727 }
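/* For clarity: prefetchPos tracks how far the output will have advanced once the
 * already-decoded sequences are executed, so matchBase + prefetchPos - offset
 * approximates the future match address even though nothing has been written
 * there yet; the returned value is the output position after this sequence
 * (litLength + matchLength further on). */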
1728
1729 /* This decoding function employs prefetching
1730 * to reduce latency impact of cache misses.
1731 * It's generally employed when a block contains a significant portion of long-distance matches
1732 * or when coupled with a "cold" dictionary */
1733 FORCE_INLINE_TEMPLATE size_t
1734 ZSTD_decompressSequencesLong_body(
1735 ZSTD_DCtx* dctx,
1736 void* dst, size_t maxDstSize,
1737 const void* seqStart, size_t seqSize, int nbSeq,
1738 const ZSTD_longOffset_e isLongOffset)
1739 {
1740 const BYTE* ip = (const BYTE*)seqStart;
1741 const BYTE* const iend = ip + seqSize;
1742 BYTE* const ostart = (BYTE*)dst;
1743 BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1744 BYTE* op = ostart;
1745 const BYTE* litPtr = dctx->litPtr;
1746 const BYTE* litBufferEnd = dctx->litBufferEnd;
1747 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1748 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1749 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1750
1751 /* Regen sequences */
1752 if (nbSeq) {
1753 #define STORED_SEQS 8
1754 #define STORED_SEQS_MASK (STORED_SEQS-1)
1755 #define ADVANCED_SEQS STORED_SEQS
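/* The decoder keeps a ring of STORED_SEQS (a power of 2, indexed with
 * STORED_SEQS_MASK) decoded-but-not-yet-executed sequences: sequence N is
 * decoded and its match prefetched while sequence N - ADVANCED_SEQS is being
 * executed, hiding the latency of out-of-cache matches. */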
1756 seq_t sequences[STORED_SEQS];
1757 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1758 seqState_t seqState;
1759 int seqNb;
1760 size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1761
1762 dctx->fseEntropy = 1;
1763 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1764 assert(dst != NULL);
1765 assert(iend >= ip);
1766 RETURN_ERROR_IF(
1767 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1768 corruption_detected, "");
1769 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1770 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1771 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1772
1773 /* prepare in advance */
1774 for (seqNb=0; seqNb<seqAdvance; seqNb++) {
1775 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1776 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1777 sequences[seqNb] = sequence;
1778 }
1779
1780 /* decompress without stomping litBuffer */
1781 for (; seqNb < nbSeq; seqNb++) {
1782 seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1783
1784 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
1785 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1786 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1787 if (leftoverLit)
1788 {
1789 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1790 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1791 sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1792 op += leftoverLit;
1793 }
1794 litPtr = dctx->litExtraBuffer;
1795 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1796 dctx->litBufferLocation = ZSTD_not_in_dst;
1797 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1798 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1799 assert(!ZSTD_isError(oneSeqSize));
1800 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1801 #endif
1802 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1803
1804 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1805 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1806 op += oneSeqSize;
1807 } }
1808 else
1809 {
1810 /* lit buffer is either wholly contained in first or second split, or not split at all */
1811 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1812 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1813 ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1814 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1815 assert(!ZSTD_isError(oneSeqSize));
1816 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1817 #endif
1818 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1819
1820 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1821 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1822 op += oneSeqSize;
1823 }
1824 }
1825 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1826
1827 /* finish queue */
1828 seqNb -= seqAdvance;
1829 for ( ; seqNb<nbSeq ; seqNb++) {
1830 seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1831 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
1832 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1833 if (leftoverLit) {
1834 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1835 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1836 sequence->litLength -= leftoverLit;
1837 op += leftoverLit;
1838 }
1839 litPtr = dctx->litExtraBuffer;
1840 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1841 dctx->litBufferLocation = ZSTD_not_in_dst;
1842 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1843 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1844 assert(!ZSTD_isError(oneSeqSize));
1845 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1846 #endif
1847 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1848 op += oneSeqSize;
1849 }
1850 }
1851 else
1852 {
1853 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1854 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1855 ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1856 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1857 assert(!ZSTD_isError(oneSeqSize));
1858 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1859 #endif
1860 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1861 op += oneSeqSize;
1862 }
1863 }
1864
1865 /* save reps for next block */
1866 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1867 }
1868
1869 /* last literal segment */
1870 if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
1871 size_t const lastLLSize = litBufferEnd - litPtr;
1872 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1873 if (op != NULL) {
1874 ZSTD_memmove(op, litPtr, lastLLSize);
1875 op += lastLLSize;
1876 }
1877 litPtr = dctx->litExtraBuffer;
1878 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1879 }
1880 { size_t const lastLLSize = litBufferEnd - litPtr;
1881 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1882 if (op != NULL) {
1883 ZSTD_memmove(op, litPtr, lastLLSize);
1884 op += lastLLSize;
1885 }
1886 }
1887
1888 return (size_t)(op - ostart);
1889 }
1890
1891 static size_t
1892 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1893 void* dst, size_t maxDstSize,
1894 const void* seqStart, size_t seqSize, int nbSeq,
1895 const ZSTD_longOffset_e isLongOffset)
1896 {
1897 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1898 }
1899 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1900
1901
1902
1903 #if DYNAMIC_BMI2
1904
1905 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1906 static BMI2_TARGET_ATTRIBUTE size_t
1907 DONT_VECTORIZE
1908 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1909 void* dst, size_t maxDstSize,
1910 const void* seqStart, size_t seqSize, int nbSeq,
1911 const ZSTD_longOffset_e isLongOffset)
1912 {
1913 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1914 }
1915 static BMI2_TARGET_ATTRIBUTE size_t
1916 DONT_VECTORIZE
1917 ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1918 void* dst, size_t maxDstSize,
1919 const void* seqStart, size_t seqSize, int nbSeq,
1920 const ZSTD_longOffset_e isLongOffset)
1921 {
1922 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1923 }
1924 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1925
1926 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1927 static BMI2_TARGET_ATTRIBUTE size_t
1928 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1929 void* dst, size_t maxDstSize,
1930 const void* seqStart, size_t seqSize, int nbSeq,
1931 const ZSTD_longOffset_e isLongOffset)
1932 {
1933 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1934 }
1935 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1936
1937 #endif /* DYNAMIC_BMI2 */
1938
1939 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1940 static size_t
1941 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1942 const void* seqStart, size_t seqSize, int nbSeq,
1943 const ZSTD_longOffset_e isLongOffset)
1944 {
1945 DEBUGLOG(5, "ZSTD_decompressSequences");
1946 #if DYNAMIC_BMI2
1947 if (ZSTD_DCtx_get_bmi2(dctx)) {
1948 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1949 }
1950 #endif
1951 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1952 }
1953 static size_t
1954 ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1955 const void* seqStart, size_t seqSize, int nbSeq,
1956 const ZSTD_longOffset_e isLongOffset)
1957 {
1958 DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1959 #if DYNAMIC_BMI2
1960 if (ZSTD_DCtx_get_bmi2(dctx)) {
1961 return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1962 }
1963 #endif
1964 return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1965 }
1966 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1967
1968
1969 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1970 /* ZSTD_decompressSequencesLong() :
1971 * decompression function triggered when a minimum share of offsets is considered "long",
1972 * aka out of cache.
1973 * note : the definition of "long" is overloaded here, sometimes meaning "wider than the bitstream register", and sometimes meaning "farther than the memory cache distance".
1974 * This function will try to mitigate main memory latency through the use of prefetching */
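/* In this file it is selected either when the dictionary is "cold"
 * (dctx->ddictIsCold) or when the measured share of long offsets exceeds the
 * minShare heuristic computed in ZSTD_decompressBlock_internal(). */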
1975 static size_t
1976 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1977 void* dst, size_t maxDstSize,
1978 const void* seqStart, size_t seqSize, int nbSeq,
1979 const ZSTD_longOffset_e isLongOffset)
1980 {
1981 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1982 #if DYNAMIC_BMI2
1983 if (ZSTD_DCtx_get_bmi2(dctx)) {
1984 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1985 }
1986 #endif
1987 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1988 }
1989 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1990
1991
1992 /*
1993 * @returns The total size of the history referenceable by zstd, including
1994 * both the prefix and the extDict. At @p op any offset larger than this
1995 * is invalid.
1996 */
1997 static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1998 {
1999 return (size_t)(op - virtualStart);
2000 }
2001
2002 typedef struct {
2003 unsigned longOffsetShare;
2004 unsigned maxNbAdditionalBits;
2005 } ZSTD_OffsetInfo;
2006
2007 /* ZSTD_getOffsetInfo() :
2008 * condition : offTable must be valid
2009 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
2010 * compared to maximum possible of (1<<OffFSELog),
2011 * as well as the maximum number of additional bits required.
2012 */
2013 static ZSTD_OffsetInfo
2014 ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2015 {
2016 ZSTD_OffsetInfo info = {0, 0};
2017 /* If nbSeq == 0, then the offTable is uninitialized, but we have
2018 * no sequences, so both values should be 0.
2019 */
2020 if (nbSeq != 0) {
2021 const void* ptr = offTable;
2022 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2023 const ZSTD_seqSymbol* table = offTable + 1;
2024 U32 const max = 1 << tableLog;
2025 U32 u;
2026 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2027
2028 assert(max <= (1 << OffFSELog)); /* max not too large */
2029 for (u=0; u<max; u++) {
2030 info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2031 if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2032 }
2033
2034 assert(tableLog <= OffFSELog);
2035 info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
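/* For illustration: a table with tableLog 5 in which 1 of its 32 entries is a
 * "long" offset yields longOffsetShare = 1 << (8 - 5) = 8, i.e. the same
 * fraction (1/32 == 8/256) expressed on the 1<<OffFSELog scale. */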
2036 }
2037
2038 return info;
2039 }
2040
2041 /*
2042 * @returns The maximum offset we can decode in one read of our bitstream, without
2043 * reloading more bits in the middle of the offset bits read. Any offsets larger
2044 * than this must use the long offset decoder.
2045 */
2046 static size_t ZSTD_maxShortOffset(void)
2047 {
2048 if (MEM_64bits()) {
2049 /* We can decode any offset without reloading bits.
2050 * This might change if the max window size grows.
2051 */
2052 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2053 return (size_t)-1;
2054 } else {
2055 /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2056 * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2057 * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2058 */
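/* For illustration (assuming STREAM_ACCUMULATOR_MIN_32 == 25 and
 * ZSTD_REP_NUM == 3): maxOffbase = (1 << 26) - 1 = 67108863, so the largest
 * short-decodable offset on 32-bit builds is 67108860, i.e. roughly 64 MiB. */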
2059 size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2060 size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2061 assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2062 return maxOffset;
2063 }
2064 }
2065
2066 size_t
2067 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2068 void* dst, size_t dstCapacity,
2069 const void* src, size_t srcSize, const streaming_operation streaming)
2070 { /* blockType == blockCompressed */
2071 const BYTE* ip = (const BYTE*)src;
2072 DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
2073
2074 /* Note : the wording of the specification
2075 * allows a compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
2076 * This generally does not happen, as it makes little sense,
2077 * since an uncompressed block would have the same size and no decompression cost.
2078 * Also, note that decoders from reference libzstd before v1.5.4
2079 * would consider this edge case an error.
2080 * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
2081 * for broader compatibility with the deployed ecosystem of zstd decoders */
2082 RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
2083
2084 /* Decode literals section */
2085 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2086 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
2087 if (ZSTD_isError(litCSize)) return litCSize;
2088 ip += litCSize;
2089 srcSize -= litCSize;
2090 }
2091
2092 /* Build Decoding Tables */
2093 {
2094 /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2095 * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2096 */
2097 size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
2098 size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
2099 /* isLongOffset must be true if there are long offsets.
2100 * Offsets are long if they are larger than ZSTD_maxShortOffset().
2101 * We don't expect that to be the case in 64-bit mode.
2102 *
2103 * We check here to see if our history is large enough to allow long offsets.
2104 * If it isn't, then we can't possibly have (valid) long offsets. If the offset
2105 * is invalid, then it is okay to read it incorrectly.
2106 *
2107 * If isLongOffset is true, then we will later check our decoding table to see
2108 * if it is even possible to generate long offsets.
2109 */
2110 ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
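/* In other words: long-offset handling can only be needed on 32-bit builds,
 * and only once more than ZSTD_maxShortOffset() (roughly 64 MiB) of history is
 * addressable from the end of this block. */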
2111 /* These macros control at build-time which decompressor implementation
2112 * we use. If neither is defined, we do some inspection and dispatch at
2113 * runtime.
2114 */
2115 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2116 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2117 int usePrefetchDecoder = dctx->ddictIsCold;
2118 #else
2119 /* Set to 1 to avoid computing offset info if we don't need to.
2120 * Otherwise this value is ignored.
2121 */
2122 int usePrefetchDecoder = 1;
2123 #endif
2124 int nbSeq;
2125 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2126 if (ZSTD_isError(seqHSize)) return seqHSize;
2127 ip += seqHSize;
2128 srcSize -= seqHSize;
2129
2130 RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2131 RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2132 "invalid dst");
2133
2134 /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2135 * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2136 * NOTE: could probably use a larger nbSeq limit
2137 */
2138 if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2139 ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2140 if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2141 /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2142 * enough, then we know it is impossible to have too long an offset in this block, so we can
2143 * use the regular offset decoder.
2144 */
2145 isLongOffset = ZSTD_lo_isRegularOffset;
2146 }
2147 if (!usePrefetchDecoder) {
2148 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
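/* longOffsetShare is expressed on a 1<<OffFSELog (= 256) scale,
 * hence 7/256 ~= 2.73% and 20/256 ~= 7.81%. */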
2149 usePrefetchDecoder = (info.longOffsetShare >= minShare);
2150 }
2151 }
2152
2153 dctx->ddictIsCold = 0;
2154
2155 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2156 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2157 if (usePrefetchDecoder) {
2158 #else
2159 (void)usePrefetchDecoder;
2160 {
2161 #endif
2162 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2163 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2164 #endif
2165 }
2166
2167 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2168 /* else */
2169 if (dctx->litBufferLocation == ZSTD_split)
2170 return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2171 else
2172 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2173 #endif
2174 }
2175 }
2176
2177
2178 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
2179 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2180 {
2181 if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
2182 dctx->dictEnd = dctx->previousDstEnd;
2183 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2184 dctx->prefixStart = dst;
2185 dctx->previousDstEnd = dst;
2186 }
2187 }
2188
2189
2190 size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2191 void* dst, size_t dstCapacity,
2192 const void* src, size_t srcSize)
2193 {
2194 size_t dSize;
2195 dctx->isFrameDecompression = 0;
2196 ZSTD_checkContinuity(dctx, dst, dstCapacity);
2197 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
2198 FORWARD_IF_ERROR(dSize, "");
2199 dctx->previousDstEnd = (char*)dst + dSize;
2200 return dSize;
2201 }
2202
2203
2204 /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2205 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2206 void* dst, size_t dstCapacity,
2207 const void* src, size_t srcSize)
2208 {
2209 return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2210 }
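/* Usage sketch (illustrative only, not part of this file): decoding a single
 * compressed block with the experimental block-level API from zstd.h.
 * Error handling is abbreviated; the DCtx must be (re)initialized with
 * ZSTD_decompressBegin() before the first block. dstBuf/dstCapacity and
 * cSrc/cSrcSize are caller-provided buffers assumed for this example.
 *
 *     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *     size_t const initErr = ZSTD_decompressBegin(dctx);
 *     size_t const dSize = ZSTD_isError(initErr) ? initErr
 *                        : ZSTD_decompressBlock(dctx, dstBuf, dstCapacity, cSrc, cSrcSize);
 *     if (ZSTD_isError(dSize)) { / * handle corruption or too-small dst * / }
 *     ZSTD_freeDCtx(dctx);
 */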
2211