xref: /freebsd/sys/contrib/zstd/lib/decompress/zstd_decompress_block.c (revision 28f4385e45a2681c14bd04b83fe1796eaefe8265)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* zstd_decompress_block :
12  * this module takes care of decompressing _compressed_ block */
13 
14 /*-*******************************************************
15 *  Dependencies
16 *********************************************************/
17 #include <string.h>      /* memcpy, memmove, memset */
18 #include "compiler.h"    /* prefetch */
19 #include "cpu.h"         /* bmi2 */
20 #include "mem.h"         /* low level memory routines */
21 #define FSE_STATIC_LINKING_ONLY
22 #include "fse.h"
23 #define HUF_STATIC_LINKING_ONLY
24 #include "huf.h"
25 #include "zstd_internal.h"
26 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
27 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
28 #include "zstd_decompress_block.h"
29 
30 /*_*******************************************************
31 *  Macros
32 **********************************************************/
33 
/* These two optional macros force the use of one or the other of the two
 * ZSTD_decompressSequences implementations. They are mutually exclusive:
 * defining both at the same time is rejected at compile time.
 */
38 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39     defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41 #endif
42 
43 
44 /*_*******************************************************
45 *  Memory operations
46 **********************************************************/
/* ZSTD_copy4() :
 * copies exactly 4 bytes from `src` to `dst`; the two areas must not overlap (memcpy semantics) */
static void ZSTD_copy4(void* dst, const void* src)
{
    size_t const nbBytes = 4;
    memcpy(dst, src, nbBytes);
}
48 
49 
50 /*-*************************************************************
51  *   Block decoding
52  ***************************************************************/
53 
54 /*! ZSTD_getcBlockSize() :
55  *  Provides the size of compressed block from block header `src` */
56 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57                           blockProperties_t* bpPtr)
58 {
59     if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
60     {   U32 const cBlockHeader = MEM_readLE24(src);
61         U32 const cSize = cBlockHeader >> 3;
62         bpPtr->lastBlock = cBlockHeader & 1;
63         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
64         bpPtr->origSize = cSize;   /* only useful for RLE */
65         if (bpPtr->blockType == bt_rle) return 1;
66         if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
67         return cSize;
68     }
69 }
70 
71 
/* Hidden declaration for fullbench :
 * the symbol is not declared in any header, but is defined with external linkage below */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize);
/*! ZSTD_decodeLiteralsBlock() :
 *  Decodes the literals section located at the start of `src`.
 *  The 2 lowest bits of the first byte select the encoding :
 *  - set_basic      : raw literals, either referenced directly inside `src` or copied into dctx->litBuffer
 *  - set_rle        : one byte, replicated into dctx->litBuffer
 *  - set_compressed : Huffman-compressed literals, carrying their own table description
 *  - set_repeat     : Huffman-compressed literals, decoded with the table referenced by dctx->HUFptr
 *  On success, dctx->litPtr and dctx->litSize describe the decoded literals.
 * @return : nb of bytes read from src (< srcSize ),
 *           or an error code (corruption_detected, dictionary_corrupted)
 *  note : symbol not declared but exposed for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
{
    /* istart[0] is read below, and the multi-byte header reads rely on this minimum */
    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);

    {   const BYTE* const istart = (const BYTE*) src;
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

        switch(litEncType)
        {
        case set_repeat:
            /* repeat mode needs a previously established literals entropy table */
            if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
            /* fall-through */

        case set_compressed:
            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;   /* selects the header layout */
                U32 const lhc = MEM_readLE32(istart);       /* reads 4 bytes; srcSize >= 5 was checked above */
                size_t hufSuccess;
                /* header bit layout, from lowest bits : encoding type - lhlCode - litSize - litCSize */
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;   /* only lhlCode 0 selects the single-stream decoder */
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    litCSize = (lhc >> 22) + (istart[4] << 10);   /* upper bits of litCSize come from the 5th header byte */
                    break;
                }
                /* reject sizes larger than a block, or a compressed payload extending beyond src */
                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
                if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    /* decode with the table referenced by dctx->HUFptr */
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, dctx->bmi2);
                    } else {
                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, dctx->bmi2);
                    }
                } else {
                    /* set_compressed : these variants receive dctx->entropy.hufTable and the dctx workspace */
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace));
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), dctx->bmi2);
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), dctx->bmi2);
                    }
                }

                /* any Huffman decoding failure is reported as corruption */
                if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;
                /* a freshly decoded table becomes the reference for later set_repeat blocks */
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                /* zero the WILDCOPY_OVERLENGTH bytes following the literals */
                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                return litCSize + lhSize;
            }

        case set_basic:
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                /* header is 1, 2 or 3 bytes long; litSize is stored in its upper bits */
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    /* not enough slack after the literals : copy them into litBuffer instead */
                    if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                return lhSize+litSize;
            }

        case set_rle:
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                /* same header layout as set_basic; the header is followed by the single byte to replicate */
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
                    break;
                }
                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
                /* fill the literals and the WILDCOPY_OVERLENGTH bytes after them with the repeated byte */
                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;
            }
        default:
            return ERROR(corruption_detected);   /* impossible */
        }
    }
}
233 
234 /* Default FSE distribution tables.
235  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
236  * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
237  * They were generated programmatically with following method :
238  * - start from default distributions, present in /lib/common/zstd_internal.h
239  * - generate tables normally, using ZSTD_buildFSETable()
240  * - printout the content of tables
 * - prettify output, report below, test with fuzzer to ensure it's correct */
242 
/* Default FSE distribution table for Literal Lengths.
 * Layout : one header entry, followed by (1<<LL_DEFAULTNORMLOG) decoding cells.
 * ZSTD_decodeSeqHeaders() passes it as the default table for literal lengths,
 * and ZSTD_buildSeqTable() selects it when the encoding type is set_basic. */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
     /* nextState, nbAddBits, nbBits, baseVal */
     {  0,  0,  4,    0},  { 16,  0,  4,    0},
     { 32,  0,  5,    1},  {  0,  0,  5,    3},
     {  0,  0,  5,    4},  {  0,  0,  5,    6},
     {  0,  0,  5,    7},  {  0,  0,  5,    9},
     {  0,  0,  5,   10},  {  0,  0,  5,   12},
     {  0,  0,  6,   14},  {  0,  1,  5,   16},
     {  0,  1,  5,   20},  {  0,  1,  5,   22},
     {  0,  2,  5,   28},  {  0,  3,  5,   32},
     {  0,  4,  5,   48},  { 32,  6,  5,   64},
     {  0,  7,  5,  128},  {  0,  8,  6,  256},
     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
     { 32,  0,  4,    0},  {  0,  0,  4,    1},
     {  0,  0,  5,    2},  { 32,  0,  5,    4},
     {  0,  0,  5,    5},  { 32,  0,  5,    7},
     {  0,  0,  5,    8},  { 32,  0,  5,   10},
     {  0,  0,  5,   11},  {  0,  0,  6,   13},
     { 32,  1,  5,   16},  {  0,  1,  5,   18},
     { 32,  1,  5,   22},  {  0,  2,  5,   24},
     { 32,  3,  5,   32},  {  0,  3,  5,   40},
     {  0,  6,  4,   64},  { 16,  6,  4,   64},
     { 32,  7,  5,  128},  {  0,  9,  6,  512},
     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
     { 16,  0,  4,    1},  { 32,  0,  5,    2},
     { 32,  0,  5,    3},  { 32,  0,  5,    5},
     { 32,  0,  5,    6},  { 32,  0,  5,    8},
     { 32,  0,  5,    9},  { 32,  0,  5,   11},
     { 32,  0,  5,   12},  {  0,  0,  6,   15},
     { 32,  1,  5,   18},  { 32,  1,  5,   20},
     { 32,  2,  5,   24},  { 32,  2,  5,   28},
     { 32,  3,  5,   40},  { 32,  4,  5,   48},
     {  0, 16,  6,65536},  {  0, 15,  6,32768},
     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
};   /* LL_defaultDTable */
280 
/* Default FSE distribution table for Offset Codes.
 * Layout : one header entry, followed by (1<<OF_DEFAULTNORMLOG) decoding cells.
 * ZSTD_decodeSeqHeaders() passes it as the default table for offset codes,
 * and ZSTD_buildSeqTable() selects it when the encoding type is set_basic. */
static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  5,    0},     {  0,  6,  4,   61},
    {  0,  9,  5,  509},     {  0, 15,  5,32765},
    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
    {  0,  5,  5,   29},     {  0,  8,  4,  253},
    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
    {  0,  2,  5,    1},     { 16,  7,  4,  125},
    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
    {  0, 19,  5,524285},    {  0,  1,  5,    1},
    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
};   /* OF_defaultDTable */
302 
303 
/* Default FSE distribution table for Match Lengths.
 * Layout : one header entry, followed by (1<<ML_DEFAULTNORMLOG) decoding cells.
 * ZSTD_decodeSeqHeaders() passes it as the default table for match lengths,
 * and ZSTD_buildSeqTable() selects it when the encoding type is set_basic. */
static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  6,    3},  {  0,  0,  4,    4},
    { 32,  0,  5,    5},  {  0,  0,  5,    6},
    {  0,  0,  5,    8},  {  0,  0,  5,    9},
    {  0,  0,  5,   11},  {  0,  0,  6,   13},
    {  0,  0,  6,   16},  {  0,  0,  6,   19},
    {  0,  0,  6,   22},  {  0,  0,  6,   25},
    {  0,  0,  6,   28},  {  0,  0,  6,   31},
    {  0,  0,  6,   34},  {  0,  1,  6,   37},
    {  0,  1,  6,   41},  {  0,  2,  6,   47},
    {  0,  3,  6,   59},  {  0,  4,  6,   83},
    {  0,  7,  6,  131},  {  0,  9,  6,  515},
    { 16,  0,  4,    4},  {  0,  0,  4,    5},
    { 32,  0,  5,    6},  {  0,  0,  5,    7},
    { 32,  0,  5,    9},  {  0,  0,  5,   10},
    {  0,  0,  6,   12},  {  0,  0,  6,   15},
    {  0,  0,  6,   18},  {  0,  0,  6,   21},
    {  0,  0,  6,   24},  {  0,  0,  6,   27},
    {  0,  0,  6,   30},  {  0,  0,  6,   33},
    {  0,  1,  6,   35},  {  0,  1,  6,   39},
    {  0,  2,  6,   43},  {  0,  3,  6,   51},
    {  0,  4,  6,   67},  {  0,  5,  6,   99},
    {  0,  8,  6,  259},  { 32,  0,  4,    4},
    { 48,  0,  4,    4},  { 16,  0,  4,    5},
    { 32,  0,  5,    7},  { 32,  0,  5,    8},
    { 32,  0,  5,   10},  { 32,  0,  5,   11},
    {  0,  0,  6,   14},  {  0,  0,  6,   17},
    {  0,  0,  6,   20},  {  0,  0,  6,   23},
    {  0,  0,  6,   26},  {  0,  0,  6,   29},
    {  0,  0,  6,   32},  {  0, 16,  6,65539},
    {  0, 15,  6,32771},  {  0, 14,  6,16387},
    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
};   /* ML_defaultDTable */
341 
342 
343 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
344 {
345     void* ptr = dt;
346     ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
347     ZSTD_seqSymbol* const cell = dt + 1;
348 
349     DTableH->tableLog = 0;
350     DTableH->fastMode = 0;
351 
352     cell->nbBits = 0;
353     cell->nextState = 0;
354     assert(nbAddBits < 255);
355     cell->nbAdditionalBits = (BYTE)nbAddBits;
356     cell->baseValue = baseValue;
357 }
358 
359 
360 /* ZSTD_buildFSETable() :
361  * generate FSE decoding table for one symbol (ll, ml or off)
362  * cannot fail if input is valid =>
363  * all inputs are presumed validated at this stage */
364 void
365 ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
366             const short* normalizedCounter, unsigned maxSymbolValue,
367             const U32* baseValue, const U32* nbAdditionalBits,
368             unsigned tableLog)
369 {
370     ZSTD_seqSymbol* const tableDecode = dt+1;
371     U16 symbolNext[MaxSeq+1];
372 
373     U32 const maxSV1 = maxSymbolValue + 1;
374     U32 const tableSize = 1 << tableLog;
375     U32 highThreshold = tableSize-1;
376 
377     /* Sanity Checks */
378     assert(maxSymbolValue <= MaxSeq);
379     assert(tableLog <= MaxFSELog);
380 
381     /* Init, lay down lowprob symbols */
382     {   ZSTD_seqSymbol_header DTableH;
383         DTableH.tableLog = tableLog;
384         DTableH.fastMode = 1;
385         {   S16 const largeLimit= (S16)(1 << (tableLog-1));
386             U32 s;
387             for (s=0; s<maxSV1; s++) {
388                 if (normalizedCounter[s]==-1) {
389                     tableDecode[highThreshold--].baseValue = s;
390                     symbolNext[s] = 1;
391                 } else {
392                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
393                     symbolNext[s] = normalizedCounter[s];
394         }   }   }
395         memcpy(dt, &DTableH, sizeof(DTableH));
396     }
397 
398     /* Spread symbols */
399     {   U32 const tableMask = tableSize-1;
400         U32 const step = FSE_TABLESTEP(tableSize);
401         U32 s, position = 0;
402         for (s=0; s<maxSV1; s++) {
403             int i;
404             for (i=0; i<normalizedCounter[s]; i++) {
405                 tableDecode[position].baseValue = s;
406                 position = (position + step) & tableMask;
407                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
408         }   }
409         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
410     }
411 
412     /* Build Decoding table */
413     {   U32 u;
414         for (u=0; u<tableSize; u++) {
415             U32 const symbol = tableDecode[u].baseValue;
416             U32 const nextState = symbolNext[symbol]++;
417             tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
418             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
419             assert(nbAdditionalBits[symbol] < 255);
420             tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
421             tableDecode[u].baseValue = baseValue[symbol];
422     }   }
423 }
424 
425 
426 /*! ZSTD_buildSeqTable() :
427  * @return : nb bytes read from src,
428  *           or an error code if it fails */
429 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
430                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
431                                  const void* src, size_t srcSize,
432                                  const U32* baseValue, const U32* nbAdditionalBits,
433                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
434                                  int ddictIsCold, int nbSeq)
435 {
436     switch(type)
437     {
438     case set_rle :
439         if (!srcSize) return ERROR(srcSize_wrong);
440         if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
441         {   U32 const symbol = *(const BYTE*)src;
442             U32 const baseline = baseValue[symbol];
443             U32 const nbBits = nbAdditionalBits[symbol];
444             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
445         }
446         *DTablePtr = DTableSpace;
447         return 1;
448     case set_basic :
449         *DTablePtr = defaultTable;
450         return 0;
451     case set_repeat:
452         if (!flagRepeatTable) return ERROR(corruption_detected);
453         /* prefetch FSE table if used */
454         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
455             const void* const pStart = *DTablePtr;
456             size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
457             PREFETCH_AREA(pStart, pSize);
458         }
459         return 0;
460     case set_compressed :
461         {   unsigned tableLog;
462             S16 norm[MaxSeq+1];
463             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
464             if (FSE_isError(headerSize)) return ERROR(corruption_detected);
465             if (tableLog > maxLog) return ERROR(corruption_detected);
466             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
467             *DTablePtr = DTableSpace;
468             return headerSize;
469         }
470     default :   /* impossible */
471         assert(0);
472         return ERROR(GENERIC);
473     }
474 }
475 
476 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
477                              const void* src, size_t srcSize)
478 {
479     const BYTE* const istart = (const BYTE* const)src;
480     const BYTE* const iend = istart + srcSize;
481     const BYTE* ip = istart;
482     int nbSeq;
483     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
484 
485     /* check */
486     if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
487 
488     /* SeqHead */
489     nbSeq = *ip++;
490     if (!nbSeq) {
491         *nbSeqPtr=0;
492         if (srcSize != 1) return ERROR(srcSize_wrong);
493         return 1;
494     }
495     if (nbSeq > 0x7F) {
496         if (nbSeq == 0xFF) {
497             if (ip+2 > iend) return ERROR(srcSize_wrong);
498             nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
499         } else {
500             if (ip >= iend) return ERROR(srcSize_wrong);
501             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
502         }
503     }
504     *nbSeqPtr = nbSeq;
505 
506     /* FSE table descriptors */
507     if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
508     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
509         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
510         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
511         ip++;
512 
513         /* Build DTables */
514         {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
515                                                       LLtype, MaxLL, LLFSELog,
516                                                       ip, iend-ip,
517                                                       LL_base, LL_bits,
518                                                       LL_defaultDTable, dctx->fseEntropy,
519                                                       dctx->ddictIsCold, nbSeq);
520             if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
521             ip += llhSize;
522         }
523 
524         {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
525                                                       OFtype, MaxOff, OffFSELog,
526                                                       ip, iend-ip,
527                                                       OF_base, OF_bits,
528                                                       OF_defaultDTable, dctx->fseEntropy,
529                                                       dctx->ddictIsCold, nbSeq);
530             if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
531             ip += ofhSize;
532         }
533 
534         {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
535                                                       MLtype, MaxML, MLFSELog,
536                                                       ip, iend-ip,
537                                                       ML_base, ML_bits,
538                                                       ML_defaultDTable, dctx->fseEntropy,
539                                                       dctx->ddictIsCold, nbSeq);
540             if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
541             ip += mlhSize;
542         }
543     }
544 
545     return ip-istart;
546 }
547 
548 
/* seq_t :
 * one decoded sequence : a run of literals followed by a match copy,
 * as consumed by the ZSTD_execSequence*() functions */
typedef struct {
    size_t litLength;     /* nb of literal bytes to copy from the literals buffer */
    size_t matchLength;   /* nb of bytes to copy from the match source */
    size_t offset;        /* distance from the end of the copied literals back to the match start */
    const BYTE* match;    /* match source pointer, read by ZSTD_execSequenceLong() instead of deriving it from `offset` */
} seq_t;
555 
/* ZSTD_fseState :
 * NOTE(review): usage is outside the visible part of the file;
 * presumably the decoding state of one FSE stream - confirm against the sequence decoder */
typedef struct {
    size_t state;                  /* presumably the current state value (index into `table`) - TODO confirm */
    const ZSTD_seqSymbol* table;   /* decoding table associated with this state */
} ZSTD_fseState;
560 
/* seqState_t :
 * groups a bitstream reader with one FSE state per symbol type, plus decoding context.
 * NOTE(review): field usage is outside the visible part of the file;
 * the per-field notes below are inferred from names and types - confirm against the sequence decoder */
typedef struct {
    BIT_DStream_t DStream;             /* bitstream reader */
    ZSTD_fseState stateLL;             /* presumably the literal-length state */
    ZSTD_fseState stateOffb;           /* presumably the offset-code state */
    ZSTD_fseState stateML;             /* presumably the match-length state */
    size_t prevOffset[ZSTD_REP_NUM];   /* presumably the most recent offsets, for repeat-offset codes */
    const BYTE* prefixStart;
    const BYTE* dictEnd;
    size_t pos;
} seqState_t;
571 
572 
573 /* ZSTD_execSequenceLast7():
574  * exceptional case : decompress a match starting within last 7 bytes of output buffer.
575  * requires more careful checks, to ensure there is no overflow.
576  * performance does not matter though.
577  * note : this case is supposed to be never generated "naturally" by reference encoder,
578  *        since in most cases it needs at least 8 bytes to look for a match.
579  *        but it's allowed by the specification. */
580 FORCE_NOINLINE
581 size_t ZSTD_execSequenceLast7(BYTE* op,
582                               BYTE* const oend, seq_t sequence,
583                               const BYTE** litPtr, const BYTE* const litLimit,
584                               const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
585 {
586     BYTE* const oLitEnd = op + sequence.litLength;
587     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
588     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
589     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
590     const BYTE* match = oLitEnd - sequence.offset;
591 
592     /* check */
593     if (oMatchEnd>oend) return ERROR(dstSize_tooSmall);   /* last match must fit within dstBuffer */
594     if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* try to read beyond literal buffer */
595 
596     /* copy literals */
597     while (op < oLitEnd) *op++ = *(*litPtr)++;
598 
599     /* copy Match */
600     if (sequence.offset > (size_t)(oLitEnd - base)) {
601         /* offset beyond prefix */
602         if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
603         match = dictEnd - (base-match);
604         if (match + sequence.matchLength <= dictEnd) {
605             memmove(oLitEnd, match, sequence.matchLength);
606             return sequenceLength;
607         }
608         /* span extDict & currentPrefixSegment */
609         {   size_t const length1 = dictEnd - match;
610             memmove(oLitEnd, match, length1);
611             op = oLitEnd + length1;
612             sequence.matchLength -= length1;
613             match = base;
614     }   }
615     while (op < oMatchEnd) *op++ = *match++;
616     return sequenceLength;
617 }
618 
619 
620 HINT_INLINE
621 size_t ZSTD_execSequence(BYTE* op,
622                          BYTE* const oend, seq_t sequence,
623                          const BYTE** litPtr, const BYTE* const litLimit,
624                          const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
625 {
626     BYTE* const oLitEnd = op + sequence.litLength;
627     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
628     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
629     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
630     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
631     const BYTE* match = oLitEnd - sequence.offset;
632 
633     /* check */
634     if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
635     if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
636     if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
637 
638     /* copy Literals */
639     ZSTD_copy8(op, *litPtr);
640     if (sequence.litLength > 8)
641         ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
642     op = oLitEnd;
643     *litPtr = iLitEnd;   /* update for next sequence */
644 
645     /* copy Match */
646     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
647         /* offset beyond prefix -> go into extDict */
648         if (sequence.offset > (size_t)(oLitEnd - virtualStart))
649             return ERROR(corruption_detected);
650         match = dictEnd + (match - prefixStart);
651         if (match + sequence.matchLength <= dictEnd) {
652             memmove(oLitEnd, match, sequence.matchLength);
653             return sequenceLength;
654         }
655         /* span extDict & currentPrefixSegment */
656         {   size_t const length1 = dictEnd - match;
657             memmove(oLitEnd, match, length1);
658             op = oLitEnd + length1;
659             sequence.matchLength -= length1;
660             match = prefixStart;
661             if (op > oend_w || sequence.matchLength < MINMATCH) {
662               U32 i;
663               for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
664               return sequenceLength;
665             }
666     }   }
667     /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
668 
669     /* match within prefix */
670     if (sequence.offset < 8) {
671         /* close range match, overlap */
672         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
673         static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
674         int const sub2 = dec64table[sequence.offset];
675         op[0] = match[0];
676         op[1] = match[1];
677         op[2] = match[2];
678         op[3] = match[3];
679         match += dec32table[sequence.offset];
680         ZSTD_copy4(op+4, match);
681         match -= sub2;
682     } else {
683         ZSTD_copy8(op, match);
684     }
685     op += 8; match += 8;
686 
687     if (oMatchEnd > oend-(16-MINMATCH)) {
688         if (op < oend_w) {
689             ZSTD_wildcopy(op, match, oend_w - op);
690             match += oend_w - op;
691             op = oend_w;
692         }
693         while (op < oMatchEnd) *op++ = *match++;
694     } else {
695         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
696     }
697     return sequenceLength;
698 }
699 
700 
701 HINT_INLINE
702 size_t ZSTD_execSequenceLong(BYTE* op,
703                              BYTE* const oend, seq_t sequence,
704                              const BYTE** litPtr, const BYTE* const litLimit,
705                              const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
706 {
707     BYTE* const oLitEnd = op + sequence.litLength;
708     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
709     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
710     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
711     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
712     const BYTE* match = sequence.match;
713 
714     /* check */
715     if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
716     if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
717     if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
718 
719     /* copy Literals */
720     ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
721     if (sequence.litLength > 8)
722         ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
723     op = oLitEnd;
724     *litPtr = iLitEnd;   /* update for next sequence */
725 
726     /* copy Match */
727     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
728         /* offset beyond prefix */
729         if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
730         if (match + sequence.matchLength <= dictEnd) {
731             memmove(oLitEnd, match, sequence.matchLength);
732             return sequenceLength;
733         }
734         /* span extDict & currentPrefixSegment */
735         {   size_t const length1 = dictEnd - match;
736             memmove(oLitEnd, match, length1);
737             op = oLitEnd + length1;
738             sequence.matchLength -= length1;
739             match = prefixStart;
740             if (op > oend_w || sequence.matchLength < MINMATCH) {
741               U32 i;
742               for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
743               return sequenceLength;
744             }
745     }   }
746     assert(op <= oend_w);
747     assert(sequence.matchLength >= MINMATCH);
748 
749     /* match within prefix */
750     if (sequence.offset < 8) {
751         /* close range match, overlap */
752         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
753         static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
754         int const sub2 = dec64table[sequence.offset];
755         op[0] = match[0];
756         op[1] = match[1];
757         op[2] = match[2];
758         op[3] = match[3];
759         match += dec32table[sequence.offset];
760         ZSTD_copy4(op+4, match);
761         match -= sub2;
762     } else {
763         ZSTD_copy8(op, match);
764     }
765     op += 8; match += 8;
766 
767     if (oMatchEnd > oend-(16-MINMATCH)) {
768         if (op < oend_w) {
769             ZSTD_wildcopy(op, match, oend_w - op);
770             match += oend_w - op;
771             op = oend_w;
772         }
773         while (op < oMatchEnd) *op++ = *match++;
774     } else {
775         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
776     }
777     return sequenceLength;
778 }
779 
780 static void
781 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
782 {
783     const void* ptr = dt;
784     const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
785     DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
786     DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
787                 (U32)DStatePtr->state, DTableH->tableLog);
788     BIT_reloadDStream(bitD);
789     DStatePtr->table = dt + 1;
790 }
791 
792 FORCE_INLINE_TEMPLATE void
793 ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
794 {
795     ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
796     U32 const nbBits = DInfo.nbBits;
797     size_t const lowBits = BIT_readBits(bitD, nbBits);
798     DStatePtr->state = DInfo.nextState + lowBits;
799 }
800 
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
/* max(0, ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32) */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                            \
    ( (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32)         \
        ? (ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32)     \
        : 0 )
810 
/* Selects how offsets are read by the sequence decoders.
 * ZSTD_lo_isLongOffset must remain equal to 1 (statically asserted by the decoders). */
typedef enum {
    ZSTD_lo_isRegularOffset = 0,
    ZSTD_lo_isLongOffset = 1
} ZSTD_longOffset_e;
812 
813 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
814 FORCE_INLINE_TEMPLATE seq_t
815 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
816 {
817     seq_t seq;
818     U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
819     U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
820     U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
821     U32 const totalBits = llBits+mlBits+ofBits;
822     U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
823     U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
824     U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
825 
826     /* sequence */
827     {   size_t offset;
828         if (!ofBits)
829             offset = 0;
830         else {
831             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
832             ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
833             assert(ofBits <= MaxOff);
834             if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
835                 U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
836                 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
837                 BIT_reloadDStream(&seqState->DStream);
838                 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
839                 assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
840             } else {
841                 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
842                 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
843             }
844         }
845 
846         if (ofBits <= 1) {
847             offset += (llBase==0);
848             if (offset) {
849                 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
850                 temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
851                 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
852                 seqState->prevOffset[1] = seqState->prevOffset[0];
853                 seqState->prevOffset[0] = offset = temp;
854             } else {  /* offset == 0 */
855                 offset = seqState->prevOffset[0];
856             }
857         } else {
858             seqState->prevOffset[2] = seqState->prevOffset[1];
859             seqState->prevOffset[1] = seqState->prevOffset[0];
860             seqState->prevOffset[0] = offset;
861         }
862         seq.offset = offset;
863     }
864 
865     seq.matchLength = mlBase
866                     + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0);  /* <=  16 bits */
867     if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
868         BIT_reloadDStream(&seqState->DStream);
869     if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
870         BIT_reloadDStream(&seqState->DStream);
871     /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
872     ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
873 
874     seq.litLength = llBase
875                   + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0);    /* <=  16 bits */
876     if (MEM_32bits())
877         BIT_reloadDStream(&seqState->DStream);
878 
879     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
880                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
881 
882     /* ANS state update */
883     ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
884     ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
885     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
886     ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
887 
888     return seq;
889 }
890 
891 FORCE_INLINE_TEMPLATE size_t
892 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
893                                void* dst, size_t maxDstSize,
894                          const void* seqStart, size_t seqSize, int nbSeq,
895                          const ZSTD_longOffset_e isLongOffset)
896 {
897     const BYTE* ip = (const BYTE*)seqStart;
898     const BYTE* const iend = ip + seqSize;
899     BYTE* const ostart = (BYTE* const)dst;
900     BYTE* const oend = ostart + maxDstSize;
901     BYTE* op = ostart;
902     const BYTE* litPtr = dctx->litPtr;
903     const BYTE* const litEnd = litPtr + dctx->litSize;
904     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
905     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
906     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
907     DEBUGLOG(5, "ZSTD_decompressSequences_body");
908 
909     /* Regen sequences */
910     if (nbSeq) {
911         seqState_t seqState;
912         dctx->fseEntropy = 1;
913         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
914         CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
915         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
916         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
917         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
918 
919         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
920             nbSeq--;
921             {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
922                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
923                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
924                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
925                 op += oneSeqSize;
926         }   }
927 
928         /* check if reached exact end */
929         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
930         if (nbSeq) return ERROR(corruption_detected);
931         /* save reps for next block */
932         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
933     }
934 
935     /* last literal segment */
936     {   size_t const lastLLSize = litEnd - litPtr;
937         if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
938         memcpy(op, litPtr, lastLLSize);
939         op += lastLLSize;
940     }
941 
942     return op-ostart;
943 }
944 
945 static size_t
946 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
947                                  void* dst, size_t maxDstSize,
948                            const void* seqStart, size_t seqSize, int nbSeq,
949                            const ZSTD_longOffset_e isLongOffset)
950 {
951     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
952 }
953 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
954 
955 
956 
957 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
958 FORCE_INLINE_TEMPLATE seq_t
959 ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
960 {
961     seq_t seq;
962     U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
963     U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
964     U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
965     U32 const totalBits = llBits+mlBits+ofBits;
966     U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
967     U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
968     U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
969 
970     /* sequence */
971     {   size_t offset;
972         if (!ofBits)
973             offset = 0;
974         else {
975             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
976             ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
977             assert(ofBits <= MaxOff);
978             if (MEM_32bits() && longOffsets) {
979                 U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
980                 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
981                 if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
982                 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
983             } else {
984                 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
985                 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
986             }
987         }
988 
989         if (ofBits <= 1) {
990             offset += (llBase==0);
991             if (offset) {
992                 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
993                 temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
994                 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
995                 seqState->prevOffset[1] = seqState->prevOffset[0];
996                 seqState->prevOffset[0] = offset = temp;
997             } else {
998                 offset = seqState->prevOffset[0];
999             }
1000         } else {
1001             seqState->prevOffset[2] = seqState->prevOffset[1];
1002             seqState->prevOffset[1] = seqState->prevOffset[0];
1003             seqState->prevOffset[0] = offset;
1004         }
1005         seq.offset = offset;
1006     }
1007 
1008     seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
1009     if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1010         BIT_reloadDStream(&seqState->DStream);
1011     if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1012         BIT_reloadDStream(&seqState->DStream);
1013     /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
1014     ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1015 
1016     seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
1017     if (MEM_32bits())
1018         BIT_reloadDStream(&seqState->DStream);
1019 
1020     {   size_t const pos = seqState->pos + seq.litLength;
1021         const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1022         seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1023                                                     * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1024         seqState->pos = pos + seq.matchLength;
1025     }
1026 
1027     /* ANS state update */
1028     ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
1029     ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
1030     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
1031     ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
1032 
1033     return seq;
1034 }
1035 
1036 FORCE_INLINE_TEMPLATE size_t
1037 ZSTD_decompressSequencesLong_body(
1038                                ZSTD_DCtx* dctx,
1039                                void* dst, size_t maxDstSize,
1040                          const void* seqStart, size_t seqSize, int nbSeq,
1041                          const ZSTD_longOffset_e isLongOffset)
1042 {
1043     const BYTE* ip = (const BYTE*)seqStart;
1044     const BYTE* const iend = ip + seqSize;
1045     BYTE* const ostart = (BYTE* const)dst;
1046     BYTE* const oend = ostart + maxDstSize;
1047     BYTE* op = ostart;
1048     const BYTE* litPtr = dctx->litPtr;
1049     const BYTE* const litEnd = litPtr + dctx->litSize;
1050     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1051     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1052     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1053 
1054     /* Regen sequences */
1055     if (nbSeq) {
1056 #define STORED_SEQS 4
1057 #define STORED_SEQS_MASK (STORED_SEQS-1)
1058 #define ADVANCED_SEQS 4
1059         seq_t sequences[STORED_SEQS];
1060         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1061         seqState_t seqState;
1062         int seqNb;
1063         dctx->fseEntropy = 1;
1064         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1065         seqState.prefixStart = prefixStart;
1066         seqState.pos = (size_t)(op-prefixStart);
1067         seqState.dictEnd = dictEnd;
1068         assert(iend >= ip);
1069         CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1070         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1071         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1072         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1073 
1074         /* prepare in advance */
1075         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1076             sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1077             PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1078         }
1079         if (seqNb<seqAdvance) return ERROR(corruption_detected);
1080 
1081         /* decode and decompress */
1082         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1083             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1084             size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1085             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1086             PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1087             sequences[seqNb & STORED_SEQS_MASK] = sequence;
1088             op += oneSeqSize;
1089         }
1090         if (seqNb<nbSeq) return ERROR(corruption_detected);
1091 
1092         /* finish queue */
1093         seqNb -= seqAdvance;
1094         for ( ; seqNb<nbSeq ; seqNb++) {
1095             size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1096             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1097             op += oneSeqSize;
1098         }
1099 
1100         /* save reps for next block */
1101         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1102     }
1103 
1104     /* last literal segment */
1105     {   size_t const lastLLSize = litEnd - litPtr;
1106         if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
1107         memcpy(op, litPtr, lastLLSize);
1108         op += lastLLSize;
1109     }
1110 
1111     return op-ostart;
1112 }
1113 
1114 static size_t
1115 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1116                                  void* dst, size_t maxDstSize,
1117                            const void* seqStart, size_t seqSize, int nbSeq,
1118                            const ZSTD_longOffset_e isLongOffset)
1119 {
1120     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1121 }
1122 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1123 
1124 
1125 
1126 #if DYNAMIC_BMI2
1127 
1128 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1129 static TARGET_ATTRIBUTE("bmi2") size_t
1130 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1131                                  void* dst, size_t maxDstSize,
1132                            const void* seqStart, size_t seqSize, int nbSeq,
1133                            const ZSTD_longOffset_e isLongOffset)
1134 {
1135     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1136 }
1137 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1138 
1139 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1140 static TARGET_ATTRIBUTE("bmi2") size_t
1141 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1142                                  void* dst, size_t maxDstSize,
1143                            const void* seqStart, size_t seqSize, int nbSeq,
1144                            const ZSTD_longOffset_e isLongOffset)
1145 {
1146     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1147 }
1148 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1149 
1150 #endif /* DYNAMIC_BMI2 */
1151 
1152 typedef size_t (*ZSTD_decompressSequences_t)(
1153                             ZSTD_DCtx* dctx,
1154                             void* dst, size_t maxDstSize,
1155                             const void* seqStart, size_t seqSize, int nbSeq,
1156                             const ZSTD_longOffset_e isLongOffset);
1157 
1158 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1159 static size_t
1160 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1161                    const void* seqStart, size_t seqSize, int nbSeq,
1162                    const ZSTD_longOffset_e isLongOffset)
1163 {
1164     DEBUGLOG(5, "ZSTD_decompressSequences");
1165 #if DYNAMIC_BMI2
1166     if (dctx->bmi2) {
1167         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1168     }
1169 #endif
1170   return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1171 }
1172 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1173 
1174 
1175 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1176 /* ZSTD_decompressSequencesLong() :
1177  * decompression function triggered when a minimum share of offsets is considered "long",
1178  * aka out of cache.
1179  * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
1180  * This function will try to mitigate main memory latency through the use of prefetching */
1181 static size_t
1182 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1183                              void* dst, size_t maxDstSize,
1184                              const void* seqStart, size_t seqSize, int nbSeq,
1185                              const ZSTD_longOffset_e isLongOffset)
1186 {
1187     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1188 #if DYNAMIC_BMI2
1189     if (dctx->bmi2) {
1190         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1191     }
1192 #endif
1193   return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1194 }
1195 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1196 
1197 
1198 
1199 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1200     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1201 /* ZSTD_getLongOffsetsShare() :
1202  * condition : offTable must be valid
1203  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1204  *           compared to maximum possible of (1<<OffFSELog) */
1205 static unsigned
1206 ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1207 {
1208     const void* ptr = offTable;
1209     U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1210     const ZSTD_seqSymbol* table = offTable + 1;
1211     U32 const max = 1 << tableLog;
1212     U32 u, total = 0;
1213     DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1214 
1215     assert(max <= (1 << OffFSELog));  /* max not too large */
1216     for (u=0; u<max; u++) {
1217         if (table[u].nbAdditionalBits > 22) total += 1;
1218     }
1219 
1220     assert(tableLog <= OffFSELog);
1221     total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
1222 
1223     return total;
1224 }
1225 #endif
1226 
1227 
1228 size_t
1229 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1230                               void* dst, size_t dstCapacity,
1231                         const void* src, size_t srcSize, const int frame)
1232 {   /* blockType == blockCompressed */
1233     const BYTE* ip = (const BYTE*)src;
1234     /* isLongOffset must be true if there are long offsets.
1235      * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1236      * We don't expect that to be the case in 64-bit mode.
1237      * In block mode, window size is not known, so we have to be conservative.
1238      * (note: but it could be evaluated from current-lowLimit)
1239      */
1240     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1241     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1242 
1243     if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
1244 
1245     /* Decode literals section */
1246     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
1247         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1248         if (ZSTD_isError(litCSize)) return litCSize;
1249         ip += litCSize;
1250         srcSize -= litCSize;
1251     }
1252 
1253     /* Build Decoding Tables */
1254     {
1255         /* These macros control at build-time which decompressor implementation
1256          * we use. If neither is defined, we do some inspection and dispatch at
1257          * runtime.
1258          */
1259 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1260     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1261         int usePrefetchDecoder = dctx->ddictIsCold;
1262 #endif
1263         int nbSeq;
1264         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1265         if (ZSTD_isError(seqHSize)) return seqHSize;
1266         ip += seqHSize;
1267         srcSize -= seqHSize;
1268 
1269 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1270     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1271         if ( !usePrefetchDecoder
1272           && (!frame || (dctx->fParams.windowSize > (1<<24)))
1273           && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
1274             U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1275             U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1276             usePrefetchDecoder = (shareLongOffsets >= minShare);
1277         }
1278 #endif
1279 
1280         dctx->ddictIsCold = 0;
1281 
1282 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1283     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1284         if (usePrefetchDecoder)
1285 #endif
1286 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1287             return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1288 #endif
1289 
1290 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1291         /* else */
1292         return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1293 #endif
1294     }
1295 }
1296 
1297 
1298 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1299                             void* dst, size_t dstCapacity,
1300                       const void* src, size_t srcSize)
1301 {
1302     size_t dSize;
1303     ZSTD_checkContinuity(dctx, dst);
1304     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
1305     dctx->previousDstEnd = (char*)dst + dSize;
1306     return dSize;
1307 }
1308