1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 /*-*************************************
13 * Dependencies
14 ***************************************/
15 #include "zstd_compress_literals.h"
16
17
18 /* **************************************************************
19 * Debug Traces
20 ****************************************************************/
21 #if DEBUGLEVEL >= 2
22
showHexa(const void * src,size_t srcSize)23 static size_t showHexa(const void* src, size_t srcSize)
24 {
25 const BYTE* const ip = (const BYTE*)src;
26 size_t u;
27 for (u=0; u<srcSize; u++) {
28 RAWLOG(5, " %02X", ip[u]); (void)ip;
29 }
30 RAWLOG(5, " \n");
31 return srcSize;
32 }
33
34 #endif
35
36
37 /* **************************************************************
38 * Literals compression - special cases
39 ****************************************************************/
ZSTD_noCompressLiterals(void * dst,size_t dstCapacity,const void * src,size_t srcSize)40 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
41 {
42 BYTE* const ostart = (BYTE*)dst;
43 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
44
45 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
46
47 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
48
49 switch(flSize)
50 {
51 case 1: /* 2 - 1 - 5 */
52 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
53 break;
54 case 2: /* 2 - 2 - 12 */
55 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
56 break;
57 case 3: /* 2 - 2 - 20 */
58 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
59 break;
60 default: /* not necessary : flSize is {1,2,3} */
61 assert(0);
62 }
63
64 ZSTD_memcpy(ostart + flSize, src, srcSize);
65 DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
66 return srcSize + flSize;
67 }
68
allBytesIdentical(const void * src,size_t srcSize)69 static int allBytesIdentical(const void* src, size_t srcSize)
70 {
71 assert(srcSize >= 1);
72 assert(src != NULL);
73 { const BYTE b = ((const BYTE*)src)[0];
74 size_t p;
75 for (p=1; p<srcSize; p++) {
76 if (((const BYTE*)src)[p] != b) return 0;
77 }
78 return 1;
79 }
80 }
81
ZSTD_compressRleLiteralsBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize)82 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
83 {
84 BYTE* const ostart = (BYTE*)dst;
85 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
86
87 assert(dstCapacity >= 4); (void)dstCapacity;
88 assert(allBytesIdentical(src, srcSize));
89
90 switch(flSize)
91 {
92 case 1: /* 2 - 1 - 5 */
93 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
94 break;
95 case 2: /* 2 - 2 - 12 */
96 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
97 break;
98 case 3: /* 2 - 2 - 20 */
99 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
100 break;
101 default: /* not necessary : flSize is {1,2,3} */
102 assert(0);
103 }
104
105 ostart[flSize] = *(const BYTE*)src;
106 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
107 return flSize+1;
108 }
109
110 /* ZSTD_minLiteralsToCompress() :
111 * returns minimal amount of literals
112 * for literal compression to even be attempted.
113 * Minimum is made tighter as compression strategy increases.
114 */
115 static size_t
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy,HUF_repeat huf_repeat)116 ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
117 {
118 assert((int)strategy >= 0);
119 assert((int)strategy <= 9);
120 /* btultra2 : min 8 bytes;
121 * then 2x larger for each successive compression strategy
122 * max threshold 64 bytes */
123 { int const shift = MIN(9-(int)strategy, 3);
124 size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
125 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
126 return mintc;
127 }
128 }
129
ZSTD_compressLiterals(void * dst,size_t dstCapacity,const void * src,size_t srcSize,void * entropyWorkspace,size_t entropyWorkspaceSize,const ZSTD_hufCTables_t * prevHuf,ZSTD_hufCTables_t * nextHuf,ZSTD_strategy strategy,int disableLiteralCompression,int suspectUncompressible,int bmi2)130 size_t ZSTD_compressLiterals (
131 void* dst, size_t dstCapacity,
132 const void* src, size_t srcSize,
133 void* entropyWorkspace, size_t entropyWorkspaceSize,
134 const ZSTD_hufCTables_t* prevHuf,
135 ZSTD_hufCTables_t* nextHuf,
136 ZSTD_strategy strategy,
137 int disableLiteralCompression,
138 int suspectUncompressible,
139 int bmi2)
140 {
141 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
142 BYTE* const ostart = (BYTE*)dst;
143 U32 singleStream = srcSize < 256;
144 SymbolEncodingType_e hType = set_compressed;
145 size_t cLitSize;
146
147 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
148 disableLiteralCompression, (U32)srcSize, dstCapacity);
149
150 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
151
152 /* Prepare nextEntropy assuming reusing the existing table */
153 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
154
155 if (disableLiteralCompression)
156 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
157
158 /* if too small, don't even attempt compression (speed opt) */
159 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
160 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
161
162 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
163 { HUF_repeat repeat = prevHuf->repeatMode;
164 int const flags = 0
165 | (bmi2 ? HUF_flags_bmi2 : 0)
166 | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
167 | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
168 | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
169
170 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
171 huf_compress_f huf_compress;
172 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
173 huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
174 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
175 src, srcSize,
176 HUF_SYMBOLVALUE_MAX, LitHufLog,
177 entropyWorkspace, entropyWorkspaceSize,
178 (HUF_CElt*)nextHuf->CTable,
179 &repeat, flags);
180 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
181 if (repeat != HUF_repeat_none) {
182 /* reused the existing table */
183 DEBUGLOG(5, "reusing statistics from previous huffman block");
184 hType = set_repeat;
185 }
186 }
187
188 { size_t const minGain = ZSTD_minGain(srcSize, strategy);
189 if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
190 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
191 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
192 } }
193 if (cLitSize==1) {
194 /* A return value of 1 signals that the alphabet consists of a single symbol.
195 * However, in some rare circumstances, it could be the compressed size (a single byte).
196 * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
197 * (it's also necessary to not generate statistics).
198 * Therefore, in such a case, actively check that all bytes are identical. */
199 if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
200 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
201 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
202 } }
203
204 if (hType == set_compressed) {
205 /* using a newly constructed table */
206 nextHuf->repeatMode = HUF_repeat_check;
207 }
208
209 /* Build header */
210 switch(lhSize)
211 {
212 case 3: /* 2 - 2 - 10 - 10 */
213 if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
214 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
215 MEM_writeLE24(ostart, lhc);
216 break;
217 }
218 case 4: /* 2 - 2 - 14 - 14 */
219 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
220 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
221 MEM_writeLE32(ostart, lhc);
222 break;
223 }
224 case 5: /* 2 - 2 - 18 - 18 */
225 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
226 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
227 MEM_writeLE32(ostart, lhc);
228 ostart[4] = (BYTE)(cLitSize >> 10);
229 break;
230 }
231 default: /* not possible : lhSize is {3,4,5} */
232 assert(0);
233 }
234 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
235 return lhSize+cLitSize;
236 }
237