/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_fast.h"


/* ZSTD_fillHashTable() :
 * Pre-loads ms->hashTable with positions from the range
 * [base + ms->nextToUpdate, end - HASH_READ_SIZE).
 * Every `fastHashFillStep`-th (3rd) position is always inserted, overwriting
 * any previous entry.  For ZSTD_dtlm_full, the intermediate positions are also
 * inserted, but only into empty (== 0) slots, so the regularly-spaced entries
 * are never displaced by their neighbors.  ZSTD_dtlm_fast skips the
 * intermediate positions entirely (faster load, sparser table). */
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hBits = cParams->hashLog;
    U32 const mls = cParams->minMatch;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    /* stop early enough that every hashed position has HASH_READ_SIZE
     * readable bytes after it */
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash table.
     * Insert the other positions if their hash entry is empty.
     */
    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const current = (U32)(ip - base);
        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
        hashTable[hash0] = current;
        if (dtlm == ZSTD_dtlm_fast) continue;
        /* Only load extra positions for ZSTD_dtlm_full */
        {   U32 p;
            for (p = 1; p < fastHashFillStep; ++p) {
                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                if (hashTable[hash] == 0) {  /* not yet filled */
                    hashTable[hash] = current + p;
    }   }   }   }
}

/* ZSTD_compressBlock_fast_generic() :
 * Core of the "fast" strategy for the single-segment (no dict, no extDict)
 * case.  Greedy single-pass search over [src, src+srcSize) using one hash
 * table of U32 positions.  Each loop iteration probes two adjacent positions
 * (ip0 and ip1 = ip0+1) plus a repcode candidate at ip0+2, emitting sequences
 * into `seqStore`.  `rep[0..1]` carry the two repeat offsets across blocks and
 * are updated on exit.
 * @return : size of the trailing literals (bytes after the last stored
 *           sequence), which the caller emits as raw literals. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
    const BYTE* ip0 = istart;
    const BYTE* ip1;
    const BYTE* anchor = istart;   /* start of not-yet-emitted literals */
    const U32 prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    /* init */
    ip0 += (ip0 == prefixStart);   /* first byte of the prefix can't be a match target */
    ip1 = ip0 + 1;
    {
        /* A rep offset reaching before prefixStart is unusable in this block :
         * zero it (disabled), but remember it in offsetSaved so it can be
         * restored when saving reps for the next block. */
        U32 const maxRep = (U32)(ip0 - prefixStart);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Main Search Loop */
    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
        size_t mLength;
        BYTE const* ip2 = ip0 + 2;
        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
        U32 const val0 = MEM_read32(ip0);
        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
        U32 const val1 = MEM_read32(ip1);
        U32 const current0 = (U32)(ip0-base);
        U32 const current1 = (U32)(ip1-base);
        U32 const matchIndex0 = hashTable[h0];
        U32 const matchIndex1 = hashTable[h1];
        BYTE const* repMatch = ip2-offset_1;
        const BYTE* match0 = base + matchIndex0;
        const BYTE* match1 = base + matchIndex1;
        U32 offcode;
        hashTable[h0] = current0;   /* update hash table */
        hashTable[h1] = current1;   /* update hash table */

        assert(ip0 + 1 == ip1);

        /* 1st priority : repcode at ip0+2 (offset_1 == 0 means "disabled") */
        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
            /* opportunistically extend the rep match one byte backwards */
            mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
            ip0 = ip2 - mLength;
            match0 = repMatch - mLength;
            offcode = 0;   /* repcode 0 */
            goto _match;
        }
        /* 2nd priority : regular match at ip0 */
        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
            /* found a regular match */
            goto _offset;
        }
        /* 3rd priority : regular match at ip1 (one literal later) */
        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
            /* found a regular match after one literal */
            ip0 = ip1;
            match0 = match1;
            goto _offset;
        }
        {
            /* no match : skip forward, faster the longer we've gone without a match */
            size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
            assert(step >= 2);
            ip0 += step;
            ip1 += step;
            continue;
        }
_offset: /* Requires: ip0, match0 */
        /* Compute the offset code */
        offset_2 = offset_1;
        offset_1 = (U32)(ip0-match0);
        offcode = offset_1 + ZSTD_REP_MOVE;
        mLength = 0;
        /* Count the backwards match length */
        while (((ip0>anchor) & (match0>prefixStart))
             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */

_match: /* Requires: ip0, match0, offcode */
        /* Count the forward length ; first 4 bytes already known equal */
        mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
        ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
        /* match found */
        ip0 += mLength;
        anchor = ip0;
        ip1 = ip0 + 1;

        if (ip0 <= ilimit) {
            /* Fill Table */
            assert(base+current0+2 > istart);  /* check base overflow */
            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

            /* check immediate repcode (offset_2) right after the match */
            while ( (ip0 <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                ip0 += rLength;
                ip1 = ip0 + 1;
                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
                anchor = ip0;
                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
            }
        }
    }

    /* save reps for next block ; restore a saved (disabled) rep if one slot is 0 */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
}

/* ZSTD_compressBlock_fast() :
 * Public entry point for the no-dictionary case.  Dispatches to the
 * FORCE_INLINE_TEMPLATE generic body with `mls` as a compile-time constant
 * (4..7) so each specialization is fully optimized. */
size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    assert(ms->dictMatchState == NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}

/* ZSTD_compressBlock_fast_dictMatchState_generic() :
 * Fast-strategy search when an attached dictionary matchState (dms) is
 * present.  Indices are shared across the dict and the current window :
 * a dict index d maps to local index d + dictIndexDelta.  When the local
 * hash table yields no in-prefix candidate (matchIndex <= prefixStartIndex),
 * the dictionary's own hash table is probed instead.  Rep matches and regular
 * matches crossing the dict/prefix boundary use ZSTD_count_2segments().
 * @return : size of the trailing literals. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
    const U32* const dictHashTable = dms->hashTable;
    const U32 dictStartIndex = dms->window.dictLimit;
    const BYTE* const dictBase = dms->window.base;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dms->window.nextSrc;
    /* translation term : localIndex = dictIndex + dictIndexDelta */
    const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
    const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
    const BYTE* const dictEnd = dms->window.nextSrc;   /* NOTE(review): see original layout */
    const U32 dictHLog = dictCParams->hashLog;

    /* otherwise, we would get index underflow when translating a dict index
     * into a local index */
    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

    /* init */
    ip += (dictAndPrefixLength == 0);   /* skip first byte if nothing precedes it */
    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
        U32 const current = (U32)(ip-base);
        U32 const matchIndex = hashTable[h];
        const BYTE* match = base + matchIndex;
        const U32 repIndex = current + 1 - offset_1;
        /* rep candidate may live in the dict segment or the local prefix */
        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashTable[h] = current;   /* update hash table */

        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else if ( (matchIndex <= prefixStartIndex) ) {
            /* no usable local candidate : probe the dictionary's hash table */
            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
            U32 const dictMatchIndex = dictHashTable[dictHash];
            const BYTE* dictMatch = dictBase + dictMatchIndex;
            if (dictMatchIndex <= dictStartIndex ||
                MEM_read32(dictMatch) != MEM_read32(ip)) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            } else {
                /* found a dict match */
                U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
                while (((ip>anchor) & (dictMatch>dictStart))
                     && (ip[-1] == dictMatch[-1])) {
                    ip--; dictMatch--; mLength++;
                } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
            assert(stepSize >= 1);
            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
            continue;
        } else {
            /* found a regular match */
            U32 const offset = (U32)(ip-match);
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            while (((ip>anchor) & (match>prefixStart))
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            assert(base+current+2 > istart);  /* check base overflow */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                        dictBase - dictIndexDelta + repIndex2 :
                        base + repIndex2;
                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
}

/* ZSTD_compressBlock_fast_dictMatchState() :
 * Public entry point for the attached-dictionary case ; dispatches on
 * minMatch to a compile-time-specialized generic body. */
size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}

/* ZSTD_compressBlock_fast_extDict_generic() :
 * Fast-strategy search when the window wraps : indices below prefixStartIndex
 * resolve into the "extDict" segment (dictBase), indices at or above it into
 * the current segment (base).  Matches may start in the ext segment and are
 * length-counted across the boundary with ZSTD_count_2segments().
 * @return : size of the trailing literals. */
static size_t ZSTD_compressBlock_fast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 dictStartIndex = ms->window.lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32 prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;   /* same margin as HASH_READ_SIZE elsewhere */
    U32 offset_1=rep[0], offset_2=rep[1];

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
        const U32 matchIndex = hashTable[h];
        /* resolve index into ext segment or current segment */
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;
        const U32 current = (U32)(ip-base);
        const U32 repIndex = current + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashTable[h] = current;   /* update hash table */
        assert(offset_1 <= current +1);   /* check repIndex */

        /* rep check : also require repIndex > dictStartIndex (candidate inside window) */
        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            {   /* match confirmed : count across the segment boundary, then catch up backwards */
                const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                offset = current - matchIndex;
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }   }

        /* found a match : store it */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block (no offsetSaved here : extDict reps are always valid) */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
}

/* ZSTD_compressBlock_fast_extDict() :
 * Public entry point for the wrapped-window (extDict) case ; dispatches on
 * minMatch to a specialized generic body. */
size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}