10c16b537SWarner Losh /* 20c16b537SWarner Losh * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 30c16b537SWarner Losh * All rights reserved. 40c16b537SWarner Losh * 50c16b537SWarner Losh * This source code is licensed under both the BSD-style license (found in the 60c16b537SWarner Losh * LICENSE file in the root directory of this source tree) and the GPLv2 (found 70c16b537SWarner Losh * in the COPYING file in the root directory of this source tree). 80c16b537SWarner Losh * You may select, at your option, one of the above-listed licenses. 90c16b537SWarner Losh */ 100c16b537SWarner Losh 11052d3c12SConrad Meyer #include "zstd_compress_internal.h" 120c16b537SWarner Losh #include "zstd_fast.h" 130c16b537SWarner Losh 140c16b537SWarner Losh 15*19fcbaf1SConrad Meyer void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 16*19fcbaf1SConrad Meyer ZSTD_compressionParameters const* cParams, 17*19fcbaf1SConrad Meyer void const* end) 180c16b537SWarner Losh { 19*19fcbaf1SConrad Meyer U32* const hashTable = ms->hashTable; 20*19fcbaf1SConrad Meyer U32 const hBits = cParams->hashLog; 21*19fcbaf1SConrad Meyer U32 const mls = cParams->searchLength; 22*19fcbaf1SConrad Meyer const BYTE* const base = ms->window.base; 23*19fcbaf1SConrad Meyer const BYTE* ip = base + ms->nextToUpdate; 240c16b537SWarner Losh const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 25*19fcbaf1SConrad Meyer const U32 fastHashFillStep = 3; 260c16b537SWarner Losh 27*19fcbaf1SConrad Meyer /* Always insert every fastHashFillStep position into the hash table. 28*19fcbaf1SConrad Meyer * Insert the other positions if their hash entry is empty. 29*19fcbaf1SConrad Meyer */ 30*19fcbaf1SConrad Meyer for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { 31*19fcbaf1SConrad Meyer U32 const current = (U32)(ip - base); 32*19fcbaf1SConrad Meyer U32 i; 33*19fcbaf1SConrad Meyer for (i = 0; i < fastHashFillStep; ++i) { 34*19fcbaf1SConrad Meyer size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls); 35*19fcbaf1SConrad Meyer if (i == 0 || hashTable[hash] == 0) 36*19fcbaf1SConrad Meyer hashTable[hash] = current + i; 370c16b537SWarner Losh } 380c16b537SWarner Losh } 39*19fcbaf1SConrad Meyer } 400c16b537SWarner Losh 410c16b537SWarner Losh FORCE_INLINE_TEMPLATE 42*19fcbaf1SConrad Meyer size_t ZSTD_compressBlock_fast_generic( 43*19fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 44*19fcbaf1SConrad Meyer void const* src, size_t srcSize, 45*19fcbaf1SConrad Meyer U32 const hlog, U32 const stepSize, U32 const mls) 460c16b537SWarner Losh { 47*19fcbaf1SConrad Meyer U32* const hashTable = ms->hashTable; 48*19fcbaf1SConrad Meyer const BYTE* const base = ms->window.base; 490c16b537SWarner Losh const BYTE* const istart = (const BYTE*)src; 500c16b537SWarner Losh const BYTE* ip = istart; 510c16b537SWarner Losh const BYTE* anchor = istart; 52*19fcbaf1SConrad Meyer const U32 lowestIndex = ms->window.dictLimit; 530c16b537SWarner Losh const BYTE* const lowest = base + lowestIndex; 540c16b537SWarner Losh const BYTE* const iend = istart + srcSize; 550c16b537SWarner Losh const BYTE* const ilimit = iend - HASH_READ_SIZE; 56*19fcbaf1SConrad Meyer U32 offset_1=rep[0], offset_2=rep[1]; 570c16b537SWarner Losh U32 offsetSaved = 0; 580c16b537SWarner Losh 590c16b537SWarner Losh /* init */ 600c16b537SWarner Losh ip += (ip==lowest); 610c16b537SWarner Losh { U32 const maxRep = (U32)(ip-lowest); 620c16b537SWarner Losh if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; 630c16b537SWarner Losh if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; 640c16b537SWarner Losh } 650c16b537SWarner Losh 660c16b537SWarner Losh /* Main Search Loop */ 670c16b537SWarner Losh while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ 680c16b537SWarner Losh size_t mLength; 69*19fcbaf1SConrad Meyer size_t const h = ZSTD_hashPtr(ip, hlog, mls); 700c16b537SWarner Losh U32 const current = (U32)(ip-base); 710c16b537SWarner Losh U32 const matchIndex = hashTable[h]; 720c16b537SWarner Losh const BYTE* match = base + matchIndex; 730c16b537SWarner Losh hashTable[h] = current; /* update hash table */ 740c16b537SWarner Losh 750c16b537SWarner Losh if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { 760c16b537SWarner Losh mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 770c16b537SWarner Losh ip++; 78*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); 790c16b537SWarner Losh } else { 80*19fcbaf1SConrad Meyer if ( (matchIndex <= lowestIndex) 81*19fcbaf1SConrad Meyer || (MEM_read32(match) != MEM_read32(ip)) ) { 82*19fcbaf1SConrad Meyer assert(stepSize >= 1); 83*19fcbaf1SConrad Meyer ip += ((ip-anchor) >> kSearchStrength) + stepSize; 840c16b537SWarner Losh continue; 850c16b537SWarner Losh } 860c16b537SWarner Losh mLength = ZSTD_count(ip+4, match+4, iend) + 4; 87*19fcbaf1SConrad Meyer { U32 const offset = (U32)(ip-match); 880c16b537SWarner Losh while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 890c16b537SWarner Losh offset_2 = offset_1; 900c16b537SWarner Losh offset_1 = offset; 91*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); 92*19fcbaf1SConrad Meyer } } 930c16b537SWarner Losh 940c16b537SWarner Losh /* match found */ 950c16b537SWarner Losh ip += mLength; 960c16b537SWarner Losh anchor = ip; 970c16b537SWarner Losh 980c16b537SWarner Losh if (ip <= ilimit) { 990c16b537SWarner Losh /* Fill Table */ 100*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ 101*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); 1020c16b537SWarner Losh /* check immediate repcode */ 1030c16b537SWarner Losh while ( (ip <= ilimit) 1040c16b537SWarner Losh && ( (offset_2>0) 1050c16b537SWarner Losh & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { 1060c16b537SWarner Losh /* store sequence */ 1070c16b537SWarner Losh size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 1080c16b537SWarner Losh { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ 109*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); 110*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); 1110c16b537SWarner Losh ip += rLength; 1120c16b537SWarner Losh anchor = ip; 1130c16b537SWarner Losh continue; /* faster when present ... (?) */ 1140c16b537SWarner Losh } } } 1150c16b537SWarner Losh 1160c16b537SWarner Losh /* save reps for next block */ 117*19fcbaf1SConrad Meyer rep[0] = offset_1 ? offset_1 : offsetSaved; 118*19fcbaf1SConrad Meyer rep[1] = offset_2 ? offset_2 : offsetSaved; 1190c16b537SWarner Losh 1200c16b537SWarner Losh /* Return the last literals size */ 1210c16b537SWarner Losh return iend - anchor; 1220c16b537SWarner Losh } 1230c16b537SWarner Losh 1240c16b537SWarner Losh 125*19fcbaf1SConrad Meyer size_t ZSTD_compressBlock_fast( 126*19fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 127*19fcbaf1SConrad Meyer ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 1280c16b537SWarner Losh { 129*19fcbaf1SConrad Meyer U32 const hlog = cParams->hashLog; 130*19fcbaf1SConrad Meyer U32 const mls = cParams->searchLength; 131*19fcbaf1SConrad Meyer U32 const stepSize = cParams->targetLength; 1320c16b537SWarner Losh switch(mls) 1330c16b537SWarner Losh { 1340c16b537SWarner Losh default: /* includes case 3 */ 1350c16b537SWarner Losh case 4 : 136*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); 1370c16b537SWarner Losh case 5 : 138*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); 1390c16b537SWarner Losh case 6 : 140*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); 1410c16b537SWarner Losh case 7 : 142*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); 1430c16b537SWarner Losh } 1440c16b537SWarner Losh } 1450c16b537SWarner Losh 1460c16b537SWarner Losh 147*19fcbaf1SConrad Meyer static size_t ZSTD_compressBlock_fast_extDict_generic( 148*19fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 149*19fcbaf1SConrad Meyer void const* src, size_t srcSize, 150*19fcbaf1SConrad Meyer U32 const hlog, U32 const stepSize, U32 const mls) 1510c16b537SWarner Losh { 152*19fcbaf1SConrad Meyer U32* hashTable = ms->hashTable; 153*19fcbaf1SConrad Meyer const BYTE* const base = ms->window.base; 154*19fcbaf1SConrad Meyer const BYTE* const dictBase = ms->window.dictBase; 1550c16b537SWarner Losh const BYTE* const istart = (const BYTE*)src; 1560c16b537SWarner Losh const BYTE* ip = istart; 1570c16b537SWarner Losh const BYTE* anchor = istart; 158*19fcbaf1SConrad Meyer const U32 lowestIndex = ms->window.lowLimit; 1590c16b537SWarner Losh const BYTE* const dictStart = dictBase + lowestIndex; 160*19fcbaf1SConrad Meyer const U32 dictLimit = ms->window.dictLimit; 1610c16b537SWarner Losh const BYTE* const lowPrefixPtr = base + dictLimit; 1620c16b537SWarner Losh const BYTE* const dictEnd = dictBase + dictLimit; 1630c16b537SWarner Losh const BYTE* const iend = istart + srcSize; 1640c16b537SWarner Losh const BYTE* const ilimit = iend - 8; 165*19fcbaf1SConrad Meyer U32 offset_1=rep[0], offset_2=rep[1]; 1660c16b537SWarner Losh 1670c16b537SWarner Losh /* Search Loop */ 1680c16b537SWarner Losh while (ip < ilimit) { /* < instead of <=, because (ip+1) */ 169*19fcbaf1SConrad Meyer const size_t h = ZSTD_hashPtr(ip, hlog, mls); 1700c16b537SWarner Losh const U32 matchIndex = hashTable[h]; 1710c16b537SWarner Losh const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; 1720c16b537SWarner Losh const BYTE* match = matchBase + matchIndex; 1730c16b537SWarner Losh const U32 current = (U32)(ip-base); 1740c16b537SWarner Losh const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ 1750c16b537SWarner Losh const BYTE* repBase = repIndex < dictLimit ? dictBase : base; 1760c16b537SWarner Losh const BYTE* repMatch = repBase + repIndex; 1770c16b537SWarner Losh size_t mLength; 1780c16b537SWarner Losh hashTable[h] = current; /* update hash table */ 1790c16b537SWarner Losh 1800c16b537SWarner Losh if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) 1810c16b537SWarner Losh && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 1820c16b537SWarner Losh const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; 1830c16b537SWarner Losh mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; 1840c16b537SWarner Losh ip++; 185*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); 1860c16b537SWarner Losh } else { 1870c16b537SWarner Losh if ( (matchIndex < lowestIndex) || 1880c16b537SWarner Losh (MEM_read32(match) != MEM_read32(ip)) ) { 189*19fcbaf1SConrad Meyer assert(stepSize >= 1); 190*19fcbaf1SConrad Meyer ip += ((ip-anchor) >> kSearchStrength) + stepSize; 1910c16b537SWarner Losh continue; 1920c16b537SWarner Losh } 1930c16b537SWarner Losh { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; 1940c16b537SWarner Losh const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; 1950c16b537SWarner Losh U32 offset; 1960c16b537SWarner Losh mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; 1970c16b537SWarner Losh while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 1980c16b537SWarner Losh offset = current - matchIndex; 1990c16b537SWarner Losh offset_2 = offset_1; 2000c16b537SWarner Losh offset_1 = offset; 201*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); 2020c16b537SWarner Losh } } 2030c16b537SWarner Losh 2040c16b537SWarner Losh /* found a match : store it */ 2050c16b537SWarner Losh ip += mLength; 2060c16b537SWarner Losh anchor = ip; 2070c16b537SWarner Losh 2080c16b537SWarner Losh if (ip <= ilimit) { 2090c16b537SWarner Losh /* Fill Table */ 210*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; 211*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); 2120c16b537SWarner Losh /* check immediate repcode */ 2130c16b537SWarner Losh while (ip <= ilimit) { 2140c16b537SWarner Losh U32 const current2 = (U32)(ip-base); 2150c16b537SWarner Losh U32 const repIndex2 = current2 - offset_2; 2160c16b537SWarner Losh const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; 2170c16b537SWarner Losh if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ 2180c16b537SWarner Losh && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 2190c16b537SWarner Losh const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; 2200c16b537SWarner Losh size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; 2210c16b537SWarner Losh U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 222*19fcbaf1SConrad Meyer ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); 223*19fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; 2240c16b537SWarner Losh ip += repLength2; 2250c16b537SWarner Losh anchor = ip; 2260c16b537SWarner Losh continue; 2270c16b537SWarner Losh } 2280c16b537SWarner Losh break; 2290c16b537SWarner Losh } } } 2300c16b537SWarner Losh 2310c16b537SWarner Losh /* save reps for next block */ 232*19fcbaf1SConrad Meyer rep[0] = offset_1; 233*19fcbaf1SConrad Meyer rep[1] = offset_2; 2340c16b537SWarner Losh 2350c16b537SWarner Losh /* Return the last literals size */ 2360c16b537SWarner Losh return iend - anchor; 2370c16b537SWarner Losh } 2380c16b537SWarner Losh 2390c16b537SWarner Losh 240*19fcbaf1SConrad Meyer size_t ZSTD_compressBlock_fast_extDict( 241*19fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 242*19fcbaf1SConrad Meyer ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 2430c16b537SWarner Losh { 244*19fcbaf1SConrad Meyer U32 const hlog = cParams->hashLog; 245*19fcbaf1SConrad Meyer U32 const mls = cParams->searchLength; 246*19fcbaf1SConrad Meyer U32 const stepSize = cParams->targetLength; 2470c16b537SWarner Losh switch(mls) 2480c16b537SWarner Losh { 2490c16b537SWarner Losh default: /* includes case 3 */ 2500c16b537SWarner Losh case 4 : 251*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); 2520c16b537SWarner Losh case 5 : 253*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); 2540c16b537SWarner Losh case 6 : 255*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); 2560c16b537SWarner Losh case 7 : 257*19fcbaf1SConrad Meyer return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); 2580c16b537SWarner Losh } 2590c16b537SWarner Losh } 260