1ca3e8d88SDave Plauger 2ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 3ca3e8d88SDave Plauger /*--- Block sorting machinery ---*/ 4ca3e8d88SDave Plauger /*--- blocksort.c ---*/ 5ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 6ca3e8d88SDave Plauger 7ca3e8d88SDave Plauger /* ------------------------------------------------------------------ 8ca3e8d88SDave Plauger This file is part of bzip2/libbzip2, a program and library for 9ca3e8d88SDave Plauger lossless, block-sorting data compression. 10ca3e8d88SDave Plauger 11*b9071c34SGordon Ross bzip2/libbzip2 version 1.0.6 of 6 September 2010 12*b9071c34SGordon Ross Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 13ca3e8d88SDave Plauger 14ca3e8d88SDave Plauger Please read the WARNING, DISCLAIMER and PATENTS sections in the 15ca3e8d88SDave Plauger README file. 16ca3e8d88SDave Plauger 17ca3e8d88SDave Plauger This program is released under the terms of the license contained 18ca3e8d88SDave Plauger in the file LICENSE. 19ca3e8d88SDave Plauger ------------------------------------------------------------------ */ 20ca3e8d88SDave Plauger 21ca3e8d88SDave Plauger 22ca3e8d88SDave Plauger #include "bzlib_private.h" 23ca3e8d88SDave Plauger 24ca3e8d88SDave Plauger /*---------------------------------------------*/ 25ca3e8d88SDave Plauger /*--- Fallback O(N log(N)^2) sorting ---*/ 26ca3e8d88SDave Plauger /*--- algorithm, for repetitive blocks ---*/ 27ca3e8d88SDave Plauger /*---------------------------------------------*/ 28ca3e8d88SDave Plauger 29ca3e8d88SDave Plauger /*---------------------------------------------*/ 30ca3e8d88SDave Plauger static 31ca3e8d88SDave Plauger __inline__ 32ca3e8d88SDave Plauger void fallbackSimpleSort ( UInt32* fmap, 33ca3e8d88SDave Plauger UInt32* eclass, 34ca3e8d88SDave Plauger Int32 lo, 35ca3e8d88SDave Plauger Int32 hi ) 36ca3e8d88SDave Plauger { 37ca3e8d88SDave Plauger Int32 i, j, tmp; 38ca3e8d88SDave Plauger UInt32 ec_tmp; 39ca3e8d88SDave Plauger 40ca3e8d88SDave Plauger if (lo == hi) return; 41ca3e8d88SDave Plauger 42ca3e8d88SDave Plauger if (hi - lo > 3) { 43ca3e8d88SDave Plauger for ( i = hi-4; i >= lo; i-- ) { 44ca3e8d88SDave Plauger tmp = fmap[i]; 45ca3e8d88SDave Plauger ec_tmp = eclass[tmp]; 46ca3e8d88SDave Plauger for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 47ca3e8d88SDave Plauger fmap[j-4] = fmap[j]; 48ca3e8d88SDave Plauger fmap[j-4] = tmp; 49ca3e8d88SDave Plauger } 50ca3e8d88SDave Plauger } 51ca3e8d88SDave Plauger 52ca3e8d88SDave Plauger for ( i = hi-1; i >= lo; i-- ) { 53ca3e8d88SDave Plauger tmp = fmap[i]; 54ca3e8d88SDave Plauger ec_tmp = eclass[tmp]; 55ca3e8d88SDave Plauger for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 56ca3e8d88SDave Plauger fmap[j-1] = fmap[j]; 57ca3e8d88SDave Plauger fmap[j-1] = tmp; 58ca3e8d88SDave Plauger } 59ca3e8d88SDave Plauger } 60ca3e8d88SDave Plauger 61ca3e8d88SDave Plauger 62ca3e8d88SDave Plauger /*---------------------------------------------*/ 63ca3e8d88SDave Plauger #define fswap(zz1, zz2) \ 64ca3e8d88SDave Plauger { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 65ca3e8d88SDave Plauger 66ca3e8d88SDave Plauger #define fvswap(zzp1, zzp2, zzn) \ 67ca3e8d88SDave Plauger { \ 68ca3e8d88SDave Plauger Int32 yyp1 = (zzp1); \ 69ca3e8d88SDave Plauger Int32 yyp2 = (zzp2); \ 70ca3e8d88SDave Plauger Int32 yyn = (zzn); \ 71ca3e8d88SDave Plauger while (yyn > 0) { \ 72ca3e8d88SDave Plauger fswap(fmap[yyp1], fmap[yyp2]); \ 73ca3e8d88SDave Plauger yyp1++; yyp2++; yyn--; \ 74ca3e8d88SDave Plauger } \ 75ca3e8d88SDave Plauger } 76ca3e8d88SDave Plauger 77ca3e8d88SDave Plauger 78ca3e8d88SDave Plauger #define fmin(a,b) ((a) < (b)) ? (a) : (b) 79ca3e8d88SDave Plauger 80ca3e8d88SDave Plauger #define fpush(lz,hz) { stackLo[sp] = lz; \ 81ca3e8d88SDave Plauger stackHi[sp] = hz; \ 82ca3e8d88SDave Plauger sp++; } 83ca3e8d88SDave Plauger 84ca3e8d88SDave Plauger #define fpop(lz,hz) { sp--; \ 85ca3e8d88SDave Plauger lz = stackLo[sp]; \ 86ca3e8d88SDave Plauger hz = stackHi[sp]; } 87ca3e8d88SDave Plauger 88ca3e8d88SDave Plauger #define FALLBACK_QSORT_SMALL_THRESH 10 89ca3e8d88SDave Plauger #define FALLBACK_QSORT_STACK_SIZE 100 90ca3e8d88SDave Plauger 91ca3e8d88SDave Plauger 92ca3e8d88SDave Plauger static 93ca3e8d88SDave Plauger void fallbackQSort3 ( UInt32* fmap, 94ca3e8d88SDave Plauger UInt32* eclass, 95ca3e8d88SDave Plauger Int32 loSt, 96ca3e8d88SDave Plauger Int32 hiSt ) 97ca3e8d88SDave Plauger { 98ca3e8d88SDave Plauger Int32 unLo, unHi, ltLo, gtHi, n, m; 99ca3e8d88SDave Plauger Int32 sp, lo, hi; 100ca3e8d88SDave Plauger UInt32 med, r, r3; 101ca3e8d88SDave Plauger Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 102ca3e8d88SDave Plauger Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 103ca3e8d88SDave Plauger 104ca3e8d88SDave Plauger r = 0; 105ca3e8d88SDave Plauger 106ca3e8d88SDave Plauger sp = 0; 107ca3e8d88SDave Plauger fpush ( loSt, hiSt ); 108ca3e8d88SDave Plauger 109ca3e8d88SDave Plauger while (sp > 0) { 110ca3e8d88SDave Plauger 111ca3e8d88SDave Plauger AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 112ca3e8d88SDave Plauger 113ca3e8d88SDave Plauger fpop ( lo, hi ); 114ca3e8d88SDave Plauger if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 115ca3e8d88SDave Plauger fallbackSimpleSort ( fmap, eclass, lo, hi ); 116ca3e8d88SDave Plauger continue; 117ca3e8d88SDave Plauger } 118ca3e8d88SDave Plauger 119ca3e8d88SDave Plauger /* Random partitioning. Median of 3 sometimes fails to 120ca3e8d88SDave Plauger avoid bad cases. Median of 9 seems to help but 121ca3e8d88SDave Plauger looks rather expensive. This too seems to work but 122ca3e8d88SDave Plauger is cheaper. Guidance for the magic constants 123ca3e8d88SDave Plauger 7621 and 32768 is taken from Sedgewick's algorithms 124ca3e8d88SDave Plauger book, chapter 35. 125ca3e8d88SDave Plauger */ 126ca3e8d88SDave Plauger r = ((r * 7621) + 1) % 32768; 127ca3e8d88SDave Plauger r3 = r % 3; 128ca3e8d88SDave Plauger if (r3 == 0) med = eclass[fmap[lo]]; else 129ca3e8d88SDave Plauger if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 130ca3e8d88SDave Plauger med = eclass[fmap[hi]]; 131ca3e8d88SDave Plauger 132ca3e8d88SDave Plauger unLo = ltLo = lo; 133ca3e8d88SDave Plauger unHi = gtHi = hi; 134ca3e8d88SDave Plauger 135ca3e8d88SDave Plauger while (1) { 136ca3e8d88SDave Plauger while (1) { 137ca3e8d88SDave Plauger if (unLo > unHi) break; 138ca3e8d88SDave Plauger n = (Int32)eclass[fmap[unLo]] - (Int32)med; 139ca3e8d88SDave Plauger if (n == 0) { 140ca3e8d88SDave Plauger fswap(fmap[unLo], fmap[ltLo]); 141ca3e8d88SDave Plauger ltLo++; unLo++; 142ca3e8d88SDave Plauger continue; 143ca3e8d88SDave Plauger }; 144ca3e8d88SDave Plauger if (n > 0) break; 145ca3e8d88SDave Plauger unLo++; 146ca3e8d88SDave Plauger } 147ca3e8d88SDave Plauger while (1) { 148ca3e8d88SDave Plauger if (unLo > unHi) break; 149ca3e8d88SDave Plauger n = (Int32)eclass[fmap[unHi]] - (Int32)med; 150ca3e8d88SDave Plauger if (n == 0) { 151ca3e8d88SDave Plauger fswap(fmap[unHi], fmap[gtHi]); 152ca3e8d88SDave Plauger gtHi--; unHi--; 153ca3e8d88SDave Plauger continue; 154ca3e8d88SDave Plauger }; 155ca3e8d88SDave Plauger if (n < 0) break; 156ca3e8d88SDave Plauger unHi--; 157ca3e8d88SDave Plauger } 158ca3e8d88SDave Plauger if (unLo > unHi) break; 159ca3e8d88SDave Plauger fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 160ca3e8d88SDave Plauger } 161ca3e8d88SDave Plauger 162ca3e8d88SDave Plauger AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 163ca3e8d88SDave Plauger 164ca3e8d88SDave Plauger if (gtHi < ltLo) continue; 165ca3e8d88SDave Plauger 166ca3e8d88SDave Plauger n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 167ca3e8d88SDave Plauger m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 168ca3e8d88SDave Plauger 169ca3e8d88SDave Plauger n = lo + unLo - ltLo - 1; 170ca3e8d88SDave Plauger m = hi - (gtHi - unHi) + 1; 171ca3e8d88SDave Plauger 172ca3e8d88SDave Plauger if (n - lo > hi - m) { 173ca3e8d88SDave Plauger fpush ( lo, n ); 174ca3e8d88SDave Plauger fpush ( m, hi ); 175ca3e8d88SDave Plauger } else { 176ca3e8d88SDave Plauger fpush ( m, hi ); 177ca3e8d88SDave Plauger fpush ( lo, n ); 178ca3e8d88SDave Plauger } 179ca3e8d88SDave Plauger } 180ca3e8d88SDave Plauger } 181ca3e8d88SDave Plauger 182ca3e8d88SDave Plauger #undef fmin 183ca3e8d88SDave Plauger #undef fpush 184ca3e8d88SDave Plauger #undef fpop 185ca3e8d88SDave Plauger #undef fswap 186ca3e8d88SDave Plauger #undef fvswap 187ca3e8d88SDave Plauger #undef FALLBACK_QSORT_SMALL_THRESH 188ca3e8d88SDave Plauger #undef FALLBACK_QSORT_STACK_SIZE 189ca3e8d88SDave Plauger 190ca3e8d88SDave Plauger 191ca3e8d88SDave Plauger /*---------------------------------------------*/ 192ca3e8d88SDave Plauger /* Pre: 193ca3e8d88SDave Plauger nblock > 0 194ca3e8d88SDave Plauger eclass exists for [0 .. nblock-1] 195ca3e8d88SDave Plauger ((UChar*)eclass) [0 .. nblock-1] holds block 196ca3e8d88SDave Plauger ptr exists for [0 .. nblock-1] 197ca3e8d88SDave Plauger 198ca3e8d88SDave Plauger Post: 199ca3e8d88SDave Plauger ((UChar*)eclass) [0 .. nblock-1] holds block 200ca3e8d88SDave Plauger All other areas of eclass destroyed 201ca3e8d88SDave Plauger fmap [0 .. nblock-1] holds sorted order 202ca3e8d88SDave Plauger bhtab [ 0 .. 2+(nblock/32) ] destroyed 203ca3e8d88SDave Plauger */ 204ca3e8d88SDave Plauger 205ca3e8d88SDave Plauger #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 206ca3e8d88SDave Plauger #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 207ca3e8d88SDave Plauger #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 208ca3e8d88SDave Plauger #define WORD_BH(zz) bhtab[(zz) >> 5] 209ca3e8d88SDave Plauger #define UNALIGNED_BH(zz) ((zz) & 0x01f) 210ca3e8d88SDave Plauger 211ca3e8d88SDave Plauger static 212ca3e8d88SDave Plauger void fallbackSort ( UInt32* fmap, 213ca3e8d88SDave Plauger UInt32* eclass, 214ca3e8d88SDave Plauger UInt32* bhtab, 215ca3e8d88SDave Plauger Int32 nblock, 216ca3e8d88SDave Plauger Int32 verb ) 217ca3e8d88SDave Plauger { 218ca3e8d88SDave Plauger Int32 ftab[257]; 219ca3e8d88SDave Plauger Int32 ftabCopy[256]; 220ca3e8d88SDave Plauger Int32 H, i, j, k, l, r, cc, cc1; 221ca3e8d88SDave Plauger Int32 nNotDone; 222ca3e8d88SDave Plauger Int32 nBhtab; 223ca3e8d88SDave Plauger UChar* eclass8 = (UChar*)eclass; 224ca3e8d88SDave Plauger 225ca3e8d88SDave Plauger /*-- 226ca3e8d88SDave Plauger Initial 1-char radix sort to generate 227ca3e8d88SDave Plauger initial fmap and initial BH bits. 228ca3e8d88SDave Plauger --*/ 229ca3e8d88SDave Plauger if (verb >= 4) 230ca3e8d88SDave Plauger VPrintf0 ( " bucket sorting ...\n" ); 231ca3e8d88SDave Plauger for (i = 0; i < 257; i++) ftab[i] = 0; 232ca3e8d88SDave Plauger for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 233ca3e8d88SDave Plauger for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 234ca3e8d88SDave Plauger for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 235ca3e8d88SDave Plauger 236ca3e8d88SDave Plauger for (i = 0; i < nblock; i++) { 237ca3e8d88SDave Plauger j = eclass8[i]; 238ca3e8d88SDave Plauger k = ftab[j] - 1; 239ca3e8d88SDave Plauger ftab[j] = k; 240ca3e8d88SDave Plauger fmap[k] = i; 241ca3e8d88SDave Plauger } 242ca3e8d88SDave Plauger 243ca3e8d88SDave Plauger nBhtab = 2 + (nblock / 32); 244ca3e8d88SDave Plauger for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 245ca3e8d88SDave Plauger for (i = 0; i < 256; i++) SET_BH(ftab[i]); 246ca3e8d88SDave Plauger 247ca3e8d88SDave Plauger /*-- 248ca3e8d88SDave Plauger Inductively refine the buckets. Kind-of an 249ca3e8d88SDave Plauger "exponential radix sort" (!), inspired by the 250ca3e8d88SDave Plauger Manber-Myers suffix array construction algorithm. 251ca3e8d88SDave Plauger --*/ 252ca3e8d88SDave Plauger 253ca3e8d88SDave Plauger /*-- set sentinel bits for block-end detection --*/ 254ca3e8d88SDave Plauger for (i = 0; i < 32; i++) { 255ca3e8d88SDave Plauger SET_BH(nblock + 2*i); 256ca3e8d88SDave Plauger CLEAR_BH(nblock + 2*i + 1); 257ca3e8d88SDave Plauger } 258ca3e8d88SDave Plauger 259ca3e8d88SDave Plauger /*-- the log(N) loop --*/ 260ca3e8d88SDave Plauger H = 1; 261ca3e8d88SDave Plauger while (1) { 262ca3e8d88SDave Plauger 263ca3e8d88SDave Plauger if (verb >= 4) 264ca3e8d88SDave Plauger VPrintf1 ( " depth %6d has ", H ); 265ca3e8d88SDave Plauger 266ca3e8d88SDave Plauger j = 0; 267ca3e8d88SDave Plauger for (i = 0; i < nblock; i++) { 268ca3e8d88SDave Plauger if (ISSET_BH(i)) j = i; 269ca3e8d88SDave Plauger k = fmap[i] - H; if (k < 0) k += nblock; 270ca3e8d88SDave Plauger eclass[k] = j; 271ca3e8d88SDave Plauger } 272ca3e8d88SDave Plauger 273ca3e8d88SDave Plauger nNotDone = 0; 274ca3e8d88SDave Plauger r = -1; 275ca3e8d88SDave Plauger while (1) { 276ca3e8d88SDave Plauger 277ca3e8d88SDave Plauger /*-- find the next non-singleton bucket --*/ 278ca3e8d88SDave Plauger k = r + 1; 279ca3e8d88SDave Plauger while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 280ca3e8d88SDave Plauger if (ISSET_BH(k)) { 281ca3e8d88SDave Plauger while (WORD_BH(k) == 0xffffffff) k += 32; 282ca3e8d88SDave Plauger while (ISSET_BH(k)) k++; 283ca3e8d88SDave Plauger } 284ca3e8d88SDave Plauger l = k - 1; 285ca3e8d88SDave Plauger if (l >= nblock) break; 286ca3e8d88SDave Plauger while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 287ca3e8d88SDave Plauger if (!ISSET_BH(k)) { 288ca3e8d88SDave Plauger while (WORD_BH(k) == 0x00000000) k += 32; 289ca3e8d88SDave Plauger while (!ISSET_BH(k)) k++; 290ca3e8d88SDave Plauger } 291ca3e8d88SDave Plauger r = k - 1; 292ca3e8d88SDave Plauger if (r >= nblock) break; 293ca3e8d88SDave Plauger 294ca3e8d88SDave Plauger /*-- now [l, r] bracket current bucket --*/ 295ca3e8d88SDave Plauger if (r > l) { 296ca3e8d88SDave Plauger nNotDone += (r - l + 1); 297ca3e8d88SDave Plauger fallbackQSort3 ( fmap, eclass, l, r ); 298ca3e8d88SDave Plauger 299ca3e8d88SDave Plauger /*-- scan bucket and generate header bits-- */ 300ca3e8d88SDave Plauger cc = -1; 301ca3e8d88SDave Plauger for (i = l; i <= r; i++) { 302ca3e8d88SDave Plauger cc1 = eclass[fmap[i]]; 303ca3e8d88SDave Plauger if (cc != cc1) { SET_BH(i); cc = cc1; }; 304ca3e8d88SDave Plauger } 305ca3e8d88SDave Plauger } 306ca3e8d88SDave Plauger } 307ca3e8d88SDave Plauger 308ca3e8d88SDave Plauger if (verb >= 4) 309ca3e8d88SDave Plauger VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 310ca3e8d88SDave Plauger 311ca3e8d88SDave Plauger H *= 2; 312ca3e8d88SDave Plauger if (H > nblock || nNotDone == 0) break; 313ca3e8d88SDave Plauger } 314ca3e8d88SDave Plauger 315ca3e8d88SDave Plauger /*-- 316ca3e8d88SDave Plauger Reconstruct the original block in 317ca3e8d88SDave Plauger eclass8 [0 .. nblock-1], since the 318ca3e8d88SDave Plauger previous phase destroyed it. 319ca3e8d88SDave Plauger --*/ 320ca3e8d88SDave Plauger if (verb >= 4) 321ca3e8d88SDave Plauger VPrintf0 ( " reconstructing block ...\n" ); 322ca3e8d88SDave Plauger j = 0; 323ca3e8d88SDave Plauger for (i = 0; i < nblock; i++) { 324ca3e8d88SDave Plauger while (ftabCopy[j] == 0) j++; 325ca3e8d88SDave Plauger ftabCopy[j]--; 326ca3e8d88SDave Plauger eclass8[fmap[i]] = (UChar)j; 327ca3e8d88SDave Plauger } 328ca3e8d88SDave Plauger AssertH ( j < 256, 1005 ); 329ca3e8d88SDave Plauger } 330ca3e8d88SDave Plauger 331ca3e8d88SDave Plauger #undef SET_BH 332ca3e8d88SDave Plauger #undef CLEAR_BH 333ca3e8d88SDave Plauger #undef ISSET_BH 334ca3e8d88SDave Plauger #undef WORD_BH 335ca3e8d88SDave Plauger #undef UNALIGNED_BH 336ca3e8d88SDave Plauger 337ca3e8d88SDave Plauger 338ca3e8d88SDave Plauger /*---------------------------------------------*/ 339ca3e8d88SDave Plauger /*--- The main, O(N^2 log(N)) sorting ---*/ 340ca3e8d88SDave Plauger /*--- algorithm. Faster for "normal" ---*/ 341ca3e8d88SDave Plauger /*--- non-repetitive blocks. ---*/ 342ca3e8d88SDave Plauger /*---------------------------------------------*/ 343ca3e8d88SDave Plauger 344ca3e8d88SDave Plauger /*---------------------------------------------*/ 345ca3e8d88SDave Plauger static 346ca3e8d88SDave Plauger __inline__ 347ca3e8d88SDave Plauger Bool mainGtU ( UInt32 i1, 348ca3e8d88SDave Plauger UInt32 i2, 349ca3e8d88SDave Plauger UChar* block, 350ca3e8d88SDave Plauger UInt16* quadrant, 351ca3e8d88SDave Plauger UInt32 nblock, 352ca3e8d88SDave Plauger Int32* budget ) 353ca3e8d88SDave Plauger { 354ca3e8d88SDave Plauger Int32 k; 355ca3e8d88SDave Plauger UChar c1, c2; 356ca3e8d88SDave Plauger UInt16 s1, s2; 357ca3e8d88SDave Plauger 358ca3e8d88SDave Plauger AssertD ( i1 != i2, "mainGtU" ); 359ca3e8d88SDave Plauger /* 1 */ 360ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 361ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 362ca3e8d88SDave Plauger i1++; i2++; 363ca3e8d88SDave Plauger /* 2 */ 364ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 365ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 366ca3e8d88SDave Plauger i1++; i2++; 367ca3e8d88SDave Plauger /* 3 */ 368ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 369ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 370ca3e8d88SDave Plauger i1++; i2++; 371ca3e8d88SDave Plauger /* 4 */ 372ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 373ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 374ca3e8d88SDave Plauger i1++; i2++; 375ca3e8d88SDave Plauger /* 5 */ 376ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 377ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 378ca3e8d88SDave Plauger i1++; i2++; 379ca3e8d88SDave Plauger /* 6 */ 380ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 381ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 382ca3e8d88SDave Plauger i1++; i2++; 383ca3e8d88SDave Plauger /* 7 */ 384ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 385ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 386ca3e8d88SDave Plauger i1++; i2++; 387ca3e8d88SDave Plauger /* 8 */ 388ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 389ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 390ca3e8d88SDave Plauger i1++; i2++; 391ca3e8d88SDave Plauger /* 9 */ 392ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 393ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 394ca3e8d88SDave Plauger i1++; i2++; 395ca3e8d88SDave Plauger /* 10 */ 396ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 397ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 398ca3e8d88SDave Plauger i1++; i2++; 399ca3e8d88SDave Plauger /* 11 */ 400ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 401ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 402ca3e8d88SDave Plauger i1++; i2++; 403ca3e8d88SDave Plauger /* 12 */ 404ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 405ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 406ca3e8d88SDave Plauger i1++; i2++; 407ca3e8d88SDave Plauger 408ca3e8d88SDave Plauger k = nblock + 8; 409ca3e8d88SDave Plauger 410ca3e8d88SDave Plauger do { 411ca3e8d88SDave Plauger /* 1 */ 412ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 413ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 414ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 415ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 416ca3e8d88SDave Plauger i1++; i2++; 417ca3e8d88SDave Plauger /* 2 */ 418ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 419ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 420ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 421ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 422ca3e8d88SDave Plauger i1++; i2++; 423ca3e8d88SDave Plauger /* 3 */ 424ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 425ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 426ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 427ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 428ca3e8d88SDave Plauger i1++; i2++; 429ca3e8d88SDave Plauger /* 4 */ 430ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 431ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 432ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 433ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 434ca3e8d88SDave Plauger i1++; i2++; 435ca3e8d88SDave Plauger /* 5 */ 436ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 437ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 438ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 439ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 440ca3e8d88SDave Plauger i1++; i2++; 441ca3e8d88SDave Plauger /* 6 */ 442ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 443ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 444ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 445ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 446ca3e8d88SDave Plauger i1++; i2++; 447ca3e8d88SDave Plauger /* 7 */ 448ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 449ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 450ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 451ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 452ca3e8d88SDave Plauger i1++; i2++; 453ca3e8d88SDave Plauger /* 8 */ 454ca3e8d88SDave Plauger c1 = block[i1]; c2 = block[i2]; 455ca3e8d88SDave Plauger if (c1 != c2) return (c1 > c2); 456ca3e8d88SDave Plauger s1 = quadrant[i1]; s2 = quadrant[i2]; 457ca3e8d88SDave Plauger if (s1 != s2) return (s1 > s2); 458ca3e8d88SDave Plauger i1++; i2++; 459ca3e8d88SDave Plauger 460ca3e8d88SDave Plauger if (i1 >= nblock) i1 -= nblock; 461ca3e8d88SDave Plauger if (i2 >= nblock) i2 -= nblock; 462ca3e8d88SDave Plauger 463ca3e8d88SDave Plauger k -= 8; 464ca3e8d88SDave Plauger (*budget)--; 465ca3e8d88SDave Plauger } 466ca3e8d88SDave Plauger while (k >= 0); 467ca3e8d88SDave Plauger 468ca3e8d88SDave Plauger return False; 469ca3e8d88SDave Plauger } 470ca3e8d88SDave Plauger 471ca3e8d88SDave Plauger 472ca3e8d88SDave Plauger /*---------------------------------------------*/ 473ca3e8d88SDave Plauger /*-- 474ca3e8d88SDave Plauger Knuth's increments seem to work better 475ca3e8d88SDave Plauger than Incerpi-Sedgewick here. Possibly 476ca3e8d88SDave Plauger because the number of elems to sort is 477ca3e8d88SDave Plauger usually small, typically <= 20. 478ca3e8d88SDave Plauger --*/ 479ca3e8d88SDave Plauger static 480ca3e8d88SDave Plauger Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 481ca3e8d88SDave Plauger 9841, 29524, 88573, 265720, 482ca3e8d88SDave Plauger 797161, 2391484 }; 483ca3e8d88SDave Plauger 484ca3e8d88SDave Plauger static 485ca3e8d88SDave Plauger void mainSimpleSort ( UInt32* ptr, 486ca3e8d88SDave Plauger UChar* block, 487ca3e8d88SDave Plauger UInt16* quadrant, 488ca3e8d88SDave Plauger Int32 nblock, 489ca3e8d88SDave Plauger Int32 lo, 490ca3e8d88SDave Plauger Int32 hi, 491ca3e8d88SDave Plauger Int32 d, 492ca3e8d88SDave Plauger Int32* budget ) 493ca3e8d88SDave Plauger { 494ca3e8d88SDave Plauger Int32 i, j, h, bigN, hp; 495ca3e8d88SDave Plauger UInt32 v; 496ca3e8d88SDave Plauger 497ca3e8d88SDave Plauger bigN = hi - lo + 1; 498ca3e8d88SDave Plauger if (bigN < 2) return; 499ca3e8d88SDave Plauger 500ca3e8d88SDave Plauger hp = 0; 501ca3e8d88SDave Plauger while (incs[hp] < bigN) hp++; 502ca3e8d88SDave Plauger hp--; 503ca3e8d88SDave Plauger 504ca3e8d88SDave Plauger for (; hp >= 0; hp--) { 505ca3e8d88SDave Plauger h = incs[hp]; 506ca3e8d88SDave Plauger 507ca3e8d88SDave Plauger i = lo + h; 508ca3e8d88SDave Plauger while (True) { 509ca3e8d88SDave Plauger 510ca3e8d88SDave Plauger /*-- copy 1 --*/ 511ca3e8d88SDave Plauger if (i > hi) break; 512ca3e8d88SDave Plauger v = ptr[i]; 513ca3e8d88SDave Plauger j = i; 514ca3e8d88SDave Plauger while ( mainGtU ( 515ca3e8d88SDave Plauger ptr[j-h]+d, v+d, block, quadrant, nblock, budget 516ca3e8d88SDave Plauger ) ) { 517ca3e8d88SDave Plauger ptr[j] = ptr[j-h]; 518ca3e8d88SDave Plauger j = j - h; 519ca3e8d88SDave Plauger if (j <= (lo + h - 1)) break; 520ca3e8d88SDave Plauger } 521ca3e8d88SDave Plauger ptr[j] = v; 522ca3e8d88SDave Plauger i++; 523ca3e8d88SDave Plauger 524ca3e8d88SDave Plauger /*-- copy 2 --*/ 525ca3e8d88SDave Plauger if (i > hi) break; 526ca3e8d88SDave Plauger v = ptr[i]; 527ca3e8d88SDave Plauger j = i; 528ca3e8d88SDave Plauger while ( mainGtU ( 529ca3e8d88SDave Plauger ptr[j-h]+d, v+d, block, quadrant, nblock, budget 530ca3e8d88SDave Plauger ) ) { 531ca3e8d88SDave Plauger ptr[j] = ptr[j-h]; 532ca3e8d88SDave Plauger j = j - h; 533ca3e8d88SDave Plauger if (j <= (lo + h - 1)) break; 534ca3e8d88SDave Plauger } 535ca3e8d88SDave Plauger ptr[j] = v; 536ca3e8d88SDave Plauger i++; 537ca3e8d88SDave Plauger 538ca3e8d88SDave Plauger /*-- copy 3 --*/ 539ca3e8d88SDave Plauger if (i > hi) break; 540ca3e8d88SDave Plauger v = ptr[i]; 541ca3e8d88SDave Plauger j = i; 542ca3e8d88SDave Plauger while ( mainGtU ( 543ca3e8d88SDave Plauger ptr[j-h]+d, v+d, block, quadrant, nblock, budget 544ca3e8d88SDave Plauger ) ) { 545ca3e8d88SDave Plauger ptr[j] = ptr[j-h]; 546ca3e8d88SDave Plauger j = j - h; 547ca3e8d88SDave Plauger if (j <= (lo + h - 1)) break; 548ca3e8d88SDave Plauger } 549ca3e8d88SDave Plauger ptr[j] = v; 550ca3e8d88SDave Plauger i++; 551ca3e8d88SDave Plauger 552ca3e8d88SDave Plauger if (*budget < 0) return; 553ca3e8d88SDave Plauger } 554ca3e8d88SDave Plauger } 555ca3e8d88SDave Plauger } 556ca3e8d88SDave Plauger 557ca3e8d88SDave Plauger 558ca3e8d88SDave Plauger /*---------------------------------------------*/ 559ca3e8d88SDave Plauger /*-- 560ca3e8d88SDave Plauger The following is an implementation of 561ca3e8d88SDave Plauger an elegant 3-way quicksort for strings, 562ca3e8d88SDave Plauger described in a paper "Fast Algorithms for 563ca3e8d88SDave Plauger Sorting and Searching Strings", by Robert 564ca3e8d88SDave Plauger Sedgewick and Jon L. Bentley. 565ca3e8d88SDave Plauger --*/ 566ca3e8d88SDave Plauger 567ca3e8d88SDave Plauger #define mswap(zz1, zz2) \ 568ca3e8d88SDave Plauger { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 569ca3e8d88SDave Plauger 570ca3e8d88SDave Plauger #define mvswap(zzp1, zzp2, zzn) \ 571ca3e8d88SDave Plauger { \ 572ca3e8d88SDave Plauger Int32 yyp1 = (zzp1); \ 573ca3e8d88SDave Plauger Int32 yyp2 = (zzp2); \ 574ca3e8d88SDave Plauger Int32 yyn = (zzn); \ 575ca3e8d88SDave Plauger while (yyn > 0) { \ 576ca3e8d88SDave Plauger mswap(ptr[yyp1], ptr[yyp2]); \ 577ca3e8d88SDave Plauger yyp1++; yyp2++; yyn--; \ 578ca3e8d88SDave Plauger } \ 579ca3e8d88SDave Plauger } 580ca3e8d88SDave Plauger 581ca3e8d88SDave Plauger static 582ca3e8d88SDave Plauger __inline__ 583ca3e8d88SDave Plauger UChar mmed3 ( UChar a, UChar b, UChar c ) 584ca3e8d88SDave Plauger { 585ca3e8d88SDave Plauger UChar t; 586ca3e8d88SDave Plauger if (a > b) { t = a; a = b; b = t; }; 587ca3e8d88SDave Plauger if (b > c) { 588ca3e8d88SDave Plauger b = c; 589ca3e8d88SDave Plauger if (a > b) b = a; 590ca3e8d88SDave Plauger } 591ca3e8d88SDave Plauger return b; 592ca3e8d88SDave Plauger } 593ca3e8d88SDave Plauger 594ca3e8d88SDave Plauger #define mmin(a,b) ((a) < (b)) ? (a) : (b) 595ca3e8d88SDave Plauger 596ca3e8d88SDave Plauger #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 597ca3e8d88SDave Plauger stackHi[sp] = hz; \ 598ca3e8d88SDave Plauger stackD [sp] = dz; \ 599ca3e8d88SDave Plauger sp++; } 600ca3e8d88SDave Plauger 601ca3e8d88SDave Plauger #define mpop(lz,hz,dz) { sp--; \ 602ca3e8d88SDave Plauger lz = stackLo[sp]; \ 603ca3e8d88SDave Plauger hz = stackHi[sp]; \ 604ca3e8d88SDave Plauger dz = stackD [sp]; } 605ca3e8d88SDave Plauger 606ca3e8d88SDave Plauger 607ca3e8d88SDave Plauger #define mnextsize(az) (nextHi[az]-nextLo[az]) 608ca3e8d88SDave Plauger 609ca3e8d88SDave Plauger #define mnextswap(az,bz) \ 610ca3e8d88SDave Plauger { Int32 tz; \ 611ca3e8d88SDave Plauger tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 612ca3e8d88SDave Plauger tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 613ca3e8d88SDave Plauger tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 614ca3e8d88SDave Plauger 615ca3e8d88SDave Plauger 616ca3e8d88SDave Plauger #define MAIN_QSORT_SMALL_THRESH 20 617ca3e8d88SDave Plauger #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 618ca3e8d88SDave Plauger #define MAIN_QSORT_STACK_SIZE 100 619ca3e8d88SDave Plauger 620ca3e8d88SDave Plauger static 621ca3e8d88SDave Plauger void mainQSort3 ( UInt32* ptr, 622ca3e8d88SDave Plauger UChar* block, 623ca3e8d88SDave Plauger UInt16* quadrant, 624ca3e8d88SDave Plauger Int32 nblock, 625ca3e8d88SDave Plauger Int32 loSt, 626ca3e8d88SDave Plauger Int32 hiSt, 627ca3e8d88SDave Plauger Int32 dSt, 628ca3e8d88SDave Plauger Int32* budget ) 629ca3e8d88SDave Plauger { 630ca3e8d88SDave Plauger Int32 unLo, unHi, ltLo, gtHi, n, m, med; 631ca3e8d88SDave Plauger Int32 sp, lo, hi, d; 632ca3e8d88SDave Plauger 633ca3e8d88SDave Plauger Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 634ca3e8d88SDave Plauger Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 635ca3e8d88SDave Plauger Int32 stackD [MAIN_QSORT_STACK_SIZE]; 636ca3e8d88SDave Plauger 637ca3e8d88SDave Plauger Int32 nextLo[3]; 638ca3e8d88SDave Plauger Int32 nextHi[3]; 639ca3e8d88SDave Plauger Int32 nextD [3]; 640ca3e8d88SDave Plauger 641ca3e8d88SDave Plauger sp = 0; 642ca3e8d88SDave Plauger mpush ( loSt, hiSt, dSt ); 643ca3e8d88SDave Plauger 644ca3e8d88SDave Plauger while (sp > 0) { 645ca3e8d88SDave Plauger 646ca3e8d88SDave Plauger AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 647ca3e8d88SDave Plauger 648ca3e8d88SDave Plauger mpop ( lo, hi, d ); 649ca3e8d88SDave Plauger if (hi - lo < MAIN_QSORT_SMALL_THRESH || 650ca3e8d88SDave Plauger d > MAIN_QSORT_DEPTH_THRESH) { 651ca3e8d88SDave Plauger mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 652ca3e8d88SDave Plauger if (*budget < 0) return; 653ca3e8d88SDave Plauger continue; 654ca3e8d88SDave Plauger } 655ca3e8d88SDave Plauger 656ca3e8d88SDave Plauger med = (Int32) 657ca3e8d88SDave Plauger mmed3 ( block[ptr[ lo ]+d], 658ca3e8d88SDave Plauger block[ptr[ hi ]+d], 659ca3e8d88SDave Plauger block[ptr[ (lo+hi)>>1 ]+d] ); 660ca3e8d88SDave Plauger 661ca3e8d88SDave Plauger unLo = ltLo = lo; 662ca3e8d88SDave Plauger unHi = gtHi = hi; 663ca3e8d88SDave Plauger 664ca3e8d88SDave Plauger while (True) { 665ca3e8d88SDave Plauger while (True) { 666ca3e8d88SDave Plauger if (unLo > unHi) break; 667ca3e8d88SDave Plauger n = ((Int32)block[ptr[unLo]+d]) - med; 668ca3e8d88SDave Plauger if (n == 0) { 669ca3e8d88SDave Plauger mswap(ptr[unLo], ptr[ltLo]); 670ca3e8d88SDave Plauger ltLo++; unLo++; continue; 671ca3e8d88SDave Plauger }; 672ca3e8d88SDave Plauger if (n > 0) break; 673ca3e8d88SDave Plauger unLo++; 674ca3e8d88SDave Plauger } 675ca3e8d88SDave Plauger while (True) { 676ca3e8d88SDave Plauger if (unLo > unHi) break; 677ca3e8d88SDave Plauger n = ((Int32)block[ptr[unHi]+d]) - med; 678ca3e8d88SDave Plauger if (n == 0) { 679ca3e8d88SDave Plauger mswap(ptr[unHi], ptr[gtHi]); 680ca3e8d88SDave Plauger gtHi--; unHi--; continue; 681ca3e8d88SDave Plauger }; 682ca3e8d88SDave Plauger if (n < 0) break; 683ca3e8d88SDave Plauger unHi--; 684ca3e8d88SDave Plauger } 685ca3e8d88SDave Plauger if (unLo > unHi) break; 686ca3e8d88SDave Plauger mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 687ca3e8d88SDave Plauger } 688ca3e8d88SDave Plauger 689ca3e8d88SDave Plauger AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 690ca3e8d88SDave Plauger 691ca3e8d88SDave Plauger if (gtHi < ltLo) { 692ca3e8d88SDave Plauger mpush(lo, hi, d+1 ); 693ca3e8d88SDave Plauger continue; 694ca3e8d88SDave Plauger } 695ca3e8d88SDave Plauger 696ca3e8d88SDave Plauger n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 697ca3e8d88SDave Plauger m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 698ca3e8d88SDave Plauger 699ca3e8d88SDave Plauger n = lo + unLo - ltLo - 1; 700ca3e8d88SDave Plauger m = hi - (gtHi - unHi) + 1; 701ca3e8d88SDave Plauger 702ca3e8d88SDave Plauger nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 703ca3e8d88SDave Plauger nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 704ca3e8d88SDave Plauger nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 705ca3e8d88SDave Plauger 706ca3e8d88SDave Plauger if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 707ca3e8d88SDave Plauger if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 708ca3e8d88SDave Plauger if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 709ca3e8d88SDave Plauger 710ca3e8d88SDave Plauger AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 711ca3e8d88SDave Plauger AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 712ca3e8d88SDave Plauger 713ca3e8d88SDave Plauger mpush (nextLo[0], nextHi[0], nextD[0]); 714ca3e8d88SDave Plauger mpush (nextLo[1], nextHi[1], nextD[1]); 715ca3e8d88SDave Plauger mpush (nextLo[2], nextHi[2], nextD[2]); 716ca3e8d88SDave Plauger } 717ca3e8d88SDave Plauger } 718ca3e8d88SDave Plauger 719ca3e8d88SDave Plauger #undef mswap 720ca3e8d88SDave Plauger #undef mvswap 721ca3e8d88SDave Plauger #undef mpush 722ca3e8d88SDave Plauger #undef mpop 723ca3e8d88SDave Plauger #undef mmin 724ca3e8d88SDave Plauger #undef mnextsize 725ca3e8d88SDave Plauger #undef mnextswap 726ca3e8d88SDave Plauger #undef MAIN_QSORT_SMALL_THRESH 727ca3e8d88SDave Plauger #undef MAIN_QSORT_DEPTH_THRESH 728ca3e8d88SDave Plauger #undef MAIN_QSORT_STACK_SIZE 729ca3e8d88SDave Plauger 730ca3e8d88SDave Plauger 731ca3e8d88SDave Plauger /*---------------------------------------------*/ 732ca3e8d88SDave Plauger /* Pre: 733ca3e8d88SDave Plauger nblock > N_OVERSHOOT 734ca3e8d88SDave Plauger block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 735ca3e8d88SDave Plauger ((UChar*)block32) [0 .. nblock-1] holds block 736ca3e8d88SDave Plauger ptr exists for [0 .. nblock-1] 737ca3e8d88SDave Plauger 738ca3e8d88SDave Plauger Post: 739ca3e8d88SDave Plauger ((UChar*)block32) [0 .. nblock-1] holds block 740ca3e8d88SDave Plauger All other areas of block32 destroyed 741ca3e8d88SDave Plauger ftab [0 .. 65536 ] destroyed 742ca3e8d88SDave Plauger ptr [0 .. nblock-1] holds sorted order 743ca3e8d88SDave Plauger if (*budget < 0), sorting was abandoned 744ca3e8d88SDave Plauger */ 745ca3e8d88SDave Plauger 746ca3e8d88SDave Plauger #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 747ca3e8d88SDave Plauger #define SETMASK (1 << 21) 748ca3e8d88SDave Plauger #define CLEARMASK (~(SETMASK)) 749ca3e8d88SDave Plauger 750ca3e8d88SDave Plauger static 751ca3e8d88SDave Plauger void mainSort ( UInt32* ptr, 752ca3e8d88SDave Plauger UChar* block, 753ca3e8d88SDave Plauger UInt16* quadrant, 754ca3e8d88SDave Plauger UInt32* ftab, 755ca3e8d88SDave Plauger Int32 nblock, 756ca3e8d88SDave Plauger Int32 verb, 757ca3e8d88SDave Plauger Int32* budget ) 758ca3e8d88SDave Plauger { 759ca3e8d88SDave Plauger Int32 i, j, k, ss, sb; 760ca3e8d88SDave Plauger Int32 runningOrder[256]; 761ca3e8d88SDave Plauger Bool bigDone[256]; 762ca3e8d88SDave Plauger Int32 copyStart[256]; 763ca3e8d88SDave Plauger Int32 copyEnd [256]; 764ca3e8d88SDave Plauger UChar c1; 765ca3e8d88SDave Plauger Int32 numQSorted; 766ca3e8d88SDave Plauger UInt16 s; 767ca3e8d88SDave Plauger if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 768ca3e8d88SDave Plauger 769ca3e8d88SDave Plauger /*-- set up the 2-byte frequency table --*/ 770ca3e8d88SDave Plauger for (i = 65536; i >= 0; i--) ftab[i] = 0; 771ca3e8d88SDave Plauger 772ca3e8d88SDave Plauger j = block[0] << 8; 773ca3e8d88SDave Plauger i = nblock-1; 774ca3e8d88SDave Plauger for (; i >= 3; i -= 4) { 775ca3e8d88SDave Plauger quadrant[i] = 0; 776ca3e8d88SDave Plauger j = (j >> 8) | ( ((UInt16)block[i]) << 8); 777ca3e8d88SDave Plauger ftab[j]++; 778ca3e8d88SDave Plauger quadrant[i-1] = 0; 779ca3e8d88SDave Plauger j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 780ca3e8d88SDave Plauger ftab[j]++; 781ca3e8d88SDave Plauger quadrant[i-2] = 0; 782ca3e8d88SDave Plauger j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 783ca3e8d88SDave Plauger ftab[j]++; 784ca3e8d88SDave Plauger quadrant[i-3] = 0; 785ca3e8d88SDave Plauger j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 786ca3e8d88SDave Plauger ftab[j]++; 787ca3e8d88SDave Plauger } 788ca3e8d88SDave Plauger for (; i >= 0; i--) { 789ca3e8d88SDave Plauger quadrant[i] = 0; 790ca3e8d88SDave Plauger j = (j >> 8) | ( ((UInt16)block[i]) << 8); 791ca3e8d88SDave Plauger ftab[j]++; 792ca3e8d88SDave Plauger } 793ca3e8d88SDave Plauger 794ca3e8d88SDave Plauger /*-- (emphasises close relationship of block & quadrant) --*/ 795ca3e8d88SDave Plauger for (i = 0; i < BZ_N_OVERSHOOT; i++) { 796ca3e8d88SDave Plauger block [nblock+i] = block[i]; 797ca3e8d88SDave Plauger quadrant[nblock+i] = 0; 798ca3e8d88SDave Plauger } 799ca3e8d88SDave Plauger 800ca3e8d88SDave Plauger if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 801ca3e8d88SDave Plauger 802ca3e8d88SDave Plauger /*-- Complete the initial radix sort --*/ 803ca3e8d88SDave Plauger for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 804ca3e8d88SDave Plauger 805ca3e8d88SDave Plauger s = block[0] << 8; 806ca3e8d88SDave Plauger i = nblock-1; 807ca3e8d88SDave Plauger for (; i >= 3; i -= 4) { 808ca3e8d88SDave Plauger s = (s >> 8) | (block[i] << 8); 809ca3e8d88SDave Plauger j = ftab[s] -1; 810ca3e8d88SDave Plauger ftab[s] = j; 811ca3e8d88SDave Plauger ptr[j] = i; 812ca3e8d88SDave Plauger s = (s >> 8) | (block[i-1] << 8); 813ca3e8d88SDave Plauger j = ftab[s] -1; 814ca3e8d88SDave Plauger ftab[s] = j; 815ca3e8d88SDave Plauger ptr[j] = i-1; 816ca3e8d88SDave Plauger s = (s >> 8) | (block[i-2] << 8); 817ca3e8d88SDave Plauger j = ftab[s] -1; 818ca3e8d88SDave Plauger ftab[s] = j; 819ca3e8d88SDave Plauger ptr[j] = i-2; 820ca3e8d88SDave Plauger s = (s >> 8) | (block[i-3] << 8); 821ca3e8d88SDave Plauger j = ftab[s] -1; 822ca3e8d88SDave Plauger ftab[s] = j; 823ca3e8d88SDave Plauger ptr[j] = i-3; 824ca3e8d88SDave Plauger } 825ca3e8d88SDave Plauger for (; i >= 0; i--) { 826ca3e8d88SDave Plauger s = (s >> 8) | (block[i] << 8); 827ca3e8d88SDave Plauger j = ftab[s] -1; 828ca3e8d88SDave Plauger ftab[s] = j; 829ca3e8d88SDave Plauger ptr[j] = i; 830ca3e8d88SDave Plauger } 831ca3e8d88SDave Plauger 832ca3e8d88SDave Plauger /*-- 833ca3e8d88SDave Plauger Now ftab contains the first loc of every small bucket. 834ca3e8d88SDave Plauger Calculate the running order, from smallest to largest 835ca3e8d88SDave Plauger big bucket. 836ca3e8d88SDave Plauger --*/ 837ca3e8d88SDave Plauger for (i = 0; i <= 255; i++) { 838ca3e8d88SDave Plauger bigDone [i] = False; 839ca3e8d88SDave Plauger runningOrder[i] = i; 840ca3e8d88SDave Plauger } 841ca3e8d88SDave Plauger 842ca3e8d88SDave Plauger { 843ca3e8d88SDave Plauger Int32 vv; 844ca3e8d88SDave Plauger Int32 h = 1; 845ca3e8d88SDave Plauger do h = 3 * h + 1; while (h <= 256); 846ca3e8d88SDave Plauger do { 847ca3e8d88SDave Plauger h = h / 3; 848ca3e8d88SDave Plauger for (i = h; i <= 255; i++) { 849ca3e8d88SDave Plauger vv = runningOrder[i]; 850ca3e8d88SDave Plauger j = i; 851ca3e8d88SDave Plauger while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 852ca3e8d88SDave Plauger runningOrder[j] = runningOrder[j-h]; 853ca3e8d88SDave Plauger j = j - h; 854ca3e8d88SDave Plauger if (j <= (h - 1)) goto zero; 855ca3e8d88SDave Plauger } 856ca3e8d88SDave Plauger zero: 857ca3e8d88SDave Plauger runningOrder[j] = vv; 858ca3e8d88SDave Plauger } 859ca3e8d88SDave Plauger } while (h != 1); 860ca3e8d88SDave Plauger } 861ca3e8d88SDave Plauger 862ca3e8d88SDave Plauger /*-- 863ca3e8d88SDave Plauger The main sorting loop. 864ca3e8d88SDave Plauger --*/ 865ca3e8d88SDave Plauger 866ca3e8d88SDave Plauger numQSorted = 0; 867ca3e8d88SDave Plauger 868ca3e8d88SDave Plauger for (i = 0; i <= 255; i++) { 869ca3e8d88SDave Plauger 870ca3e8d88SDave Plauger /*-- 871ca3e8d88SDave Plauger Process big buckets, starting with the least full. 872ca3e8d88SDave Plauger Basically this is a 3-step process in which we call 873ca3e8d88SDave Plauger mainQSort3 to sort the small buckets [ss, j], but 874ca3e8d88SDave Plauger also make a big effort to avoid the calls if we can. 875ca3e8d88SDave Plauger --*/ 876ca3e8d88SDave Plauger ss = runningOrder[i]; 877ca3e8d88SDave Plauger 878ca3e8d88SDave Plauger /*-- 879ca3e8d88SDave Plauger Step 1: 880ca3e8d88SDave Plauger Complete the big bucket [ss] by quicksorting 881ca3e8d88SDave Plauger any unsorted small buckets [ss, j], for j != ss. 882ca3e8d88SDave Plauger Hopefully previous pointer-scanning phases have already 883ca3e8d88SDave Plauger completed many of the small buckets [ss, j], so 884ca3e8d88SDave Plauger we don't have to sort them at all. 885ca3e8d88SDave Plauger --*/ 886ca3e8d88SDave Plauger for (j = 0; j <= 255; j++) { 887ca3e8d88SDave Plauger if (j != ss) { 888ca3e8d88SDave Plauger sb = (ss << 8) + j; 889ca3e8d88SDave Plauger if ( ! (ftab[sb] & SETMASK) ) { 890ca3e8d88SDave Plauger Int32 lo = ftab[sb] & CLEARMASK; 891ca3e8d88SDave Plauger Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 892ca3e8d88SDave Plauger if (hi > lo) { 893ca3e8d88SDave Plauger if (verb >= 4) 894ca3e8d88SDave Plauger VPrintf4 ( " qsort [0x%x, 0x%x] " 895ca3e8d88SDave Plauger "done %d this %d\n", 896ca3e8d88SDave Plauger ss, j, numQSorted, hi - lo + 1 ); 897ca3e8d88SDave Plauger mainQSort3 ( 898ca3e8d88SDave Plauger ptr, block, quadrant, nblock, 899ca3e8d88SDave Plauger lo, hi, BZ_N_RADIX, budget 900ca3e8d88SDave Plauger ); 901ca3e8d88SDave Plauger numQSorted += (hi - lo + 1); 902ca3e8d88SDave Plauger if (*budget < 0) return; 903ca3e8d88SDave Plauger } 904ca3e8d88SDave Plauger } 905ca3e8d88SDave Plauger ftab[sb] |= SETMASK; 906ca3e8d88SDave Plauger } 907ca3e8d88SDave Plauger } 908ca3e8d88SDave Plauger 909ca3e8d88SDave Plauger AssertH ( !bigDone[ss], 1006 ); 910ca3e8d88SDave Plauger 911ca3e8d88SDave Plauger /*-- 912ca3e8d88SDave Plauger Step 2: 913ca3e8d88SDave Plauger Now scan this big bucket [ss] so as to synthesise the 914ca3e8d88SDave Plauger sorted order for small buckets [t, ss] for all t, 915ca3e8d88SDave Plauger including, magically, the bucket [ss,ss] too. 916ca3e8d88SDave Plauger This will avoid doing Real Work in subsequent Step 1's. 917ca3e8d88SDave Plauger --*/ 918ca3e8d88SDave Plauger { 919ca3e8d88SDave Plauger for (j = 0; j <= 255; j++) { 920ca3e8d88SDave Plauger copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 921ca3e8d88SDave Plauger copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 922ca3e8d88SDave Plauger } 923ca3e8d88SDave Plauger for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 924ca3e8d88SDave Plauger k = ptr[j]-1; if (k < 0) k += nblock; 925ca3e8d88SDave Plauger c1 = block[k]; 926ca3e8d88SDave Plauger if (!bigDone[c1]) 927ca3e8d88SDave Plauger ptr[ copyStart[c1]++ ] = k; 928ca3e8d88SDave Plauger } 929ca3e8d88SDave Plauger for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 930ca3e8d88SDave Plauger k = ptr[j]-1; if (k < 0) k += nblock; 931ca3e8d88SDave Plauger c1 = block[k]; 932ca3e8d88SDave Plauger if (!bigDone[c1]) 933ca3e8d88SDave Plauger ptr[ copyEnd[c1]-- ] = k; 934ca3e8d88SDave Plauger } 935ca3e8d88SDave Plauger } 936ca3e8d88SDave Plauger 937ca3e8d88SDave Plauger AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 938ca3e8d88SDave Plauger || 939ca3e8d88SDave Plauger /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 940ca3e8d88SDave Plauger Necessity for this case is demonstrated by compressing 941ca3e8d88SDave Plauger a sequence of approximately 48.5 million of character 942ca3e8d88SDave Plauger 251; 1.0.0/1.0.1 will then die here. */ 943ca3e8d88SDave Plauger (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 944ca3e8d88SDave Plauger 1007 ) 945ca3e8d88SDave Plauger 946ca3e8d88SDave Plauger for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 947ca3e8d88SDave Plauger 948ca3e8d88SDave Plauger /*-- 949ca3e8d88SDave Plauger Step 3: 950ca3e8d88SDave Plauger The [ss] big bucket is now done. Record this fact, 951ca3e8d88SDave Plauger and update the quadrant descriptors. Remember to 952ca3e8d88SDave Plauger update quadrants in the overshoot area too, if 953ca3e8d88SDave Plauger necessary. The "if (i < 255)" test merely skips 954ca3e8d88SDave Plauger this updating for the last bucket processed, since 955ca3e8d88SDave Plauger updating for the last bucket is pointless. 956ca3e8d88SDave Plauger 957ca3e8d88SDave Plauger The quadrant array provides a way to incrementally 958ca3e8d88SDave Plauger cache sort orderings, as they appear, so as to 959ca3e8d88SDave Plauger make subsequent comparisons in fullGtU() complete 960ca3e8d88SDave Plauger faster. For repetitive blocks this makes a big 961ca3e8d88SDave Plauger difference (but not big enough to be able to avoid 962ca3e8d88SDave Plauger the fallback sorting mechanism, exponential radix sort). 963ca3e8d88SDave Plauger 964ca3e8d88SDave Plauger The precise meaning is: at all times: 965ca3e8d88SDave Plauger 966ca3e8d88SDave Plauger for 0 <= i < nblock and 0 <= j <= nblock 967ca3e8d88SDave Plauger 968ca3e8d88SDave Plauger if block[i] != block[j], 969ca3e8d88SDave Plauger 970ca3e8d88SDave Plauger then the relative values of quadrant[i] and 971ca3e8d88SDave Plauger quadrant[j] are meaningless. 972ca3e8d88SDave Plauger 973ca3e8d88SDave Plauger else { 974ca3e8d88SDave Plauger if quadrant[i] < quadrant[j] 975ca3e8d88SDave Plauger then the string starting at i lexicographically 976ca3e8d88SDave Plauger precedes the string starting at j 977ca3e8d88SDave Plauger 978ca3e8d88SDave Plauger else if quadrant[i] > quadrant[j] 979ca3e8d88SDave Plauger then the string starting at j lexicographically 980ca3e8d88SDave Plauger precedes the string starting at i 981ca3e8d88SDave Plauger 982ca3e8d88SDave Plauger else 983ca3e8d88SDave Plauger the relative ordering of the strings starting 984ca3e8d88SDave Plauger at i and j has not yet been determined. 985ca3e8d88SDave Plauger } 986ca3e8d88SDave Plauger --*/ 987ca3e8d88SDave Plauger bigDone[ss] = True; 988ca3e8d88SDave Plauger 989ca3e8d88SDave Plauger if (i < 255) { 990ca3e8d88SDave Plauger Int32 bbStart = ftab[ss << 8] & CLEARMASK; 991ca3e8d88SDave Plauger Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 992ca3e8d88SDave Plauger Int32 shifts = 0; 993ca3e8d88SDave Plauger 994ca3e8d88SDave Plauger while ((bbSize >> shifts) > 65534) shifts++; 995ca3e8d88SDave Plauger 996ca3e8d88SDave Plauger for (j = bbSize-1; j >= 0; j--) { 997ca3e8d88SDave Plauger Int32 a2update = ptr[bbStart + j]; 998ca3e8d88SDave Plauger UInt16 qVal = (UInt16)(j >> shifts); 999ca3e8d88SDave Plauger quadrant[a2update] = qVal; 1000ca3e8d88SDave Plauger if (a2update < BZ_N_OVERSHOOT) 1001ca3e8d88SDave Plauger quadrant[a2update + nblock] = qVal; 1002ca3e8d88SDave Plauger } 1003ca3e8d88SDave Plauger AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 1004ca3e8d88SDave Plauger } 1005ca3e8d88SDave Plauger 1006ca3e8d88SDave Plauger } 1007ca3e8d88SDave Plauger 1008ca3e8d88SDave Plauger if (verb >= 4) 1009ca3e8d88SDave Plauger VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 1010ca3e8d88SDave Plauger nblock, numQSorted, nblock - numQSorted ); 1011ca3e8d88SDave Plauger } 1012ca3e8d88SDave Plauger 1013ca3e8d88SDave Plauger #undef BIGFREQ 1014ca3e8d88SDave Plauger #undef SETMASK 1015ca3e8d88SDave Plauger #undef CLEARMASK 1016ca3e8d88SDave Plauger 1017ca3e8d88SDave Plauger 1018ca3e8d88SDave Plauger /*---------------------------------------------*/ 1019ca3e8d88SDave Plauger /* Pre: 1020ca3e8d88SDave Plauger nblock > 0 1021ca3e8d88SDave Plauger arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 1022ca3e8d88SDave Plauger ((UChar*)arr2) [0 .. nblock-1] holds block 1023ca3e8d88SDave Plauger arr1 exists for [0 .. nblock-1] 1024ca3e8d88SDave Plauger 1025ca3e8d88SDave Plauger Post: 1026ca3e8d88SDave Plauger ((UChar*)arr2) [0 .. nblock-1] holds block 1027ca3e8d88SDave Plauger All other areas of block destroyed 1028ca3e8d88SDave Plauger ftab [ 0 .. 65536 ] destroyed 1029ca3e8d88SDave Plauger arr1 [0 .. nblock-1] holds sorted order 1030ca3e8d88SDave Plauger */ 1031ca3e8d88SDave Plauger void BZ2_blockSort ( EState* s ) 1032ca3e8d88SDave Plauger { 1033ca3e8d88SDave Plauger UInt32* ptr = s->ptr; 1034ca3e8d88SDave Plauger UChar* block = s->block; 1035ca3e8d88SDave Plauger UInt32* ftab = s->ftab; 1036ca3e8d88SDave Plauger Int32 nblock = s->nblock; 1037ca3e8d88SDave Plauger Int32 verb = s->verbosity; 1038ca3e8d88SDave Plauger Int32 wfact = s->workFactor; 1039ca3e8d88SDave Plauger UInt16* quadrant; 1040ca3e8d88SDave Plauger Int32 budget; 1041ca3e8d88SDave Plauger Int32 budgetInit; 1042ca3e8d88SDave Plauger Int32 i; 1043ca3e8d88SDave Plauger 1044ca3e8d88SDave Plauger if (nblock < 10000) { 1045ca3e8d88SDave Plauger fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1046ca3e8d88SDave Plauger } else { 1047ca3e8d88SDave Plauger /* Calculate the location for quadrant, remembering to get 1048ca3e8d88SDave Plauger the alignment right. Assumes that &(block[0]) is at least 1049ca3e8d88SDave Plauger 2-byte aligned -- this should be ok since block is really 1050ca3e8d88SDave Plauger the first section of arr2. 1051ca3e8d88SDave Plauger */ 1052ca3e8d88SDave Plauger i = nblock+BZ_N_OVERSHOOT; 1053ca3e8d88SDave Plauger if (i & 1) i++; 1054ca3e8d88SDave Plauger quadrant = (UInt16*)(&(block[i])); 1055ca3e8d88SDave Plauger 1056ca3e8d88SDave Plauger /* (wfact-1) / 3 puts the default-factor-30 1057ca3e8d88SDave Plauger transition point at very roughly the same place as 1058ca3e8d88SDave Plauger with v0.1 and v0.9.0. 1059ca3e8d88SDave Plauger Not that it particularly matters any more, since the 1060ca3e8d88SDave Plauger resulting compressed stream is now the same regardless 1061ca3e8d88SDave Plauger of whether or not we use the main sort or fallback sort. 1062ca3e8d88SDave Plauger */ 1063ca3e8d88SDave Plauger if (wfact < 1 ) wfact = 1; 1064ca3e8d88SDave Plauger if (wfact > 100) wfact = 100; 1065ca3e8d88SDave Plauger budgetInit = nblock * ((wfact-1) / 3); 1066ca3e8d88SDave Plauger budget = budgetInit; 1067ca3e8d88SDave Plauger 1068ca3e8d88SDave Plauger mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 1069ca3e8d88SDave Plauger if (verb >= 3) 1070ca3e8d88SDave Plauger VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 1071ca3e8d88SDave Plauger budgetInit - budget, 1072ca3e8d88SDave Plauger nblock, 1073ca3e8d88SDave Plauger (float)(budgetInit - budget) / 1074ca3e8d88SDave Plauger (float)(nblock==0 ? 1 : nblock) ); 1075ca3e8d88SDave Plauger if (budget < 0) { 1076ca3e8d88SDave Plauger if (verb >= 2) 1077ca3e8d88SDave Plauger VPrintf0 ( " too repetitive; using fallback" 1078ca3e8d88SDave Plauger " sorting algorithm\n" ); 1079ca3e8d88SDave Plauger fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1080ca3e8d88SDave Plauger } 1081ca3e8d88SDave Plauger } 1082ca3e8d88SDave Plauger 1083ca3e8d88SDave Plauger s->origPtr = -1; 1084ca3e8d88SDave Plauger for (i = 0; i < s->nblock; i++) 1085ca3e8d88SDave Plauger if (ptr[i] == 0) 1086ca3e8d88SDave Plauger { s->origPtr = i; break; }; 1087ca3e8d88SDave Plauger 1088ca3e8d88SDave Plauger AssertH( s->origPtr != -1, 1003 ); 1089ca3e8d88SDave Plauger } 1090ca3e8d88SDave Plauger 1091ca3e8d88SDave Plauger 1092ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 1093ca3e8d88SDave Plauger /*--- end blocksort.c ---*/ 1094ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 1095