1 /* 2 * sssort.c for libdivsufsort 3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 #include "divsufsort_private.h" 28 29 30 /*- Private Functions -*/ 31 32 static const saint_t lg_table[256]= { 33 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 34 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 35 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 36 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 37 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 38 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 39 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 40 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 41 }; 42 43 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 44 45 static INLINE 46 saint_t 47 ss_ilg(saidx_t n) { 48 #if SS_BLOCKSIZE == 0 49 # if defined(BUILD_DIVSUFSORT64) 50 return (n >> 32) ? 51 ((n >> 48) ? 52 ((n >> 56) ? 53 56 + lg_table[(n >> 56) & 0xff] : 54 48 + lg_table[(n >> 48) & 0xff]) : 55 ((n >> 40) ? 56 40 + lg_table[(n >> 40) & 0xff] : 57 32 + lg_table[(n >> 32) & 0xff])) : 58 ((n & 0xffff0000) ? 59 ((n & 0xff000000) ? 60 24 + lg_table[(n >> 24) & 0xff] : 61 16 + lg_table[(n >> 16) & 0xff]) : 62 ((n & 0x0000ff00) ? 63 8 + lg_table[(n >> 8) & 0xff] : 64 0 + lg_table[(n >> 0) & 0xff])); 65 # else 66 return (n & 0xffff0000) ? 67 ((n & 0xff000000) ? 68 24 + lg_table[(n >> 24) & 0xff] : 69 16 + lg_table[(n >> 16) & 0xff]) : 70 ((n & 0x0000ff00) ? 71 8 + lg_table[(n >> 8) & 0xff] : 72 0 + lg_table[(n >> 0) & 0xff]); 73 # endif 74 #elif SS_BLOCKSIZE < 256 75 return lg_table[n]; 76 #else 77 return (n & 0xff00) ? 78 8 + lg_table[(n >> 8) & 0xff] : 79 0 + lg_table[(n >> 0) & 0xff]; 80 #endif 81 } 82 83 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ 84 85 #if SS_BLOCKSIZE != 0 86 87 static const saint_t sqq_table[256] = { 88 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 89 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, 91 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 92 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 93 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 94 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 95 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 96 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 97 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 98 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, 99 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 100 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 101 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 102 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 103 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 104 }; 105 106 static INLINE 107 saidx_t 108 ss_isqrt(saidx_t x) { 109 saidx_t y, e; 110 111 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } 112 e = (x & 0xffff0000) ? 113 ((x & 0xff000000) ? 114 24 + lg_table[(x >> 24) & 0xff] : 115 16 + lg_table[(x >> 16) & 0xff]) : 116 ((x & 0x0000ff00) ? 117 8 + lg_table[(x >> 8) & 0xff] : 118 0 + lg_table[(x >> 0) & 0xff]); 119 120 if(e >= 16) { 121 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); 122 if(e >= 24) { y = (y + 1 + x / y) >> 1; } 123 y = (y + 1 + x / y) >> 1; 124 } else if(e >= 8) { 125 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; 126 } else { 127 return sqq_table[x] >> 4; 128 } 129 130 return (x < (y * y)) ? y - 1 : y; 131 } 132 133 #endif /* SS_BLOCKSIZE != 0 */ 134 135 136 /*---------------------------------------------------------------------------*/ 137 138 /* Compares two suffixes. */ 139 static INLINE 140 saint_t 141 ss_compare(const sauchar_t *T, 142 const saidx_t *p1, const saidx_t *p2, 143 saidx_t depth) { 144 const sauchar_t *U1, *U2, *U1n, *U2n; 145 146 for(U1 = T + depth + *p1, 147 U2 = T + depth + *p2, 148 U1n = T + *(p1 + 1) + 2, 149 U2n = T + *(p2 + 1) + 2; 150 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); 151 ++U1, ++U2) { 152 } 153 154 return U1 < U1n ? 155 (U2 < U2n ? *U1 - *U2 : 1) : 156 (U2 < U2n ? -1 : 0); 157 } 158 159 160 /*---------------------------------------------------------------------------*/ 161 162 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) 163 164 /* Insertionsort for small size groups */ 165 static 166 void 167 ss_insertionsort(const sauchar_t *T, const saidx_t *PA, 168 saidx_t *first, saidx_t *last, saidx_t depth) { 169 saidx_t *i, *j; 170 saidx_t t; 171 saint_t r; 172 173 for(i = last - 2; first <= i; --i) { 174 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { 175 do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); 176 if(last <= j) { break; } 177 } 178 if(r == 0) { *j = ~*j; } 179 *(j - 1) = t; 180 } 181 } 182 183 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ 184 185 186 /*---------------------------------------------------------------------------*/ 187 188 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 189 190 static INLINE 191 void 192 ss_fixdown(const sauchar_t *Td, const saidx_t *PA, 193 saidx_t *SA, saidx_t i, saidx_t size) { 194 saidx_t j, k; 195 saidx_t v; 196 saint_t c, d, e; 197 198 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { 199 d = Td[PA[SA[k = j++]]]; 200 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } 201 if(d <= c) { break; } 202 } 203 SA[i] = v; 204 } 205 206 /* Simple top-down heapsort. */ 207 static 208 void 209 ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { 210 saidx_t i, m; 211 saidx_t t; 212 213 m = size; 214 if((size % 2) == 0) { 215 m--; 216 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } 217 } 218 219 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } 220 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } 221 for(i = m - 1; 0 < i; --i) { 222 t = SA[0], SA[0] = SA[i]; 223 ss_fixdown(Td, PA, SA, 0, i); 224 SA[i] = t; 225 } 226 } 227 228 229 /*---------------------------------------------------------------------------*/ 230 231 /* Returns the median of three elements. */ 232 static INLINE 233 saidx_t * 234 ss_median3(const sauchar_t *Td, const saidx_t *PA, 235 saidx_t *v1, saidx_t *v2, saidx_t *v3) { 236 saidx_t *t; 237 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } 238 if(Td[PA[*v2]] > Td[PA[*v3]]) { 239 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } 240 else { return v3; } 241 } 242 return v2; 243 } 244 245 /* Returns the median of five elements. */ 246 static INLINE 247 saidx_t * 248 ss_median5(const sauchar_t *Td, const saidx_t *PA, 249 saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { 250 saidx_t *t; 251 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } 252 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } 253 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } 254 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } 255 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } 256 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } 257 return v3; 258 } 259 260 /* Returns the pivot element. */ 261 static INLINE 262 saidx_t * 263 ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { 264 saidx_t *middle; 265 saidx_t t; 266 267 t = last - first; 268 middle = first + t / 2; 269 270 if(t <= 512) { 271 if(t <= 32) { 272 return ss_median3(Td, PA, first, middle, last - 1); 273 } else { 274 t >>= 2; 275 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); 276 } 277 } 278 t >>= 3; 279 first = ss_median3(Td, PA, first, first + t, first + (t << 1)); 280 middle = ss_median3(Td, PA, middle - t, middle, middle + t); 281 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); 282 return ss_median3(Td, PA, first, middle, last); 283 } 284 285 286 /*---------------------------------------------------------------------------*/ 287 288 /* Binary partition for substrings. */ 289 static INLINE 290 saidx_t * 291 ss_partition(const saidx_t *PA, 292 saidx_t *first, saidx_t *last, saidx_t depth) { 293 saidx_t *a, *b; 294 saidx_t t; 295 for(a = first - 1, b = last;;) { 296 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } 297 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } 298 if(b <= a) { break; } 299 t = ~*b; 300 *b = *a; 301 *a = t; 302 } 303 if(first < a) { *first = ~*first; } 304 return a; 305 } 306 307 /* Multikey introsort for medium size groups. */ 308 static 309 void 310 ss_mintrosort(const sauchar_t *T, const saidx_t *PA, 311 saidx_t *first, saidx_t *last, 312 saidx_t depth) { 313 #define STACK_SIZE SS_MISORT_STACKSIZE 314 struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; 315 const sauchar_t *Td; 316 saidx_t *a, *b, *c, *d, *e, *f; 317 saidx_t s, t; 318 saint_t ssize; 319 saint_t limit; 320 saint_t v, x = 0; 321 322 for(ssize = 0, limit = ss_ilg(last - first);;) { 323 324 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { 325 #if 1 < SS_INSERTIONSORT_THRESHOLD 326 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } 327 #endif 328 STACK_POP(first, last, depth, limit); 329 continue; 330 } 331 332 Td = T + depth; 333 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } 334 if(limit < 0) { 335 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { 336 if((x = Td[PA[*a]]) != v) { 337 if(1 < (a - first)) { break; } 338 v = x; 339 first = a; 340 } 341 } 342 if(Td[PA[*first] - 1] < v) { 343 first = ss_partition(PA, first, a, depth); 344 } 345 if((a - first) <= (last - a)) { 346 if(1 < (a - first)) { 347 STACK_PUSH(a, last, depth, -1); 348 last = a, depth += 1, limit = ss_ilg(a - first); 349 } else { 350 first = a, limit = -1; 351 } 352 } else { 353 if(1 < (last - a)) { 354 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); 355 first = a, limit = -1; 356 } else { 357 last = a, depth += 1, limit = ss_ilg(a - first); 358 } 359 } 360 continue; 361 } 362 363 /* choose pivot */ 364 a = ss_pivot(Td, PA, first, last); 365 v = Td[PA[*a]]; 366 SWAP(*first, *a); 367 368 /* partition */ 369 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } 370 if(((a = b) < last) && (x < v)) { 371 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { 372 if(x == v) { SWAP(*b, *a); ++a; } 373 } 374 } 375 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } 376 if((b < (d = c)) && (x > v)) { 377 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { 378 if(x == v) { SWAP(*c, *d); --d; } 379 } 380 } 381 for(; b < c;) { 382 SWAP(*b, *c); 383 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { 384 if(x == v) { SWAP(*b, *a); ++a; } 385 } 386 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { 387 if(x == v) { SWAP(*c, *d); --d; } 388 } 389 } 390 391 if(a <= d) { 392 c = b - 1; 393 394 if((s = a - first) > (t = b - a)) { s = t; } 395 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 396 if((s = d - c) > (t = last - d - 1)) { s = t; } 397 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 398 399 a = first + (b - a), c = last - (d - c); 400 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); 401 402 if((a - first) <= (last - c)) { 403 if((last - c) <= (c - b)) { 404 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 405 STACK_PUSH(c, last, depth, limit); 406 last = a; 407 } else if((a - first) <= (c - b)) { 408 STACK_PUSH(c, last, depth, limit); 409 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 410 last = a; 411 } else { 412 STACK_PUSH(c, last, depth, limit); 413 STACK_PUSH(first, a, depth, limit); 414 first = b, last = c, depth += 1, limit = ss_ilg(c - b); 415 } 416 } else { 417 if((a - first) <= (c - b)) { 418 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 419 STACK_PUSH(first, a, depth, limit); 420 first = c; 421 } else if((last - c) <= (c - b)) { 422 STACK_PUSH(first, a, depth, limit); 423 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 424 first = c; 425 } else { 426 STACK_PUSH(first, a, depth, limit); 427 STACK_PUSH(c, last, depth, limit); 428 first = b, last = c, depth += 1, limit = ss_ilg(c - b); 429 } 430 } 431 } else { 432 limit += 1; 433 if(Td[PA[*first] - 1] < v) { 434 first = ss_partition(PA, first, last, depth); 435 limit = ss_ilg(last - first); 436 } 437 depth += 1; 438 } 439 } 440 #undef STACK_SIZE 441 } 442 443 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ 444 445 446 /*---------------------------------------------------------------------------*/ 447 448 #if SS_BLOCKSIZE != 0 449 450 static INLINE 451 void 452 ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { 453 saidx_t t; 454 for(; 0 < n; --n, ++a, ++b) { 455 t = *a, *a = *b, *b = t; 456 } 457 } 458 459 static INLINE 460 void 461 ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { 462 saidx_t *a, *b, t; 463 saidx_t l, r; 464 l = middle - first, r = last - middle; 465 for(; (0 < l) && (0 < r);) { 466 if(l == r) { ss_blockswap(first, middle, l); break; } 467 if(l < r) { 468 a = last - 1, b = middle - 1; 469 t = *a; 470 do { 471 *a-- = *b, *b-- = *a; 472 if(b < first) { 473 *a = t; 474 last = a; 475 if((r -= l + 1) <= l) { break; } 476 a -= 1, b = middle - 1; 477 t = *a; 478 } 479 } while(1); 480 } else { 481 a = first, b = middle; 482 t = *a; 483 do { 484 *a++ = *b, *b++ = *a; 485 if(last <= b) { 486 *a = t; 487 first = a + 1; 488 if((l -= r + 1) <= r) { break; } 489 a += 1, b = middle; 490 t = *a; 491 } 492 } while(1); 493 } 494 } 495 } 496 497 498 /*---------------------------------------------------------------------------*/ 499 500 static 501 void 502 ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, 503 saidx_t *first, saidx_t *middle, saidx_t *last, 504 saidx_t depth) { 505 const saidx_t *p; 506 saidx_t *a, *b; 507 saidx_t len, half; 508 saint_t q, r; 509 saint_t x; 510 511 for(;;) { 512 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } 513 else { x = 0; p = PA + *(last - 1); } 514 for(a = first, len = middle - first, half = len >> 1, r = -1; 515 0 < len; 516 len = half, half >>= 1) { 517 b = a + half; 518 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); 519 if(q < 0) { 520 a = b + 1; 521 half -= (len & 1) ^ 1; 522 } else { 523 r = q; 524 } 525 } 526 if(a < middle) { 527 if(r == 0) { *a = ~*a; } 528 ss_rotate(a, middle, last); 529 last -= middle - a; 530 middle = a; 531 if(first == middle) { break; } 532 } 533 --last; 534 if(x != 0) { while(*--last < 0) { } } 535 if(middle == last) { break; } 536 } 537 } 538 539 540 /*---------------------------------------------------------------------------*/ 541 542 /* Merge-forward with internal buffer. */ 543 static 544 void 545 ss_mergeforward(const sauchar_t *T, const saidx_t *PA, 546 saidx_t *first, saidx_t *middle, saidx_t *last, 547 saidx_t *buf, saidx_t depth) { 548 saidx_t *a, *b, *c, *bufend; 549 saidx_t t; 550 saint_t r; 551 552 bufend = buf + (middle - first) - 1; 553 ss_blockswap(buf, first, middle - first); 554 555 for(t = *(a = first), b = buf, c = middle;;) { 556 r = ss_compare(T, PA + *b, PA + *c, depth); 557 if(r < 0) { 558 do { 559 *a++ = *b; 560 if(bufend <= b) { *bufend = t; return; } 561 *b++ = *a; 562 } while(*b < 0); 563 } else if(r > 0) { 564 do { 565 *a++ = *c, *c++ = *a; 566 if(last <= c) { 567 while(b < bufend) { *a++ = *b, *b++ = *a; } 568 *a = *b, *b = t; 569 return; 570 } 571 } while(*c < 0); 572 } else { 573 *c = ~*c; 574 do { 575 *a++ = *b; 576 if(bufend <= b) { *bufend = t; return; } 577 *b++ = *a; 578 } while(*b < 0); 579 580 do { 581 *a++ = *c, *c++ = *a; 582 if(last <= c) { 583 while(b < bufend) { *a++ = *b, *b++ = *a; } 584 *a = *b, *b = t; 585 return; 586 } 587 } while(*c < 0); 588 } 589 } 590 } 591 592 /* Merge-backward with internal buffer. */ 593 static 594 void 595 ss_mergebackward(const sauchar_t *T, const saidx_t *PA, 596 saidx_t *first, saidx_t *middle, saidx_t *last, 597 saidx_t *buf, saidx_t depth) { 598 const saidx_t *p1, *p2; 599 saidx_t *a, *b, *c, *bufend; 600 saidx_t t; 601 saint_t r; 602 saint_t x; 603 604 bufend = buf + (last - middle) - 1; 605 ss_blockswap(buf, middle, last - middle); 606 607 x = 0; 608 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } 609 else { p1 = PA + *bufend; } 610 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } 611 else { p2 = PA + *(middle - 1); } 612 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { 613 r = ss_compare(T, p1, p2, depth); 614 if(0 < r) { 615 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } 616 *a-- = *b; 617 if(b <= buf) { *buf = t; break; } 618 *b-- = *a; 619 if(*b < 0) { p1 = PA + ~*b; x |= 1; } 620 else { p1 = PA + *b; } 621 } else if(r < 0) { 622 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } 623 *a-- = *c, *c-- = *a; 624 if(c < first) { 625 while(buf < b) { *a-- = *b, *b-- = *a; } 626 *a = *b, *b = t; 627 break; 628 } 629 if(*c < 0) { p2 = PA + ~*c; x |= 2; } 630 else { p2 = PA + *c; } 631 } else { 632 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } 633 *a-- = ~*b; 634 if(b <= buf) { *buf = t; break; } 635 *b-- = *a; 636 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } 637 *a-- = *c, *c-- = *a; 638 if(c < first) { 639 while(buf < b) { *a-- = *b, *b-- = *a; } 640 *a = *b, *b = t; 641 break; 642 } 643 if(*b < 0) { p1 = PA + ~*b; x |= 1; } 644 else { p1 = PA + *b; } 645 if(*c < 0) { p2 = PA + ~*c; x |= 2; } 646 else { p2 = PA + *c; } 647 } 648 } 649 } 650 651 /* D&C based merge. */ 652 static 653 void 654 ss_swapmerge(const sauchar_t *T, const saidx_t *PA, 655 saidx_t *first, saidx_t *middle, saidx_t *last, 656 saidx_t *buf, saidx_t bufsize, saidx_t depth) { 657 #define STACK_SIZE SS_SMERGE_STACKSIZE 658 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) 659 #define MERGE_CHECK(a, b, c)\ 660 do {\ 661 if(((c) & 1) ||\ 662 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ 663 *(a) = ~*(a);\ 664 }\ 665 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ 666 *(b) = ~*(b);\ 667 }\ 668 } while(0) 669 struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; 670 saidx_t *l, *r, *lm, *rm; 671 saidx_t m, len, half; 672 saint_t ssize; 673 saint_t check, next; 674 675 for(check = 0, ssize = 0;;) { 676 if((last - middle) <= bufsize) { 677 if((first < middle) && (middle < last)) { 678 ss_mergebackward(T, PA, first, middle, last, buf, depth); 679 } 680 MERGE_CHECK(first, last, check); 681 STACK_POP(first, middle, last, check); 682 continue; 683 } 684 685 if((middle - first) <= bufsize) { 686 if(first < middle) { 687 ss_mergeforward(T, PA, first, middle, last, buf, depth); 688 } 689 MERGE_CHECK(first, last, check); 690 STACK_POP(first, middle, last, check); 691 continue; 692 } 693 694 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; 695 0 < len; 696 len = half, half >>= 1) { 697 if(ss_compare(T, PA + GETIDX(*(middle + m + half)), 698 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { 699 m += half + 1; 700 half -= (len & 1) ^ 1; 701 } 702 } 703 704 if(0 < m) { 705 lm = middle - m, rm = middle + m; 706 ss_blockswap(lm, middle, m); 707 l = r = middle, next = 0; 708 if(rm < last) { 709 if(*rm < 0) { 710 *rm = ~*rm; 711 if(first < lm) { for(; *--l < 0;) { } next |= 4; } 712 next |= 1; 713 } else if(first < lm) { 714 for(; *r < 0; ++r) { } 715 next |= 2; 716 } 717 } 718 719 if((l - first) <= (last - r)) { 720 STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); 721 middle = lm, last = l, check = (check & 3) | (next & 4); 722 } else { 723 if((next & 2) && (r == middle)) { next ^= 6; } 724 STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); 725 first = r, middle = rm, check = (next & 3) | (check & 4); 726 } 727 } else { 728 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { 729 *middle = ~*middle; 730 } 731 MERGE_CHECK(first, last, check); 732 STACK_POP(first, middle, last, check); 733 } 734 } 735 #undef STACK_SIZE 736 } 737 738 #endif /* SS_BLOCKSIZE != 0 */ 739 740 741 /*---------------------------------------------------------------------------*/ 742 743 /*- Function -*/ 744 745 /* Substring sort */ 746 void 747 sssort(const sauchar_t *T, const saidx_t *PA, 748 saidx_t *first, saidx_t *last, 749 saidx_t *buf, saidx_t bufsize, 750 saidx_t depth, saidx_t n, saint_t lastsuffix) { 751 saidx_t *a; 752 #if SS_BLOCKSIZE != 0 753 saidx_t *b, *middle, *curbuf; 754 saidx_t j, k, curbufsize, limit; 755 #endif 756 saidx_t i; 757 758 if(lastsuffix != 0) { ++first; } 759 760 #if SS_BLOCKSIZE == 0 761 ss_mintrosort(T, PA, first, last, depth); 762 #else 763 if((bufsize < SS_BLOCKSIZE) && 764 (bufsize < (last - first)) && 765 (bufsize < (limit = ss_isqrt(last - first)))) { 766 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } 767 buf = middle = last - limit, bufsize = limit; 768 } else { 769 middle = last, limit = 0; 770 } 771 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { 772 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 773 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); 774 #elif 1 < SS_BLOCKSIZE 775 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); 776 #endif 777 curbufsize = last - (a + SS_BLOCKSIZE); 778 curbuf = a + SS_BLOCKSIZE; 779 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } 780 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { 781 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); 782 } 783 } 784 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 785 ss_mintrosort(T, PA, a, middle, depth); 786 #elif 1 < SS_BLOCKSIZE 787 ss_insertionsort(T, PA, a, middle, depth); 788 #endif 789 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { 790 if(i & 1) { 791 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); 792 a -= k; 793 } 794 } 795 if(limit != 0) { 796 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 797 ss_mintrosort(T, PA, middle, last, depth); 798 #elif 1 < SS_BLOCKSIZE 799 ss_insertionsort(T, PA, middle, last, depth); 800 #endif 801 ss_inplacemerge(T, PA, first, middle, last, depth); 802 } 803 #endif 804 805 if(lastsuffix != 0) { 806 /* Insert last type B* suffix. */ 807 saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; 808 for(a = first, i = *(first - 1); 809 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); 810 ++a) { 811 *(a - 1) = *a; 812 } 813 *(a - 1) = i; 814 } 815 } 816