xref: /freebsd/contrib/libdivsufsort/lib/sssort.c (revision 28f6c2f292806bf31230a959bc4b19d7081669a7)
1 /*
2  * sssort.c for libdivsufsort
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "divsufsort_private.h"
28 
29 
30 /*- Private Functions -*/
31 
32 static const saint_t lg_table[256]= {
33  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41 };
42 
43 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
44 
45 static INLINE
46 saint_t
47 ss_ilg(saidx_t n) {
48 #if SS_BLOCKSIZE == 0
49 # if defined(BUILD_DIVSUFSORT64)
50   return (n >> 32) ?
51           ((n >> 48) ?
52             ((n >> 56) ?
53               56 + lg_table[(n >> 56) & 0xff] :
54               48 + lg_table[(n >> 48) & 0xff]) :
55             ((n >> 40) ?
56               40 + lg_table[(n >> 40) & 0xff] :
57               32 + lg_table[(n >> 32) & 0xff])) :
58           ((n & 0xffff0000) ?
59             ((n & 0xff000000) ?
60               24 + lg_table[(n >> 24) & 0xff] :
61               16 + lg_table[(n >> 16) & 0xff]) :
62             ((n & 0x0000ff00) ?
63                8 + lg_table[(n >>  8) & 0xff] :
64                0 + lg_table[(n >>  0) & 0xff]));
65 # else
66   return (n & 0xffff0000) ?
67           ((n & 0xff000000) ?
68             24 + lg_table[(n >> 24) & 0xff] :
69             16 + lg_table[(n >> 16) & 0xff]) :
70           ((n & 0x0000ff00) ?
71              8 + lg_table[(n >>  8) & 0xff] :
72              0 + lg_table[(n >>  0) & 0xff]);
73 # endif
74 #elif SS_BLOCKSIZE < 256
75   return lg_table[n];
76 #else
77   return (n & 0xff00) ?
78           8 + lg_table[(n >> 8) & 0xff] :
79           0 + lg_table[(n >> 0) & 0xff];
80 #endif
81 }
82 
83 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
84 
85 #if SS_BLOCKSIZE != 0
86 
87 static const saint_t sqq_table[256] = {
88   0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
89  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
90  90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
91 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
92 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
93 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
94 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
95 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
96 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
97 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
98 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
99 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
100 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
101 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
102 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
103 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
104 };
105 
106 static INLINE
107 saidx_t
108 ss_isqrt(saidx_t x) {
109   saidx_t y, e;
110 
111   if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
112   e = (x & 0xffff0000) ?
113         ((x & 0xff000000) ?
114           24 + lg_table[(x >> 24) & 0xff] :
115           16 + lg_table[(x >> 16) & 0xff]) :
116         ((x & 0x0000ff00) ?
117            8 + lg_table[(x >>  8) & 0xff] :
118            0 + lg_table[(x >>  0) & 0xff]);
119 
120   if(e >= 16) {
121     y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
122     if(e >= 24) { y = (y + 1 + x / y) >> 1; }
123     y = (y + 1 + x / y) >> 1;
124   } else if(e >= 8) {
125     y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
126   } else {
127     return sqq_table[x] >> 4;
128   }
129 
130   return (x < (y * y)) ? y - 1 : y;
131 }
132 
133 #endif /* SS_BLOCKSIZE != 0 */
134 
135 
136 /*---------------------------------------------------------------------------*/
137 
138 /* Compares two suffixes. */
139 static INLINE
140 saint_t
141 ss_compare(const sauchar_t *T,
142            const saidx_t *p1, const saidx_t *p2,
143            saidx_t depth) {
144   const sauchar_t *U1, *U2, *U1n, *U2n;
145 
146   for(U1 = T + depth + *p1,
147       U2 = T + depth + *p2,
148       U1n = T + *(p1 + 1) + 2,
149       U2n = T + *(p2 + 1) + 2;
150       (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
151       ++U1, ++U2) {
152   }
153 
154   return U1 < U1n ?
155         (U2 < U2n ? *U1 - *U2 : 1) :
156         (U2 < U2n ? -1 : 0);
157 }
158 
159 
160 /*---------------------------------------------------------------------------*/
161 
162 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
163 
164 /* Insertionsort for small size groups */
165 static
166 void
167 ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
168                  saidx_t *first, saidx_t *last, saidx_t depth) {
169   saidx_t *i, *j;
170   saidx_t t;
171   saint_t r;
172 
173   for(i = last - 2; first <= i; --i) {
174     for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
175       do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
176       if(last <= j) { break; }
177     }
178     if(r == 0) { *j = ~*j; }
179     *(j - 1) = t;
180   }
181 }
182 
183 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
184 
185 
186 /*---------------------------------------------------------------------------*/
187 
188 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
189 
190 static INLINE
191 void
192 ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
193            saidx_t *SA, saidx_t i, saidx_t size) {
194   saidx_t j, k;
195   saidx_t v;
196   saint_t c, d, e;
197 
198   for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
199     d = Td[PA[SA[k = j++]]];
200     if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
201     if(d <= c) { break; }
202   }
203   SA[i] = v;
204 }
205 
206 /* Simple top-down heapsort. */
207 static
208 void
209 ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
210   saidx_t i, m;
211   saidx_t t;
212 
213   m = size;
214   if((size % 2) == 0) {
215     m--;
216     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
217   }
218 
219   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
220   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
221   for(i = m - 1; 0 < i; --i) {
222     t = SA[0], SA[0] = SA[i];
223     ss_fixdown(Td, PA, SA, 0, i);
224     SA[i] = t;
225   }
226 }
227 
228 
229 /*---------------------------------------------------------------------------*/
230 
231 /* Returns the median of three elements. */
232 static INLINE
233 saidx_t *
234 ss_median3(const sauchar_t *Td, const saidx_t *PA,
235            saidx_t *v1, saidx_t *v2, saidx_t *v3) {
236   saidx_t *t;
237   if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
238   if(Td[PA[*v2]] > Td[PA[*v3]]) {
239     if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
240     else { return v3; }
241   }
242   return v2;
243 }
244 
245 /* Returns the median of five elements. */
246 static INLINE
247 saidx_t *
248 ss_median5(const sauchar_t *Td, const saidx_t *PA,
249            saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
250   saidx_t *t;
251   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
252   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
253   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
254   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
255   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
256   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
257   return v3;
258 }
259 
260 /* Returns the pivot element. */
261 static INLINE
262 saidx_t *
263 ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
264   saidx_t *middle;
265   saidx_t t;
266 
267   t = last - first;
268   middle = first + t / 2;
269 
270   if(t <= 512) {
271     if(t <= 32) {
272       return ss_median3(Td, PA, first, middle, last - 1);
273     } else {
274       t >>= 2;
275       return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
276     }
277   }
278   t >>= 3;
279   first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
280   middle = ss_median3(Td, PA, middle - t, middle, middle + t);
281   last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
282   return ss_median3(Td, PA, first, middle, last);
283 }
284 
285 
286 /*---------------------------------------------------------------------------*/
287 
288 /* Binary partition for substrings. */
289 static INLINE
290 saidx_t *
291 ss_partition(const saidx_t *PA,
292                     saidx_t *first, saidx_t *last, saidx_t depth) {
293   saidx_t *a, *b;
294   saidx_t t;
295   for(a = first - 1, b = last;;) {
296     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
297     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
298     if(b <= a) { break; }
299     t = ~*b;
300     *b = *a;
301     *a = t;
302   }
303   if(first < a) { *first = ~*first; }
304   return a;
305 }
306 
307 /* Multikey introsort for medium size groups. */
308 static
309 void
310 ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
311               saidx_t *first, saidx_t *last,
312               saidx_t depth) {
313 #define STACK_SIZE SS_MISORT_STACKSIZE
314   struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
315   const sauchar_t *Td;
316   saidx_t *a, *b, *c, *d, *e, *f;
317   saidx_t s, t;
318   saint_t ssize;
319   saint_t limit;
320   saint_t v, x = 0;
321 
322   for(ssize = 0, limit = ss_ilg(last - first);;) {
323 
324     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
325 #if 1 < SS_INSERTIONSORT_THRESHOLD
326       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
327 #endif
328       STACK_POP(first, last, depth, limit);
329       continue;
330     }
331 
332     Td = T + depth;
333     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
334     if(limit < 0) {
335       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
336         if((x = Td[PA[*a]]) != v) {
337           if(1 < (a - first)) { break; }
338           v = x;
339           first = a;
340         }
341       }
342       if(Td[PA[*first] - 1] < v) {
343         first = ss_partition(PA, first, a, depth);
344       }
345       if((a - first) <= (last - a)) {
346         if(1 < (a - first)) {
347           STACK_PUSH(a, last, depth, -1);
348           last = a, depth += 1, limit = ss_ilg(a - first);
349         } else {
350           first = a, limit = -1;
351         }
352       } else {
353         if(1 < (last - a)) {
354           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
355           first = a, limit = -1;
356         } else {
357           last = a, depth += 1, limit = ss_ilg(a - first);
358         }
359       }
360       continue;
361     }
362 
363     /* choose pivot */
364     a = ss_pivot(Td, PA, first, last);
365     v = Td[PA[*a]];
366     SWAP(*first, *a);
367 
368     /* partition */
369     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
370     if(((a = b) < last) && (x < v)) {
371       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
372         if(x == v) { SWAP(*b, *a); ++a; }
373       }
374     }
375     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
376     if((b < (d = c)) && (x > v)) {
377       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
378         if(x == v) { SWAP(*c, *d); --d; }
379       }
380     }
381     for(; b < c;) {
382       SWAP(*b, *c);
383       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
384         if(x == v) { SWAP(*b, *a); ++a; }
385       }
386       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
387         if(x == v) { SWAP(*c, *d); --d; }
388       }
389     }
390 
391     if(a <= d) {
392       c = b - 1;
393 
394       if((s = a - first) > (t = b - a)) { s = t; }
395       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
396       if((s = d - c) > (t = last - d - 1)) { s = t; }
397       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
398 
399       a = first + (b - a), c = last - (d - c);
400       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
401 
402       if((a - first) <= (last - c)) {
403         if((last - c) <= (c - b)) {
404           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
405           STACK_PUSH(c, last, depth, limit);
406           last = a;
407         } else if((a - first) <= (c - b)) {
408           STACK_PUSH(c, last, depth, limit);
409           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
410           last = a;
411         } else {
412           STACK_PUSH(c, last, depth, limit);
413           STACK_PUSH(first, a, depth, limit);
414           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
415         }
416       } else {
417         if((a - first) <= (c - b)) {
418           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
419           STACK_PUSH(first, a, depth, limit);
420           first = c;
421         } else if((last - c) <= (c - b)) {
422           STACK_PUSH(first, a, depth, limit);
423           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
424           first = c;
425         } else {
426           STACK_PUSH(first, a, depth, limit);
427           STACK_PUSH(c, last, depth, limit);
428           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
429         }
430       }
431     } else {
432       limit += 1;
433       if(Td[PA[*first] - 1] < v) {
434         first = ss_partition(PA, first, last, depth);
435         limit = ss_ilg(last - first);
436       }
437       depth += 1;
438     }
439   }
440 #undef STACK_SIZE
441 }
442 
443 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
444 
445 
446 /*---------------------------------------------------------------------------*/
447 
448 #if SS_BLOCKSIZE != 0
449 
450 static INLINE
451 void
452 ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
453   saidx_t t;
454   for(; 0 < n; --n, ++a, ++b) {
455     t = *a, *a = *b, *b = t;
456   }
457 }
458 
459 static INLINE
460 void
461 ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
462   saidx_t *a, *b, t;
463   saidx_t l, r;
464   l = middle - first, r = last - middle;
465   for(; (0 < l) && (0 < r);) {
466     if(l == r) { ss_blockswap(first, middle, l); break; }
467     if(l < r) {
468       a = last - 1, b = middle - 1;
469       t = *a;
470       do {
471         *a-- = *b, *b-- = *a;
472         if(b < first) {
473           *a = t;
474           last = a;
475           if((r -= l + 1) <= l) { break; }
476           a -= 1, b = middle - 1;
477           t = *a;
478         }
479       } while(1);
480     } else {
481       a = first, b = middle;
482       t = *a;
483       do {
484         *a++ = *b, *b++ = *a;
485         if(last <= b) {
486           *a = t;
487           first = a + 1;
488           if((l -= r + 1) <= r) { break; }
489           a += 1, b = middle;
490           t = *a;
491         }
492       } while(1);
493     }
494   }
495 }
496 
497 
498 /*---------------------------------------------------------------------------*/
499 
500 static
501 void
502 ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
503                 saidx_t *first, saidx_t *middle, saidx_t *last,
504                 saidx_t depth) {
505   const saidx_t *p;
506   saidx_t *a, *b;
507   saidx_t len, half;
508   saint_t q, r;
509   saint_t x;
510 
511   for(;;) {
512     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
513     else                { x = 0; p = PA +  *(last - 1); }
514     for(a = first, len = middle - first, half = len >> 1, r = -1;
515         0 < len;
516         len = half, half >>= 1) {
517       b = a + half;
518       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
519       if(q < 0) {
520         a = b + 1;
521         half -= (len & 1) ^ 1;
522       } else {
523         r = q;
524       }
525     }
526     if(a < middle) {
527       if(r == 0) { *a = ~*a; }
528       ss_rotate(a, middle, last);
529       last -= middle - a;
530       middle = a;
531       if(first == middle) { break; }
532     }
533     --last;
534     if(x != 0) { while(*--last < 0) { } }
535     if(middle == last) { break; }
536   }
537 }
538 
539 
540 /*---------------------------------------------------------------------------*/
541 
542 /* Merge-forward with internal buffer. */
543 static
544 void
545 ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
546                 saidx_t *first, saidx_t *middle, saidx_t *last,
547                 saidx_t *buf, saidx_t depth) {
548   saidx_t *a, *b, *c, *bufend;
549   saidx_t t;
550   saint_t r;
551 
552   bufend = buf + (middle - first) - 1;
553   ss_blockswap(buf, first, middle - first);
554 
555   for(t = *(a = first), b = buf, c = middle;;) {
556     r = ss_compare(T, PA + *b, PA + *c, depth);
557     if(r < 0) {
558       do {
559         *a++ = *b;
560         if(bufend <= b) { *bufend = t; return; }
561         *b++ = *a;
562       } while(*b < 0);
563     } else if(r > 0) {
564       do {
565         *a++ = *c, *c++ = *a;
566         if(last <= c) {
567           while(b < bufend) { *a++ = *b, *b++ = *a; }
568           *a = *b, *b = t;
569           return;
570         }
571       } while(*c < 0);
572     } else {
573       *c = ~*c;
574       do {
575         *a++ = *b;
576         if(bufend <= b) { *bufend = t; return; }
577         *b++ = *a;
578       } while(*b < 0);
579 
580       do {
581         *a++ = *c, *c++ = *a;
582         if(last <= c) {
583           while(b < bufend) { *a++ = *b, *b++ = *a; }
584           *a = *b, *b = t;
585           return;
586         }
587       } while(*c < 0);
588     }
589   }
590 }
591 
592 /* Merge-backward with internal buffer. */
593 static
594 void
595 ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
596                  saidx_t *first, saidx_t *middle, saidx_t *last,
597                  saidx_t *buf, saidx_t depth) {
598   const saidx_t *p1, *p2;
599   saidx_t *a, *b, *c, *bufend;
600   saidx_t t;
601   saint_t r;
602   saint_t x;
603 
604   bufend = buf + (last - middle) - 1;
605   ss_blockswap(buf, middle, last - middle);
606 
607   x = 0;
608   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
609   else                  { p1 = PA +  *bufend; }
610   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
611   else                  { p2 = PA +  *(middle - 1); }
612   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
613     r = ss_compare(T, p1, p2, depth);
614     if(0 < r) {
615       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
616       *a-- = *b;
617       if(b <= buf) { *buf = t; break; }
618       *b-- = *a;
619       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
620       else       { p1 = PA +  *b; }
621     } else if(r < 0) {
622       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
623       *a-- = *c, *c-- = *a;
624       if(c < first) {
625         while(buf < b) { *a-- = *b, *b-- = *a; }
626         *a = *b, *b = t;
627         break;
628       }
629       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
630       else       { p2 = PA +  *c; }
631     } else {
632       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
633       *a-- = ~*b;
634       if(b <= buf) { *buf = t; break; }
635       *b-- = *a;
636       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
637       *a-- = *c, *c-- = *a;
638       if(c < first) {
639         while(buf < b) { *a-- = *b, *b-- = *a; }
640         *a = *b, *b = t;
641         break;
642       }
643       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
644       else       { p1 = PA +  *b; }
645       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
646       else       { p2 = PA +  *c; }
647     }
648   }
649 }
650 
651 /* D&C based merge. */
652 static
653 void
654 ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
655              saidx_t *first, saidx_t *middle, saidx_t *last,
656              saidx_t *buf, saidx_t bufsize, saidx_t depth) {
657 #define STACK_SIZE SS_SMERGE_STACKSIZE
658 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
659 #define MERGE_CHECK(a, b, c)\
660   do {\
661     if(((c) & 1) ||\
662        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
663       *(a) = ~*(a);\
664     }\
665     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
666       *(b) = ~*(b);\
667     }\
668   } while(0)
669   struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
670   saidx_t *l, *r, *lm, *rm;
671   saidx_t m, len, half;
672   saint_t ssize;
673   saint_t check, next;
674 
675   for(check = 0, ssize = 0;;) {
676     if((last - middle) <= bufsize) {
677       if((first < middle) && (middle < last)) {
678         ss_mergebackward(T, PA, first, middle, last, buf, depth);
679       }
680       MERGE_CHECK(first, last, check);
681       STACK_POP(first, middle, last, check);
682       continue;
683     }
684 
685     if((middle - first) <= bufsize) {
686       if(first < middle) {
687         ss_mergeforward(T, PA, first, middle, last, buf, depth);
688       }
689       MERGE_CHECK(first, last, check);
690       STACK_POP(first, middle, last, check);
691       continue;
692     }
693 
694     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
695         0 < len;
696         len = half, half >>= 1) {
697       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
698                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
699         m += half + 1;
700         half -= (len & 1) ^ 1;
701       }
702     }
703 
704     if(0 < m) {
705       lm = middle - m, rm = middle + m;
706       ss_blockswap(lm, middle, m);
707       l = r = middle, next = 0;
708       if(rm < last) {
709         if(*rm < 0) {
710           *rm = ~*rm;
711           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
712           next |= 1;
713         } else if(first < lm) {
714           for(; *r < 0; ++r) { }
715           next |= 2;
716         }
717       }
718 
719       if((l - first) <= (last - r)) {
720         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
721         middle = lm, last = l, check = (check & 3) | (next & 4);
722       } else {
723         if((next & 2) && (r == middle)) { next ^= 6; }
724         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
725         first = r, middle = rm, check = (next & 3) | (check & 4);
726       }
727     } else {
728       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
729         *middle = ~*middle;
730       }
731       MERGE_CHECK(first, last, check);
732       STACK_POP(first, middle, last, check);
733     }
734   }
735 #undef STACK_SIZE
736 }
737 
738 #endif /* SS_BLOCKSIZE != 0 */
739 
740 
741 /*---------------------------------------------------------------------------*/
742 
743 /*- Function -*/
744 
745 /* Substring sort */
746 void
747 sssort(const sauchar_t *T, const saidx_t *PA,
748        saidx_t *first, saidx_t *last,
749        saidx_t *buf, saidx_t bufsize,
750        saidx_t depth, saidx_t n, saint_t lastsuffix) {
751   saidx_t *a;
752 #if SS_BLOCKSIZE != 0
753   saidx_t *b, *middle, *curbuf;
754   saidx_t j, k, curbufsize, limit;
755 #endif
756   saidx_t i;
757 
758   if(lastsuffix != 0) { ++first; }
759 
760 #if SS_BLOCKSIZE == 0
761   ss_mintrosort(T, PA, first, last, depth);
762 #else
763   if((bufsize < SS_BLOCKSIZE) &&
764       (bufsize < (last - first)) &&
765       (bufsize < (limit = ss_isqrt(last - first)))) {
766     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
767     buf = middle = last - limit, bufsize = limit;
768   } else {
769     middle = last, limit = 0;
770   }
771   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
772 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
773     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
774 #elif 1 < SS_BLOCKSIZE
775     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
776 #endif
777     curbufsize = last - (a + SS_BLOCKSIZE);
778     curbuf = a + SS_BLOCKSIZE;
779     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
780     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
781       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
782     }
783   }
784 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
785   ss_mintrosort(T, PA, a, middle, depth);
786 #elif 1 < SS_BLOCKSIZE
787   ss_insertionsort(T, PA, a, middle, depth);
788 #endif
789   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
790     if(i & 1) {
791       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
792       a -= k;
793     }
794   }
795   if(limit != 0) {
796 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
797     ss_mintrosort(T, PA, middle, last, depth);
798 #elif 1 < SS_BLOCKSIZE
799     ss_insertionsort(T, PA, middle, last, depth);
800 #endif
801     ss_inplacemerge(T, PA, first, middle, last, depth);
802   }
803 #endif
804 
805   if(lastsuffix != 0) {
806     /* Insert last type B* suffix. */
807     saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
808     for(a = first, i = *(first - 1);
809         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
810         ++a) {
811       *(a - 1) = *a;
812     }
813     *(a - 1) = i;
814   }
815 }
816