xref: /freebsd/sys/contrib/zstd/lib/dictBuilder/divsufsort.c (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
1 /*
2  * divsufsort.c for libdivsufsort-lite
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 /*- Compiler specifics -*/
28 #ifdef __clang__
29 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
30 #endif
31 
32 #if defined(_MSC_VER)
33 #  pragma warning(disable : 4244)
34 #  pragma warning(disable : 4127)    /* C4127 : Condition expression is constant */
35 #endif
36 
37 
38 /*- Dependencies -*/
39 #include <assert.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #include "divsufsort.h"
44 
45 /*- Constants -*/
46 #if defined(INLINE)
47 # undef INLINE
48 #endif
49 #if !defined(INLINE)
50 # define INLINE __inline
51 #endif
52 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
53 # undef ALPHABET_SIZE
54 #endif
55 #if !defined(ALPHABET_SIZE)
56 # define ALPHABET_SIZE (256)
57 #endif
58 #define BUCKET_A_SIZE (ALPHABET_SIZE)
59 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60 #if defined(SS_INSERTIONSORT_THRESHOLD)
61 # if SS_INSERTIONSORT_THRESHOLD < 1
62 #  undef SS_INSERTIONSORT_THRESHOLD
63 #  define SS_INSERTIONSORT_THRESHOLD (1)
64 # endif
65 #else
66 # define SS_INSERTIONSORT_THRESHOLD (8)
67 #endif
68 #if defined(SS_BLOCKSIZE)
69 # if SS_BLOCKSIZE < 0
70 #  undef SS_BLOCKSIZE
71 #  define SS_BLOCKSIZE (0)
72 # elif 32768 <= SS_BLOCKSIZE
73 #  undef SS_BLOCKSIZE
74 #  define SS_BLOCKSIZE (32767)
75 # endif
76 #else
77 # define SS_BLOCKSIZE (1024)
78 #endif
79 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
80 #if SS_BLOCKSIZE == 0
81 # define SS_MISORT_STACKSIZE (96)
82 #elif SS_BLOCKSIZE <= 4096
83 # define SS_MISORT_STACKSIZE (16)
84 #else
85 # define SS_MISORT_STACKSIZE (24)
86 #endif
87 #define SS_SMERGE_STACKSIZE (32)
88 #define TR_INSERTIONSORT_THRESHOLD (8)
89 #define TR_STACKSIZE (64)
90 
91 
92 /*- Macros -*/
93 #ifndef SWAP
94 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
95 #endif /* SWAP */
96 #ifndef MIN
97 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
98 #endif /* MIN */
99 #ifndef MAX
100 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
101 #endif /* MAX */
102 #define STACK_PUSH(_a, _b, _c, _d)\
103   do {\
104     assert(ssize < STACK_SIZE);\
105     stack[ssize].a = (_a), stack[ssize].b = (_b),\
106     stack[ssize].c = (_c), stack[ssize++].d = (_d);\
107   } while(0)
108 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
109   do {\
110     assert(ssize < STACK_SIZE);\
111     stack[ssize].a = (_a), stack[ssize].b = (_b),\
112     stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
113   } while(0)
114 #define STACK_POP(_a, _b, _c, _d)\
115   do {\
116     assert(0 <= ssize);\
117     if(ssize == 0) { return; }\
118     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119     (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
120   } while(0)
121 #define STACK_POP5(_a, _b, _c, _d, _e)\
122   do {\
123     assert(0 <= ssize);\
124     if(ssize == 0) { return; }\
125     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126     (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
127   } while(0)
128 #define BUCKET_A(_c0) bucket_A[(_c0)]
129 #if ALPHABET_SIZE == 256
130 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
132 #else
133 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
135 #endif
136 
137 
138 /*- Private Functions -*/
139 
140 static const int lg_table[256]= {
141  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
149 };
150 
151 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
152 
153 static INLINE
154 int
ss_ilg(int n)155 ss_ilg(int n) {
156 #if SS_BLOCKSIZE == 0
157   return (n & 0xffff0000) ?
158           ((n & 0xff000000) ?
159             24 + lg_table[(n >> 24) & 0xff] :
160             16 + lg_table[(n >> 16) & 0xff]) :
161           ((n & 0x0000ff00) ?
162              8 + lg_table[(n >>  8) & 0xff] :
163              0 + lg_table[(n >>  0) & 0xff]);
164 #elif SS_BLOCKSIZE < 256
165   return lg_table[n];
166 #else
167   return (n & 0xff00) ?
168           8 + lg_table[(n >> 8) & 0xff] :
169           0 + lg_table[(n >> 0) & 0xff];
170 #endif
171 }
172 
173 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
174 
175 #if SS_BLOCKSIZE != 0
176 
177 static const int sqq_table[256] = {
178   0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
179  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
180  90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
181 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
194 };
195 
196 static INLINE
197 int
ss_isqrt(int x)198 ss_isqrt(int x) {
199   int y, e;
200 
201   if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202   e = (x & 0xffff0000) ?
203         ((x & 0xff000000) ?
204           24 + lg_table[(x >> 24) & 0xff] :
205           16 + lg_table[(x >> 16) & 0xff]) :
206         ((x & 0x0000ff00) ?
207            8 + lg_table[(x >>  8) & 0xff] :
208            0 + lg_table[(x >>  0) & 0xff]);
209 
210   if(e >= 16) {
211     y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212     if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213     y = (y + 1 + x / y) >> 1;
214   } else if(e >= 8) {
215     y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
216   } else {
217     return sqq_table[x] >> 4;
218   }
219 
220   return (x < (y * y)) ? y - 1 : y;
221 }
222 
223 #endif /* SS_BLOCKSIZE != 0 */
224 
225 
226 /*---------------------------------------------------------------------------*/
227 
228 /* Compares two suffixes. */
229 static INLINE
230 int
ss_compare(const unsigned char * T,const int * p1,const int * p2,int depth)231 ss_compare(const unsigned char *T,
232            const int *p1, const int *p2,
233            int depth) {
234   const unsigned char *U1, *U2, *U1n, *U2n;
235 
236   for(U1 = T + depth + *p1,
237       U2 = T + depth + *p2,
238       U1n = T + *(p1 + 1) + 2,
239       U2n = T + *(p2 + 1) + 2;
240       (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
241       ++U1, ++U2) {
242   }
243 
244   return U1 < U1n ?
245         (U2 < U2n ? *U1 - *U2 : 1) :
246         (U2 < U2n ? -1 : 0);
247 }
248 
249 
250 /*---------------------------------------------------------------------------*/
251 
252 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
253 
254 /* Insertionsort for small size groups */
255 static
256 void
ss_insertionsort(const unsigned char * T,const int * PA,int * first,int * last,int depth)257 ss_insertionsort(const unsigned char *T, const int *PA,
258                  int *first, int *last, int depth) {
259   int *i, *j;
260   int t;
261   int r;
262 
263   for(i = last - 2; first <= i; --i) {
264     for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265       do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266       if(last <= j) { break; }
267     }
268     if(r == 0) { *j = ~*j; }
269     *(j - 1) = t;
270   }
271 }
272 
273 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
274 
275 
276 /*---------------------------------------------------------------------------*/
277 
278 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
279 
280 static INLINE
281 void
ss_fixdown(const unsigned char * Td,const int * PA,int * SA,int i,int size)282 ss_fixdown(const unsigned char *Td, const int *PA,
283            int *SA, int i, int size) {
284   int j, k;
285   int v;
286   int c, d, e;
287 
288   for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289     d = Td[PA[SA[k = j++]]];
290     if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291     if(d <= c) { break; }
292   }
293   SA[i] = v;
294 }
295 
296 /* Simple top-down heapsort. */
297 static
298 void
ss_heapsort(const unsigned char * Td,const int * PA,int * SA,int size)299 ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
300   int i, m;
301   int t;
302 
303   m = size;
304   if((size % 2) == 0) {
305     m--;
306     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
307   }
308 
309   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311   for(i = m - 1; 0 < i; --i) {
312     t = SA[0], SA[0] = SA[i];
313     ss_fixdown(Td, PA, SA, 0, i);
314     SA[i] = t;
315   }
316 }
317 
318 
319 /*---------------------------------------------------------------------------*/
320 
321 /* Returns the median of three elements. */
322 static INLINE
323 int *
ss_median3(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3)324 ss_median3(const unsigned char *Td, const int *PA,
325            int *v1, int *v2, int *v3) {
326   int *t;
327   if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328   if(Td[PA[*v2]] > Td[PA[*v3]]) {
329     if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
330     else { return v3; }
331   }
332   return v2;
333 }
334 
335 /* Returns the median of five elements. */
336 static INLINE
337 int *
ss_median5(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3,int * v4,int * v5)338 ss_median5(const unsigned char *Td, const int *PA,
339            int *v1, int *v2, int *v3, int *v4, int *v5) {
340   int *t;
341   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
347   return v3;
348 }
349 
350 /* Returns the pivot element. */
351 static INLINE
352 int *
ss_pivot(const unsigned char * Td,const int * PA,int * first,int * last)353 ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
354   int *middle;
355   int t;
356 
357   t = last - first;
358   middle = first + t / 2;
359 
360   if(t <= 512) {
361     if(t <= 32) {
362       return ss_median3(Td, PA, first, middle, last - 1);
363     } else {
364       t >>= 2;
365       return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
366     }
367   }
368   t >>= 3;
369   first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
370   middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371   last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372   return ss_median3(Td, PA, first, middle, last);
373 }
374 
375 
376 /*---------------------------------------------------------------------------*/
377 
378 /* Binary partition for substrings. */
379 static INLINE
380 int *
ss_partition(const int * PA,int * first,int * last,int depth)381 ss_partition(const int *PA,
382                     int *first, int *last, int depth) {
383   int *a, *b;
384   int t;
385   for(a = first - 1, b = last;;) {
386     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
388     if(b <= a) { break; }
389     t = ~*b;
390     *b = *a;
391     *a = t;
392   }
393   if(first < a) { *first = ~*first; }
394   return a;
395 }
396 
397 /* Multikey introsort for medium size groups. */
398 static
399 void
ss_mintrosort(const unsigned char * T,const int * PA,int * first,int * last,int depth)400 ss_mintrosort(const unsigned char *T, const int *PA,
401               int *first, int *last,
402               int depth) {
403 #define STACK_SIZE SS_MISORT_STACKSIZE
404   struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405   const unsigned char *Td;
406   int *a, *b, *c, *d, *e, *f;
407   int s, t;
408   int ssize;
409   int limit;
410   int v, x = 0;
411 
412   for(ssize = 0, limit = ss_ilg(last - first);;) {
413 
414     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415 #if 1 < SS_INSERTIONSORT_THRESHOLD
416       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
417 #endif
418       STACK_POP(first, last, depth, limit);
419       continue;
420     }
421 
422     Td = T + depth;
423     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
424     if(limit < 0) {
425       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426         if((x = Td[PA[*a]]) != v) {
427           if(1 < (a - first)) { break; }
428           v = x;
429           first = a;
430         }
431       }
432       if(Td[PA[*first] - 1] < v) {
433         first = ss_partition(PA, first, a, depth);
434       }
435       if((a - first) <= (last - a)) {
436         if(1 < (a - first)) {
437           STACK_PUSH(a, last, depth, -1);
438           last = a, depth += 1, limit = ss_ilg(a - first);
439         } else {
440           first = a, limit = -1;
441         }
442       } else {
443         if(1 < (last - a)) {
444           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445           first = a, limit = -1;
446         } else {
447           last = a, depth += 1, limit = ss_ilg(a - first);
448         }
449       }
450       continue;
451     }
452 
453     /* choose pivot */
454     a = ss_pivot(Td, PA, first, last);
455     v = Td[PA[*a]];
456     SWAP(*first, *a);
457 
458     /* partition */
459     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460     if(((a = b) < last) && (x < v)) {
461       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462         if(x == v) { SWAP(*b, *a); ++a; }
463       }
464     }
465     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466     if((b < (d = c)) && (x > v)) {
467       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468         if(x == v) { SWAP(*c, *d); --d; }
469       }
470     }
471     for(; b < c;) {
472       SWAP(*b, *c);
473       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474         if(x == v) { SWAP(*b, *a); ++a; }
475       }
476       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477         if(x == v) { SWAP(*c, *d); --d; }
478       }
479     }
480 
481     if(a <= d) {
482       c = b - 1;
483 
484       if((s = a - first) > (t = b - a)) { s = t; }
485       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486       if((s = d - c) > (t = last - d - 1)) { s = t; }
487       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
488 
489       a = first + (b - a), c = last - (d - c);
490       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
491 
492       if((a - first) <= (last - c)) {
493         if((last - c) <= (c - b)) {
494           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495           STACK_PUSH(c, last, depth, limit);
496           last = a;
497         } else if((a - first) <= (c - b)) {
498           STACK_PUSH(c, last, depth, limit);
499           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500           last = a;
501         } else {
502           STACK_PUSH(c, last, depth, limit);
503           STACK_PUSH(first, a, depth, limit);
504           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505         }
506       } else {
507         if((a - first) <= (c - b)) {
508           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509           STACK_PUSH(first, a, depth, limit);
510           first = c;
511         } else if((last - c) <= (c - b)) {
512           STACK_PUSH(first, a, depth, limit);
513           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
514           first = c;
515         } else {
516           STACK_PUSH(first, a, depth, limit);
517           STACK_PUSH(c, last, depth, limit);
518           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
519         }
520       }
521     } else {
522       limit += 1;
523       if(Td[PA[*first] - 1] < v) {
524         first = ss_partition(PA, first, last, depth);
525         limit = ss_ilg(last - first);
526       }
527       depth += 1;
528     }
529   }
530 #undef STACK_SIZE
531 }
532 
533 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
534 
535 
536 /*---------------------------------------------------------------------------*/
537 
538 #if SS_BLOCKSIZE != 0
539 
540 static INLINE
541 void
ss_blockswap(int * a,int * b,int n)542 ss_blockswap(int *a, int *b, int n) {
543   int t;
544   for(; 0 < n; --n, ++a, ++b) {
545     t = *a, *a = *b, *b = t;
546   }
547 }
548 
549 static INLINE
550 void
ss_rotate(int * first,int * middle,int * last)551 ss_rotate(int *first, int *middle, int *last) {
552   int *a, *b, t;
553   int l, r;
554   l = middle - first, r = last - middle;
555   for(; (0 < l) && (0 < r);) {
556     if(l == r) { ss_blockswap(first, middle, l); break; }
557     if(l < r) {
558       a = last - 1, b = middle - 1;
559       t = *a;
560       do {
561         *a-- = *b, *b-- = *a;
562         if(b < first) {
563           *a = t;
564           last = a;
565           if((r -= l + 1) <= l) { break; }
566           a -= 1, b = middle - 1;
567           t = *a;
568         }
569       } while(1);
570     } else {
571       a = first, b = middle;
572       t = *a;
573       do {
574         *a++ = *b, *b++ = *a;
575         if(last <= b) {
576           *a = t;
577           first = a + 1;
578           if((l -= r + 1) <= r) { break; }
579           a += 1, b = middle;
580           t = *a;
581         }
582       } while(1);
583     }
584   }
585 }
586 
587 
588 /*---------------------------------------------------------------------------*/
589 
590 static
591 void
ss_inplacemerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int depth)592 ss_inplacemerge(const unsigned char *T, const int *PA,
593                 int *first, int *middle, int *last,
594                 int depth) {
595   const int *p;
596   int *a, *b;
597   int len, half;
598   int q, r;
599   int x;
600 
601   for(;;) {
602     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603     else                { x = 0; p = PA +  *(last - 1); }
604     for(a = first, len = middle - first, half = len >> 1, r = -1;
605         0 < len;
606         len = half, half >>= 1) {
607       b = a + half;
608       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
609       if(q < 0) {
610         a = b + 1;
611         half -= (len & 1) ^ 1;
612       } else {
613         r = q;
614       }
615     }
616     if(a < middle) {
617       if(r == 0) { *a = ~*a; }
618       ss_rotate(a, middle, last);
619       last -= middle - a;
620       middle = a;
621       if(first == middle) { break; }
622     }
623     --last;
624     if(x != 0) { while(*--last < 0) { } }
625     if(middle == last) { break; }
626   }
627 }
628 
629 
630 /*---------------------------------------------------------------------------*/
631 
632 /* Merge-forward with internal buffer. */
633 static
634 void
ss_mergeforward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)635 ss_mergeforward(const unsigned char *T, const int *PA,
636                 int *first, int *middle, int *last,
637                 int *buf, int depth) {
638   int *a, *b, *c, *bufend;
639   int t;
640   int r;
641 
642   bufend = buf + (middle - first) - 1;
643   ss_blockswap(buf, first, middle - first);
644 
645   for(t = *(a = first), b = buf, c = middle;;) {
646     r = ss_compare(T, PA + *b, PA + *c, depth);
647     if(r < 0) {
648       do {
649         *a++ = *b;
650         if(bufend <= b) { *bufend = t; return; }
651         *b++ = *a;
652       } while(*b < 0);
653     } else if(r > 0) {
654       do {
655         *a++ = *c, *c++ = *a;
656         if(last <= c) {
657           while(b < bufend) { *a++ = *b, *b++ = *a; }
658           *a = *b, *b = t;
659           return;
660         }
661       } while(*c < 0);
662     } else {
663       *c = ~*c;
664       do {
665         *a++ = *b;
666         if(bufend <= b) { *bufend = t; return; }
667         *b++ = *a;
668       } while(*b < 0);
669 
670       do {
671         *a++ = *c, *c++ = *a;
672         if(last <= c) {
673           while(b < bufend) { *a++ = *b, *b++ = *a; }
674           *a = *b, *b = t;
675           return;
676         }
677       } while(*c < 0);
678     }
679   }
680 }
681 
682 /* Merge-backward with internal buffer. */
683 static
684 void
ss_mergebackward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)685 ss_mergebackward(const unsigned char *T, const int *PA,
686                  int *first, int *middle, int *last,
687                  int *buf, int depth) {
688   const int *p1, *p2;
689   int *a, *b, *c, *bufend;
690   int t;
691   int r;
692   int x;
693 
694   bufend = buf + (last - middle) - 1;
695   ss_blockswap(buf, middle, last - middle);
696 
697   x = 0;
698   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
699   else                  { p1 = PA +  *bufend; }
700   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701   else                  { p2 = PA +  *(middle - 1); }
702   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703     r = ss_compare(T, p1, p2, depth);
704     if(0 < r) {
705       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
706       *a-- = *b;
707       if(b <= buf) { *buf = t; break; }
708       *b-- = *a;
709       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710       else       { p1 = PA +  *b; }
711     } else if(r < 0) {
712       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713       *a-- = *c, *c-- = *a;
714       if(c < first) {
715         while(buf < b) { *a-- = *b, *b-- = *a; }
716         *a = *b, *b = t;
717         break;
718       }
719       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720       else       { p2 = PA +  *c; }
721     } else {
722       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
723       *a-- = ~*b;
724       if(b <= buf) { *buf = t; break; }
725       *b-- = *a;
726       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727       *a-- = *c, *c-- = *a;
728       if(c < first) {
729         while(buf < b) { *a-- = *b, *b-- = *a; }
730         *a = *b, *b = t;
731         break;
732       }
733       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734       else       { p1 = PA +  *b; }
735       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736       else       { p2 = PA +  *c; }
737     }
738   }
739 }
740 
741 /* D&C based merge. */
742 static
743 void
ss_swapmerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int bufsize,int depth)744 ss_swapmerge(const unsigned char *T, const int *PA,
745              int *first, int *middle, int *last,
746              int *buf, int bufsize, int depth) {
747 #define STACK_SIZE SS_SMERGE_STACKSIZE
748 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749 #define MERGE_CHECK(a, b, c)\
750   do {\
751     if(((c) & 1) ||\
752        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
753       *(a) = ~*(a);\
754     }\
755     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
756       *(b) = ~*(b);\
757     }\
758   } while(0)
759   struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760   int *l, *r, *lm, *rm;
761   int m, len, half;
762   int ssize;
763   int check, next;
764 
765   for(check = 0, ssize = 0;;) {
766     if((last - middle) <= bufsize) {
767       if((first < middle) && (middle < last)) {
768         ss_mergebackward(T, PA, first, middle, last, buf, depth);
769       }
770       MERGE_CHECK(first, last, check);
771       STACK_POP(first, middle, last, check);
772       continue;
773     }
774 
775     if((middle - first) <= bufsize) {
776       if(first < middle) {
777         ss_mergeforward(T, PA, first, middle, last, buf, depth);
778       }
779       MERGE_CHECK(first, last, check);
780       STACK_POP(first, middle, last, check);
781       continue;
782     }
783 
784     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
785         0 < len;
786         len = half, half >>= 1) {
787       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
789         m += half + 1;
790         half -= (len & 1) ^ 1;
791       }
792     }
793 
794     if(0 < m) {
795       lm = middle - m, rm = middle + m;
796       ss_blockswap(lm, middle, m);
797       l = r = middle, next = 0;
798       if(rm < last) {
799         if(*rm < 0) {
800           *rm = ~*rm;
801           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
802           next |= 1;
803         } else if(first < lm) {
804           for(; *r < 0; ++r) { }
805           next |= 2;
806         }
807       }
808 
809       if((l - first) <= (last - r)) {
810         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811         middle = lm, last = l, check = (check & 3) | (next & 4);
812       } else {
813         if((next & 2) && (r == middle)) { next ^= 6; }
814         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815         first = r, middle = rm, check = (next & 3) | (check & 4);
816       }
817     } else {
818       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
819         *middle = ~*middle;
820       }
821       MERGE_CHECK(first, last, check);
822       STACK_POP(first, middle, last, check);
823     }
824   }
825 #undef STACK_SIZE
826 }
827 
828 #endif /* SS_BLOCKSIZE != 0 */
829 
830 
831 /*---------------------------------------------------------------------------*/
832 
833 /* Substring sort */
834 static
835 void
sssort(const unsigned char * T,const int * PA,int * first,int * last,int * buf,int bufsize,int depth,int n,int lastsuffix)836 sssort(const unsigned char *T, const int *PA,
837        int *first, int *last,
838        int *buf, int bufsize,
839        int depth, int n, int lastsuffix) {
840   int *a;
841 #if SS_BLOCKSIZE != 0
842   int *b, *middle, *curbuf;
843   int j, k, curbufsize, limit;
844 #endif
845   int i;
846 
847   if(lastsuffix != 0) { ++first; }
848 
849 #if SS_BLOCKSIZE == 0
850   ss_mintrosort(T, PA, first, last, depth);
851 #else
852   if((bufsize < SS_BLOCKSIZE) &&
853       (bufsize < (last - first)) &&
854       (bufsize < (limit = ss_isqrt(last - first)))) {
855     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856     buf = middle = last - limit, bufsize = limit;
857   } else {
858     middle = last, limit = 0;
859   }
860   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863 #elif 1 < SS_BLOCKSIZE
864     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
865 #endif
866     curbufsize = last - (a + SS_BLOCKSIZE);
867     curbuf = a + SS_BLOCKSIZE;
868     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
871     }
872   }
873 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874   ss_mintrosort(T, PA, a, middle, depth);
875 #elif 1 < SS_BLOCKSIZE
876   ss_insertionsort(T, PA, a, middle, depth);
877 #endif
878   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
879     if(i & 1) {
880       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
881       a -= k;
882     }
883   }
884   if(limit != 0) {
885 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886     ss_mintrosort(T, PA, middle, last, depth);
887 #elif 1 < SS_BLOCKSIZE
888     ss_insertionsort(T, PA, middle, last, depth);
889 #endif
890     ss_inplacemerge(T, PA, first, middle, last, depth);
891   }
892 #endif
893 
894   if(lastsuffix != 0) {
895     /* Insert last type B* suffix. */
896     int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897     for(a = first, i = *(first - 1);
898         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
899         ++a) {
900       *(a - 1) = *a;
901     }
902     *(a - 1) = i;
903   }
904 }
905 
906 
907 /*---------------------------------------------------------------------------*/
908 
909 static INLINE
910 int
tr_ilg(int n)911 tr_ilg(int n) {
912   return (n & 0xffff0000) ?
913           ((n & 0xff000000) ?
914             24 + lg_table[(n >> 24) & 0xff] :
915             16 + lg_table[(n >> 16) & 0xff]) :
916           ((n & 0x0000ff00) ?
917              8 + lg_table[(n >>  8) & 0xff] :
918              0 + lg_table[(n >>  0) & 0xff]);
919 }
920 
921 
922 /*---------------------------------------------------------------------------*/
923 
924 /* Simple insertionsort for small size groups. */
925 static
926 void
tr_insertionsort(const int * ISAd,int * first,int * last)927 tr_insertionsort(const int *ISAd, int *first, int *last) {
928   int *a, *b;
929   int t, r;
930 
931   for(a = first + 1; a < last; ++a) {
932     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934       if(b < first) { break; }
935     }
936     if(r == 0) { *b = ~*b; }
937     *(b + 1) = t;
938   }
939 }
940 
941 
942 /*---------------------------------------------------------------------------*/
943 
944 static INLINE
945 void
tr_fixdown(const int * ISAd,int * SA,int i,int size)946 tr_fixdown(const int *ISAd, int *SA, int i, int size) {
947   int j, k;
948   int v;
949   int c, d, e;
950 
951   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952     d = ISAd[SA[k = j++]];
953     if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954     if(d <= c) { break; }
955   }
956   SA[i] = v;
957 }
958 
959 /* Simple top-down heapsort. */
960 static
961 void
tr_heapsort(const int * ISAd,int * SA,int size)962 tr_heapsort(const int *ISAd, int *SA, int size) {
963   int i, m;
964   int t;
965 
966   m = size;
967   if((size % 2) == 0) {
968     m--;
969     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
970   }
971 
972   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974   for(i = m - 1; 0 < i; --i) {
975     t = SA[0], SA[0] = SA[i];
976     tr_fixdown(ISAd, SA, 0, i);
977     SA[i] = t;
978   }
979 }
980 
981 
982 /*---------------------------------------------------------------------------*/
983 
984 /* Returns the median of three elements. */
985 static INLINE
986 int *
tr_median3(const int * ISAd,int * v1,int * v2,int * v3)987 tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
988   int *t;
989   if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990   if(ISAd[*v2] > ISAd[*v3]) {
991     if(ISAd[*v1] > ISAd[*v3]) { return v1; }
992     else { return v3; }
993   }
994   return v2;
995 }
996 
997 /* Returns the median of five elements. */
998 static INLINE
999 int *
tr_median5(const int * ISAd,int * v1,int * v2,int * v3,int * v4,int * v5)1000 tr_median5(const int *ISAd,
1001            int *v1, int *v2, int *v3, int *v4, int *v5) {
1002   int *t;
1003   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1009   return v3;
1010 }
1011 
1012 /* Returns the pivot element. */
1013 static INLINE
1014 int *
tr_pivot(const int * ISAd,int * first,int * last)1015 tr_pivot(const int *ISAd, int *first, int *last) {
1016   int *middle;
1017   int t;
1018 
1019   t = last - first;
1020   middle = first + t / 2;
1021 
1022   if(t <= 512) {
1023     if(t <= 32) {
1024       return tr_median3(ISAd, first, middle, last - 1);
1025     } else {
1026       t >>= 2;
1027       return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1028     }
1029   }
1030   t >>= 3;
1031   first  = tr_median3(ISAd, first, first + t, first + (t << 1));
1032   middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033   last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034   return tr_median3(ISAd, first, middle, last);
1035 }
1036 
1037 
1038 /*---------------------------------------------------------------------------*/
1039 
1040 typedef struct _trbudget_t trbudget_t;
1041 struct _trbudget_t {
1042   int chance;
1043   int remain;
1044   int incval;
1045   int count;
1046 };
1047 
1048 static INLINE
1049 void
trbudget_init(trbudget_t * budget,int chance,int incval)1050 trbudget_init(trbudget_t *budget, int chance, int incval) {
1051   budget->chance = chance;
1052   budget->remain = budget->incval = incval;
1053 }
1054 
1055 static INLINE
1056 int
trbudget_check(trbudget_t * budget,int size)1057 trbudget_check(trbudget_t *budget, int size) {
1058   if(size <= budget->remain) { budget->remain -= size; return 1; }
1059   if(budget->chance == 0) { budget->count += size; return 0; }
1060   budget->remain += budget->incval - size;
1061   budget->chance -= 1;
1062   return 1;
1063 }
1064 
1065 
1066 /*---------------------------------------------------------------------------*/
1067 
1068 static INLINE
1069 void
tr_partition(const int * ISAd,int * first,int * middle,int * last,int ** pa,int ** pb,int v)1070 tr_partition(const int *ISAd,
1071              int *first, int *middle, int *last,
1072              int **pa, int **pb, int v) {
1073   int *a, *b, *c, *d, *e, *f;
1074   int t, s;
1075   int x = 0;
1076 
1077   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078   if(((a = b) < last) && (x < v)) {
1079     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080       if(x == v) { SWAP(*b, *a); ++a; }
1081     }
1082   }
1083   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084   if((b < (d = c)) && (x > v)) {
1085     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086       if(x == v) { SWAP(*c, *d); --d; }
1087     }
1088   }
1089   for(; b < c;) {
1090     SWAP(*b, *c);
1091     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092       if(x == v) { SWAP(*b, *a); ++a; }
1093     }
1094     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095       if(x == v) { SWAP(*c, *d); --d; }
1096     }
1097   }
1098 
1099   if(a <= d) {
1100     c = b - 1;
1101     if((s = a - first) > (t = b - a)) { s = t; }
1102     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103     if((s = d - c) > (t = last - d - 1)) { s = t; }
1104     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105     first += (b - a), last -= (d - c);
1106   }
1107   *pa = first, *pb = last;
1108 }
1109 
1110 static
1111 void
tr_copy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1112 tr_copy(int *ISA, const int *SA,
1113         int *first, int *a, int *b, int *last,
1114         int depth) {
1115   /* sort suffixes of middle partition
1116      by using sorted order of suffixes of left and right partition. */
1117   int *c, *d, *e;
1118   int s, v;
1119 
1120   v = b - SA - 1;
1121   for(c = first, d = a - 1; c <= d; ++c) {
1122     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123       *++d = s;
1124       ISA[s] = d - SA;
1125     }
1126   }
1127   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129       *--d = s;
1130       ISA[s] = d - SA;
1131     }
1132   }
1133 }
1134 
1135 static
1136 void
tr_partialcopy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1137 tr_partialcopy(int *ISA, const int *SA,
1138                int *first, int *a, int *b, int *last,
1139                int depth) {
1140   int *c, *d, *e;
1141   int s, v;
1142   int rank, lastrank, newrank = -1;
1143 
1144   v = b - SA - 1;
1145   lastrank = -1;
1146   for(c = first, d = a - 1; c <= d; ++c) {
1147     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1148       *++d = s;
1149       rank = ISA[s + depth];
1150       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1151       ISA[s] = newrank;
1152     }
1153   }
1154 
1155   lastrank = -1;
1156   for(e = d; first <= e; --e) {
1157     rank = ISA[*e];
1158     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159     if(newrank != rank) { ISA[*e] = newrank; }
1160   }
1161 
1162   lastrank = -1;
1163   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1165       *--d = s;
1166       rank = ISA[s + depth];
1167       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1168       ISA[s] = newrank;
1169     }
1170   }
1171 }
1172 
1173 static
1174 void
tr_introsort(int * ISA,const int * ISAd,int * SA,int * first,int * last,trbudget_t * budget)1175 tr_introsort(int *ISA, const int *ISAd,
1176              int *SA, int *first, int *last,
1177              trbudget_t *budget) {
1178 #define STACK_SIZE TR_STACKSIZE
1179   struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1180   int *a, *b, *c;
1181   int t;
1182   int v, x = 0;
1183   int incr = ISAd - ISA;
1184   int limit, next;
1185   int ssize, trlink = -1;
1186 
1187   for(ssize = 0, limit = tr_ilg(last - first);;) {
1188 
1189     if(limit < 0) {
1190       if(limit == -1) {
1191         /* tandem repeat partition */
1192         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1193 
1194         /* update ranks */
1195         if(a < last) {
1196           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1197         }
1198         if(b < last) {
1199           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1200         }
1201 
1202         /* push */
1203         if(1 < (b - a)) {
1204           STACK_PUSH5(NULL, a, b, 0, 0);
1205           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1206           trlink = ssize - 2;
1207         }
1208         if((a - first) <= (last - b)) {
1209           if(1 < (a - first)) {
1210             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211             last = a, limit = tr_ilg(a - first);
1212           } else if(1 < (last - b)) {
1213             first = b, limit = tr_ilg(last - b);
1214           } else {
1215             STACK_POP5(ISAd, first, last, limit, trlink);
1216           }
1217         } else {
1218           if(1 < (last - b)) {
1219             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220             first = b, limit = tr_ilg(last - b);
1221           } else if(1 < (a - first)) {
1222             last = a, limit = tr_ilg(a - first);
1223           } else {
1224             STACK_POP5(ISAd, first, last, limit, trlink);
1225           }
1226         }
1227       } else if(limit == -2) {
1228         /* tandem repeat copy */
1229         a = stack[--ssize].b, b = stack[ssize].c;
1230         if(stack[ssize].d == 0) {
1231           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1232         } else {
1233           if(0 <= trlink) { stack[trlink].d = -1; }
1234           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1235         }
1236         STACK_POP5(ISAd, first, last, limit, trlink);
1237       } else {
1238         /* sorted partition */
1239         if(0 <= *first) {
1240           a = first;
1241           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1242           first = a;
1243         }
1244         if(first < last) {
1245           a = first; do { *a = ~*a; } while(*++a < 0);
1246           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1248 
1249           /* push */
1250           if(trbudget_check(budget, a - first)) {
1251             if((a - first) <= (last - a)) {
1252               STACK_PUSH5(ISAd, a, last, -3, trlink);
1253               ISAd += incr, last = a, limit = next;
1254             } else {
1255               if(1 < (last - a)) {
1256                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257                 first = a, limit = -3;
1258               } else {
1259                 ISAd += incr, last = a, limit = next;
1260               }
1261             }
1262           } else {
1263             if(0 <= trlink) { stack[trlink].d = -1; }
1264             if(1 < (last - a)) {
1265               first = a, limit = -3;
1266             } else {
1267               STACK_POP5(ISAd, first, last, limit, trlink);
1268             }
1269           }
1270         } else {
1271           STACK_POP5(ISAd, first, last, limit, trlink);
1272         }
1273       }
1274       continue;
1275     }
1276 
1277     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278       tr_insertionsort(ISAd, first, last);
1279       limit = -3;
1280       continue;
1281     }
1282 
1283     if(limit-- == 0) {
1284       tr_heapsort(ISAd, first, last - first);
1285       for(a = last - 1; first < a; a = b) {
1286         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1287       }
1288       limit = -3;
1289       continue;
1290     }
1291 
1292     /* choose pivot */
1293     a = tr_pivot(ISAd, first, last);
1294     SWAP(*first, *a);
1295     v = ISAd[*first];
1296 
1297     /* partition */
1298     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299     if((last - first) != (b - a)) {
1300       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1301 
1302       /* update ranks */
1303       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1305 
1306       /* push */
1307       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308         if((a - first) <= (last - b)) {
1309           if((last - b) <= (b - a)) {
1310             if(1 < (a - first)) {
1311               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312               STACK_PUSH5(ISAd, b, last, limit, trlink);
1313               last = a;
1314             } else if(1 < (last - b)) {
1315               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1316               first = b;
1317             } else {
1318               ISAd += incr, first = a, last = b, limit = next;
1319             }
1320           } else if((a - first) <= (b - a)) {
1321             if(1 < (a - first)) {
1322               STACK_PUSH5(ISAd, b, last, limit, trlink);
1323               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324               last = a;
1325             } else {
1326               STACK_PUSH5(ISAd, b, last, limit, trlink);
1327               ISAd += incr, first = a, last = b, limit = next;
1328             }
1329           } else {
1330             STACK_PUSH5(ISAd, b, last, limit, trlink);
1331             STACK_PUSH5(ISAd, first, a, limit, trlink);
1332             ISAd += incr, first = a, last = b, limit = next;
1333           }
1334         } else {
1335           if((a - first) <= (b - a)) {
1336             if(1 < (last - b)) {
1337               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338               STACK_PUSH5(ISAd, first, a, limit, trlink);
1339               first = b;
1340             } else if(1 < (a - first)) {
1341               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1342               last = a;
1343             } else {
1344               ISAd += incr, first = a, last = b, limit = next;
1345             }
1346           } else if((last - b) <= (b - a)) {
1347             if(1 < (last - b)) {
1348               STACK_PUSH5(ISAd, first, a, limit, trlink);
1349               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1350               first = b;
1351             } else {
1352               STACK_PUSH5(ISAd, first, a, limit, trlink);
1353               ISAd += incr, first = a, last = b, limit = next;
1354             }
1355           } else {
1356             STACK_PUSH5(ISAd, first, a, limit, trlink);
1357             STACK_PUSH5(ISAd, b, last, limit, trlink);
1358             ISAd += incr, first = a, last = b, limit = next;
1359           }
1360         }
1361       } else {
1362         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363         if((a - first) <= (last - b)) {
1364           if(1 < (a - first)) {
1365             STACK_PUSH5(ISAd, b, last, limit, trlink);
1366             last = a;
1367           } else if(1 < (last - b)) {
1368             first = b;
1369           } else {
1370             STACK_POP5(ISAd, first, last, limit, trlink);
1371           }
1372         } else {
1373           if(1 < (last - b)) {
1374             STACK_PUSH5(ISAd, first, a, limit, trlink);
1375             first = b;
1376           } else if(1 < (a - first)) {
1377             last = a;
1378           } else {
1379             STACK_POP5(ISAd, first, last, limit, trlink);
1380           }
1381         }
1382       }
1383     } else {
1384       if(trbudget_check(budget, last - first)) {
1385         limit = tr_ilg(last - first), ISAd += incr;
1386       } else {
1387         if(0 <= trlink) { stack[trlink].d = -1; }
1388         STACK_POP5(ISAd, first, last, limit, trlink);
1389       }
1390     }
1391   }
1392 #undef STACK_SIZE
1393 }
1394 
1395 
1396 
1397 /*---------------------------------------------------------------------------*/
1398 
1399 /* Tandem repeat sort */
1400 static
1401 void
trsort(int * ISA,int * SA,int n,int depth)1402 trsort(int *ISA, int *SA, int n, int depth) {
1403   int *ISAd;
1404   int *first, *last;
1405   trbudget_t budget;
1406   int t, skip, unsorted;
1407 
1408   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409 /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1411     first = SA;
1412     skip = 0;
1413     unsorted = 0;
1414     do {
1415       if((t = *first) < 0) { first -= t; skip += t; }
1416       else {
1417         if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418         last = SA + ISA[t] + 1;
1419         if(1 < (last - first)) {
1420           budget.count = 0;
1421           tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422           if(budget.count != 0) { unsorted += budget.count; }
1423           else { skip = first - last; }
1424         } else if((last - first) == 1) {
1425           skip = -1;
1426         }
1427         first = last;
1428       }
1429     } while(first < (SA + n));
1430     if(skip != 0) { *(first + skip) = skip; }
1431     if(unsorted == 0) { break; }
1432   }
1433 }
1434 
1435 
1436 /*---------------------------------------------------------------------------*/
1437 
1438 /* Sorts suffixes of type B*. */
1439 static
1440 int
sort_typeBstar(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int openMP)1441 sort_typeBstar(const unsigned char *T, int *SA,
1442                int *bucket_A, int *bucket_B,
1443                int n, int openMP) {
1444   int *PAb, *ISAb, *buf;
1445 #ifdef LIBBSC_OPENMP
1446   int *curbuf;
1447   int l;
1448 #endif
1449   int i, j, k, t, m, bufsize;
1450   int c0, c1;
1451 #ifdef LIBBSC_OPENMP
1452   int d0, d1;
1453 #endif
1454   (void)openMP;
1455 
1456   /* Initialize bucket arrays. */
1457   for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458   for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1459 
1460   /* Count the number of occurrences of the first one or two characters of each
1461      type A, B and B* suffix. Moreover, store the beginning position of all
1462      type B* suffixes into the array SA. */
1463   for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464     /* type A suffix. */
1465     do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1466     if(0 <= i) {
1467       /* type B* suffix. */
1468       ++BUCKET_BSTAR(c0, c1);
1469       SA[--m] = i;
1470       /* type B suffix. */
1471       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1472         ++BUCKET_B(c0, c1);
1473       }
1474     }
1475   }
1476   m = n - m;
1477 /*
1478 note:
1479   A type B* suffix is lexicographically smaller than a type B suffix that
1480   begins with the same first two characters.
1481 */
1482 
1483   /* Calculate the index of start/end point of each bucket. */
1484   for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485     t = i + BUCKET_A(c0);
1486     BUCKET_A(c0) = i + j; /* start point */
1487     i = t + BUCKET_B(c0, c0);
1488     for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489       j += BUCKET_BSTAR(c0, c1);
1490       BUCKET_BSTAR(c0, c1) = j; /* end point */
1491       i += BUCKET_B(c0, c1);
1492     }
1493   }
1494 
1495   if(0 < m) {
1496     /* Sort the type B* suffixes by their first two characters. */
1497     PAb = SA + n - m; ISAb = SA + m;
1498     for(i = m - 2; 0 <= i; --i) {
1499       t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500       SA[--BUCKET_BSTAR(c0, c1)] = i;
1501     }
1502     t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503     SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1504 
1505     /* Sort the type B* substrings using sssort. */
1506 #ifdef LIBBSC_OPENMP
1507     if (openMP)
1508     {
1509         buf = SA + m;
1510         c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511 #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1512         {
1513           bufsize = (n - (2 * m)) / omp_get_num_threads();
1514           curbuf = buf + omp_get_thread_num() * bufsize;
1515           k = 0;
1516           for(;;) {
1517             #pragma omp critical(sssort_lock)
1518             {
1519               if(0 < (l = j)) {
1520                 d0 = c0, d1 = c1;
1521                 do {
1522                   k = BUCKET_BSTAR(d0, d1);
1523                   if(--d1 <= d0) {
1524                     d1 = ALPHABET_SIZE - 1;
1525                     if(--d0 < 0) { break; }
1526                   }
1527                 } while(((l - k) <= 1) && (0 < (l = k)));
1528                 c0 = d0, c1 = d1, j = k;
1529               }
1530             }
1531             if(l == 0) { break; }
1532             sssort(T, PAb, SA + k, SA + l,
1533                    curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1534           }
1535         }
1536     }
1537     else
1538     {
1539         buf = SA + m, bufsize = n - (2 * m);
1540         for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541           for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542             i = BUCKET_BSTAR(c0, c1);
1543             if(1 < (j - i)) {
1544               sssort(T, PAb, SA + i, SA + j,
1545                      buf, bufsize, 2, n, *(SA + i) == (m - 1));
1546             }
1547           }
1548         }
1549     }
1550 #else
1551     buf = SA + m, bufsize = n - (2 * m);
1552     for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553       for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554         i = BUCKET_BSTAR(c0, c1);
1555         if(1 < (j - i)) {
1556           sssort(T, PAb, SA + i, SA + j,
1557                  buf, bufsize, 2, n, *(SA + i) == (m - 1));
1558         }
1559       }
1560     }
1561 #endif
1562 
1563     /* Compute ranks of type B* substrings. */
1564     for(i = m - 1; 0 <= i; --i) {
1565       if(0 <= SA[i]) {
1566         j = i;
1567         do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1568         SA[i + 1] = i - j;
1569         if(i <= 0) { break; }
1570       }
1571       j = i;
1572       do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1573       ISAb[SA[i]] = j;
1574     }
1575 
1576     /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577     trsort(ISAb, SA, m, 1);
1578 
1579     /* Set the sorted order of type B* suffixes. */
1580     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582       if(0 <= i) {
1583         t = i;
1584         for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585         SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1586       }
1587     }
1588 
1589     /* Calculate the index of start/end point of each bucket. */
1590     BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591     for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592       i = BUCKET_A(c0 + 1) - 1;
1593       for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594         t = i - BUCKET_B(c0, c1);
1595         BUCKET_B(c0, c1) = i; /* end point */
1596 
1597         /* Move all type B* suffixes to the correct position. */
1598         for(i = t, j = BUCKET_BSTAR(c0, c1);
1599             j <= k;
1600             --i, --k) { SA[i] = SA[k]; }
1601       }
1602       BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603       BUCKET_B(c0, c0) = i; /* end point */
1604     }
1605   }
1606 
1607   return m;
1608 }
1609 
1610 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1611 static
1612 void
construct_SA(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1613 construct_SA(const unsigned char *T, int *SA,
1614              int *bucket_A, int *bucket_B,
1615              int n, int m) {
1616   int *i, *j, *k;
1617   int s;
1618   int c0, c1, c2;
1619 
1620   if(0 < m) {
1621     /* Construct the sorted order of type B suffixes by using
1622        the sorted order of type B* suffixes. */
1623     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624       /* Scan the suffix array from right to left. */
1625       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1627           i <= j;
1628           --j) {
1629         if(0 < (s = *j)) {
1630           assert(T[s] == c1);
1631           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632           assert(T[s - 1] <= T[s]);
1633           *j = ~s;
1634           c0 = T[--s];
1635           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1636           if(c0 != c2) {
1637             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638             k = SA + BUCKET_B(c2 = c0, c1);
1639           }
1640           assert(k < j); assert(k != NULL);
1641           *k-- = s;
1642         } else {
1643           assert(((s == 0) && (T[s] == c1)) || (s < 0));
1644           *j = ~s;
1645         }
1646       }
1647     }
1648   }
1649 
1650   /* Construct the suffix array by using
1651      the sorted order of type B suffixes. */
1652   k = SA + BUCKET_A(c2 = T[n - 1]);
1653   *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654   /* Scan the suffix array from left to right. */
1655   for(i = SA, j = SA + n; i < j; ++i) {
1656     if(0 < (s = *i)) {
1657       assert(T[s - 1] >= T[s]);
1658       c0 = T[--s];
1659       if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1660       if(c0 != c2) {
1661         BUCKET_A(c2) = k - SA;
1662         k = SA + BUCKET_A(c2 = c0);
1663       }
1664       assert(i < k);
1665       *k++ = s;
1666     } else {
1667       assert(s < 0);
1668       *i = ~s;
1669     }
1670   }
1671 }
1672 
1673 /* Constructs the burrows-wheeler transformed string directly
1674    by using the sorted order of type B* suffixes. */
1675 static
1676 int
construct_BWT(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1677 construct_BWT(const unsigned char *T, int *SA,
1678               int *bucket_A, int *bucket_B,
1679               int n, int m) {
1680   int *i, *j, *k, *orig;
1681   int s;
1682   int c0, c1, c2;
1683 
1684   if(0 < m) {
1685     /* Construct the sorted order of type B suffixes by using
1686        the sorted order of type B* suffixes. */
1687     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688       /* Scan the suffix array from right to left. */
1689       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1691           i <= j;
1692           --j) {
1693         if(0 < (s = *j)) {
1694           assert(T[s] == c1);
1695           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696           assert(T[s - 1] <= T[s]);
1697           c0 = T[--s];
1698           *j = ~((int)c0);
1699           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1700           if(c0 != c2) {
1701             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702             k = SA + BUCKET_B(c2 = c0, c1);
1703           }
1704           assert(k < j); assert(k != NULL);
1705           *k-- = s;
1706         } else if(s != 0) {
1707           *j = ~s;
1708 #ifndef NDEBUG
1709         } else {
1710           assert(T[s] == c1);
1711 #endif
1712         }
1713       }
1714     }
1715   }
1716 
1717   /* Construct the BWTed string by using
1718      the sorted order of type B suffixes. */
1719   k = SA + BUCKET_A(c2 = T[n - 1]);
1720   *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721   /* Scan the suffix array from left to right. */
1722   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1723     if(0 < (s = *i)) {
1724       assert(T[s - 1] >= T[s]);
1725       c0 = T[--s];
1726       *i = c0;
1727       if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1728       if(c0 != c2) {
1729         BUCKET_A(c2) = k - SA;
1730         k = SA + BUCKET_A(c2 = c0);
1731       }
1732       assert(i < k);
1733       *k++ = s;
1734     } else if(s != 0) {
1735       *i = ~s;
1736     } else {
1737       orig = i;
1738     }
1739   }
1740 
1741   return orig - SA;
1742 }
1743 
1744 /* Constructs the burrows-wheeler transformed string directly
1745    by using the sorted order of type B* suffixes. */
1746 static
1747 int
construct_BWT_indexes(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m,unsigned char * num_indexes,int * indexes)1748 construct_BWT_indexes(const unsigned char *T, int *SA,
1749                       int *bucket_A, int *bucket_B,
1750                       int n, int m,
1751                       unsigned char * num_indexes, int * indexes) {
1752   int *i, *j, *k, *orig;
1753   int s;
1754   int c0, c1, c2;
1755 
1756   int mod = n / 8;
1757   {
1758       mod |= mod >> 1;  mod |= mod >> 2;
1759       mod |= mod >> 4;  mod |= mod >> 8;
1760       mod |= mod >> 16; mod >>= 1;
1761 
1762       *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1763   }
1764 
1765   if(0 < m) {
1766     /* Construct the sorted order of type B suffixes by using
1767        the sorted order of type B* suffixes. */
1768     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769       /* Scan the suffix array from right to left. */
1770       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1772           i <= j;
1773           --j) {
1774         if(0 < (s = *j)) {
1775           assert(T[s] == c1);
1776           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777           assert(T[s - 1] <= T[s]);
1778 
1779           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1780 
1781           c0 = T[--s];
1782           *j = ~((int)c0);
1783           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1784           if(c0 != c2) {
1785             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786             k = SA + BUCKET_B(c2 = c0, c1);
1787           }
1788           assert(k < j); assert(k != NULL);
1789           *k-- = s;
1790         } else if(s != 0) {
1791           *j = ~s;
1792 #ifndef NDEBUG
1793         } else {
1794           assert(T[s] == c1);
1795 #endif
1796         }
1797       }
1798     }
1799   }
1800 
1801   /* Construct the BWTed string by using
1802      the sorted order of type B suffixes. */
1803   k = SA + BUCKET_A(c2 = T[n - 1]);
1804   if (T[n - 2] < c2) {
1805     if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806     *k++ = ~((int)T[n - 2]);
1807   }
1808   else {
1809     *k++ = n - 1;
1810   }
1811 
1812   /* Scan the suffix array from left to right. */
1813   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1814     if(0 < (s = *i)) {
1815       assert(T[s - 1] >= T[s]);
1816 
1817       if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1818 
1819       c0 = T[--s];
1820       *i = c0;
1821       if(c0 != c2) {
1822         BUCKET_A(c2) = k - SA;
1823         k = SA + BUCKET_A(c2 = c0);
1824       }
1825       assert(i < k);
1826       if((0 < s) && (T[s - 1] < c0)) {
1827           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828           *k++ = ~((int)T[s - 1]);
1829       } else
1830         *k++ = s;
1831     } else if(s != 0) {
1832       *i = ~s;
1833     } else {
1834       orig = i;
1835     }
1836   }
1837 
1838   return orig - SA;
1839 }
1840 
1841 
1842 /*---------------------------------------------------------------------------*/
1843 
1844 /*- Function -*/
1845 
1846 int
divsufsort(const unsigned char * T,int * SA,int n,int openMP)1847 divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848   int *bucket_A, *bucket_B;
1849   int m;
1850   int err = 0;
1851 
1852   /* Check arguments. */
1853   if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854   else if(n == 0) { return 0; }
1855   else if(n == 1) { SA[0] = 0; return 0; }
1856   else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1857 
1858   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1860 
1861   /* Suffixsort. */
1862   if((bucket_A != NULL) && (bucket_B != NULL)) {
1863     m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864     construct_SA(T, SA, bucket_A, bucket_B, n, m);
1865   } else {
1866     err = -2;
1867   }
1868 
1869   free(bucket_B);
1870   free(bucket_A);
1871 
1872   return err;
1873 }
1874 
1875 int
divbwt(const unsigned char * T,unsigned char * U,int * A,int n,unsigned char * num_indexes,int * indexes,int openMP)1876 divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1877   int *B;
1878   int *bucket_A, *bucket_B;
1879   int m, pidx, i;
1880 
1881   /* Check arguments. */
1882   if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883   else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1884 
1885   if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1888 
1889   /* Burrows-Wheeler Transform. */
1890   if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891     m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1892 
1893     if (num_indexes == NULL || indexes == NULL) {
1894         pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1895     } else {
1896         pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1897     }
1898 
1899     /* Copy to output string. */
1900     U[0] = T[n - 1];
1901     for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902     for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1903     pidx += 1;
1904   } else {
1905     pidx = -2;
1906   }
1907 
1908   free(bucket_B);
1909   free(bucket_A);
1910   if(A == NULL) { free(B); }
1911 
1912   return pidx;
1913 }
1914