1 /*
2 * divsufsort.c for libdivsufsort-lite
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*- Compiler specifics -*/
28 #ifdef __clang__
29 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
30 #endif
31
32 #if defined(_MSC_VER)
33 # pragma warning(disable : 4244)
34 # pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
35 #endif
36
37
38 /*- Dependencies -*/
39 #include <assert.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42
43 #include "divsufsort.h"
44
45 /*- Constants -*/
46 #if defined(INLINE)
47 # undef INLINE
48 #endif
49 #if !defined(INLINE)
50 # define INLINE __inline
51 #endif
52 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
53 # undef ALPHABET_SIZE
54 #endif
55 #if !defined(ALPHABET_SIZE)
56 # define ALPHABET_SIZE (256)
57 #endif
58 #define BUCKET_A_SIZE (ALPHABET_SIZE)
59 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60 #if defined(SS_INSERTIONSORT_THRESHOLD)
61 # if SS_INSERTIONSORT_THRESHOLD < 1
62 # undef SS_INSERTIONSORT_THRESHOLD
63 # define SS_INSERTIONSORT_THRESHOLD (1)
64 # endif
65 #else
66 # define SS_INSERTIONSORT_THRESHOLD (8)
67 #endif
68 #if defined(SS_BLOCKSIZE)
69 # if SS_BLOCKSIZE < 0
70 # undef SS_BLOCKSIZE
71 # define SS_BLOCKSIZE (0)
72 # elif 32768 <= SS_BLOCKSIZE
73 # undef SS_BLOCKSIZE
74 # define SS_BLOCKSIZE (32767)
75 # endif
76 #else
77 # define SS_BLOCKSIZE (1024)
78 #endif
79 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
80 #if SS_BLOCKSIZE == 0
81 # define SS_MISORT_STACKSIZE (96)
82 #elif SS_BLOCKSIZE <= 4096
83 # define SS_MISORT_STACKSIZE (16)
84 #else
85 # define SS_MISORT_STACKSIZE (24)
86 #endif
87 #define SS_SMERGE_STACKSIZE (32)
88 #define TR_INSERTIONSORT_THRESHOLD (8)
89 #define TR_STACKSIZE (64)
90
91
92 /*- Macros -*/
93 #ifndef SWAP
94 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
95 #endif /* SWAP */
96 #ifndef MIN
97 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
98 #endif /* MIN */
99 #ifndef MAX
100 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
101 #endif /* MAX */
102 #define STACK_PUSH(_a, _b, _c, _d)\
103 do {\
104 assert(ssize < STACK_SIZE);\
105 stack[ssize].a = (_a), stack[ssize].b = (_b),\
106 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
107 } while(0)
108 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
109 do {\
110 assert(ssize < STACK_SIZE);\
111 stack[ssize].a = (_a), stack[ssize].b = (_b),\
112 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
113 } while(0)
114 #define STACK_POP(_a, _b, _c, _d)\
115 do {\
116 assert(0 <= ssize);\
117 if(ssize == 0) { return; }\
118 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
120 } while(0)
121 #define STACK_POP5(_a, _b, _c, _d, _e)\
122 do {\
123 assert(0 <= ssize);\
124 if(ssize == 0) { return; }\
125 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
127 } while(0)
128 #define BUCKET_A(_c0) bucket_A[(_c0)]
129 #if ALPHABET_SIZE == 256
130 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
132 #else
133 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
135 #endif
136
137
138 /*- Private Functions -*/
139
140 static const int lg_table[256]= {
141 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
149 };
150
151 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
152
153 static INLINE
154 int
ss_ilg(int n)155 ss_ilg(int n) {
156 #if SS_BLOCKSIZE == 0
157 return (n & 0xffff0000) ?
158 ((n & 0xff000000) ?
159 24 + lg_table[(n >> 24) & 0xff] :
160 16 + lg_table[(n >> 16) & 0xff]) :
161 ((n & 0x0000ff00) ?
162 8 + lg_table[(n >> 8) & 0xff] :
163 0 + lg_table[(n >> 0) & 0xff]);
164 #elif SS_BLOCKSIZE < 256
165 return lg_table[n];
166 #else
167 return (n & 0xff00) ?
168 8 + lg_table[(n >> 8) & 0xff] :
169 0 + lg_table[(n >> 0) & 0xff];
170 #endif
171 }
172
173 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
174
175 #if SS_BLOCKSIZE != 0
176
177 static const int sqq_table[256] = {
178 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
179 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
180 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
181 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
194 };
195
196 static INLINE
197 int
ss_isqrt(int x)198 ss_isqrt(int x) {
199 int y, e;
200
201 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202 e = (x & 0xffff0000) ?
203 ((x & 0xff000000) ?
204 24 + lg_table[(x >> 24) & 0xff] :
205 16 + lg_table[(x >> 16) & 0xff]) :
206 ((x & 0x0000ff00) ?
207 8 + lg_table[(x >> 8) & 0xff] :
208 0 + lg_table[(x >> 0) & 0xff]);
209
210 if(e >= 16) {
211 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213 y = (y + 1 + x / y) >> 1;
214 } else if(e >= 8) {
215 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
216 } else {
217 return sqq_table[x] >> 4;
218 }
219
220 return (x < (y * y)) ? y - 1 : y;
221 }
222
223 #endif /* SS_BLOCKSIZE != 0 */
224
225
226 /*---------------------------------------------------------------------------*/
227
228 /* Compares two suffixes. */
229 static INLINE
230 int
ss_compare(const unsigned char * T,const int * p1,const int * p2,int depth)231 ss_compare(const unsigned char *T,
232 const int *p1, const int *p2,
233 int depth) {
234 const unsigned char *U1, *U2, *U1n, *U2n;
235
236 for(U1 = T + depth + *p1,
237 U2 = T + depth + *p2,
238 U1n = T + *(p1 + 1) + 2,
239 U2n = T + *(p2 + 1) + 2;
240 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
241 ++U1, ++U2) {
242 }
243
244 return U1 < U1n ?
245 (U2 < U2n ? *U1 - *U2 : 1) :
246 (U2 < U2n ? -1 : 0);
247 }
248
249
250 /*---------------------------------------------------------------------------*/
251
252 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
253
254 /* Insertionsort for small size groups */
255 static
256 void
ss_insertionsort(const unsigned char * T,const int * PA,int * first,int * last,int depth)257 ss_insertionsort(const unsigned char *T, const int *PA,
258 int *first, int *last, int depth) {
259 int *i, *j;
260 int t;
261 int r;
262
263 for(i = last - 2; first <= i; --i) {
264 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266 if(last <= j) { break; }
267 }
268 if(r == 0) { *j = ~*j; }
269 *(j - 1) = t;
270 }
271 }
272
273 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
274
275
276 /*---------------------------------------------------------------------------*/
277
278 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
279
280 static INLINE
281 void
ss_fixdown(const unsigned char * Td,const int * PA,int * SA,int i,int size)282 ss_fixdown(const unsigned char *Td, const int *PA,
283 int *SA, int i, int size) {
284 int j, k;
285 int v;
286 int c, d, e;
287
288 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289 d = Td[PA[SA[k = j++]]];
290 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291 if(d <= c) { break; }
292 }
293 SA[i] = v;
294 }
295
296 /* Simple top-down heapsort. */
297 static
298 void
ss_heapsort(const unsigned char * Td,const int * PA,int * SA,int size)299 ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
300 int i, m;
301 int t;
302
303 m = size;
304 if((size % 2) == 0) {
305 m--;
306 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
307 }
308
309 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311 for(i = m - 1; 0 < i; --i) {
312 t = SA[0], SA[0] = SA[i];
313 ss_fixdown(Td, PA, SA, 0, i);
314 SA[i] = t;
315 }
316 }
317
318
319 /*---------------------------------------------------------------------------*/
320
321 /* Returns the median of three elements. */
322 static INLINE
323 int *
ss_median3(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3)324 ss_median3(const unsigned char *Td, const int *PA,
325 int *v1, int *v2, int *v3) {
326 int *t;
327 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328 if(Td[PA[*v2]] > Td[PA[*v3]]) {
329 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
330 else { return v3; }
331 }
332 return v2;
333 }
334
335 /* Returns the median of five elements. */
336 static INLINE
337 int *
ss_median5(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3,int * v4,int * v5)338 ss_median5(const unsigned char *Td, const int *PA,
339 int *v1, int *v2, int *v3, int *v4, int *v5) {
340 int *t;
341 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
347 return v3;
348 }
349
350 /* Returns the pivot element. */
351 static INLINE
352 int *
ss_pivot(const unsigned char * Td,const int * PA,int * first,int * last)353 ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
354 int *middle;
355 int t;
356
357 t = last - first;
358 middle = first + t / 2;
359
360 if(t <= 512) {
361 if(t <= 32) {
362 return ss_median3(Td, PA, first, middle, last - 1);
363 } else {
364 t >>= 2;
365 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
366 }
367 }
368 t >>= 3;
369 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
370 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372 return ss_median3(Td, PA, first, middle, last);
373 }
374
375
376 /*---------------------------------------------------------------------------*/
377
378 /* Binary partition for substrings. */
379 static INLINE
380 int *
ss_partition(const int * PA,int * first,int * last,int depth)381 ss_partition(const int *PA,
382 int *first, int *last, int depth) {
383 int *a, *b;
384 int t;
385 for(a = first - 1, b = last;;) {
386 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
388 if(b <= a) { break; }
389 t = ~*b;
390 *b = *a;
391 *a = t;
392 }
393 if(first < a) { *first = ~*first; }
394 return a;
395 }
396
397 /* Multikey introsort for medium size groups. */
398 static
399 void
ss_mintrosort(const unsigned char * T,const int * PA,int * first,int * last,int depth)400 ss_mintrosort(const unsigned char *T, const int *PA,
401 int *first, int *last,
402 int depth) {
403 #define STACK_SIZE SS_MISORT_STACKSIZE
404 struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405 const unsigned char *Td;
406 int *a, *b, *c, *d, *e, *f;
407 int s, t;
408 int ssize;
409 int limit;
410 int v, x = 0;
411
412 for(ssize = 0, limit = ss_ilg(last - first);;) {
413
414 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415 #if 1 < SS_INSERTIONSORT_THRESHOLD
416 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
417 #endif
418 STACK_POP(first, last, depth, limit);
419 continue;
420 }
421
422 Td = T + depth;
423 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
424 if(limit < 0) {
425 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426 if((x = Td[PA[*a]]) != v) {
427 if(1 < (a - first)) { break; }
428 v = x;
429 first = a;
430 }
431 }
432 if(Td[PA[*first] - 1] < v) {
433 first = ss_partition(PA, first, a, depth);
434 }
435 if((a - first) <= (last - a)) {
436 if(1 < (a - first)) {
437 STACK_PUSH(a, last, depth, -1);
438 last = a, depth += 1, limit = ss_ilg(a - first);
439 } else {
440 first = a, limit = -1;
441 }
442 } else {
443 if(1 < (last - a)) {
444 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445 first = a, limit = -1;
446 } else {
447 last = a, depth += 1, limit = ss_ilg(a - first);
448 }
449 }
450 continue;
451 }
452
453 /* choose pivot */
454 a = ss_pivot(Td, PA, first, last);
455 v = Td[PA[*a]];
456 SWAP(*first, *a);
457
458 /* partition */
459 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460 if(((a = b) < last) && (x < v)) {
461 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462 if(x == v) { SWAP(*b, *a); ++a; }
463 }
464 }
465 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466 if((b < (d = c)) && (x > v)) {
467 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468 if(x == v) { SWAP(*c, *d); --d; }
469 }
470 }
471 for(; b < c;) {
472 SWAP(*b, *c);
473 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474 if(x == v) { SWAP(*b, *a); ++a; }
475 }
476 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477 if(x == v) { SWAP(*c, *d); --d; }
478 }
479 }
480
481 if(a <= d) {
482 c = b - 1;
483
484 if((s = a - first) > (t = b - a)) { s = t; }
485 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486 if((s = d - c) > (t = last - d - 1)) { s = t; }
487 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
488
489 a = first + (b - a), c = last - (d - c);
490 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
491
492 if((a - first) <= (last - c)) {
493 if((last - c) <= (c - b)) {
494 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495 STACK_PUSH(c, last, depth, limit);
496 last = a;
497 } else if((a - first) <= (c - b)) {
498 STACK_PUSH(c, last, depth, limit);
499 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500 last = a;
501 } else {
502 STACK_PUSH(c, last, depth, limit);
503 STACK_PUSH(first, a, depth, limit);
504 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505 }
506 } else {
507 if((a - first) <= (c - b)) {
508 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509 STACK_PUSH(first, a, depth, limit);
510 first = c;
511 } else if((last - c) <= (c - b)) {
512 STACK_PUSH(first, a, depth, limit);
513 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
514 first = c;
515 } else {
516 STACK_PUSH(first, a, depth, limit);
517 STACK_PUSH(c, last, depth, limit);
518 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
519 }
520 }
521 } else {
522 limit += 1;
523 if(Td[PA[*first] - 1] < v) {
524 first = ss_partition(PA, first, last, depth);
525 limit = ss_ilg(last - first);
526 }
527 depth += 1;
528 }
529 }
530 #undef STACK_SIZE
531 }
532
533 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
534
535
536 /*---------------------------------------------------------------------------*/
537
538 #if SS_BLOCKSIZE != 0
539
540 static INLINE
541 void
ss_blockswap(int * a,int * b,int n)542 ss_blockswap(int *a, int *b, int n) {
543 int t;
544 for(; 0 < n; --n, ++a, ++b) {
545 t = *a, *a = *b, *b = t;
546 }
547 }
548
549 static INLINE
550 void
ss_rotate(int * first,int * middle,int * last)551 ss_rotate(int *first, int *middle, int *last) {
552 int *a, *b, t;
553 int l, r;
554 l = middle - first, r = last - middle;
555 for(; (0 < l) && (0 < r);) {
556 if(l == r) { ss_blockswap(first, middle, l); break; }
557 if(l < r) {
558 a = last - 1, b = middle - 1;
559 t = *a;
560 do {
561 *a-- = *b, *b-- = *a;
562 if(b < first) {
563 *a = t;
564 last = a;
565 if((r -= l + 1) <= l) { break; }
566 a -= 1, b = middle - 1;
567 t = *a;
568 }
569 } while(1);
570 } else {
571 a = first, b = middle;
572 t = *a;
573 do {
574 *a++ = *b, *b++ = *a;
575 if(last <= b) {
576 *a = t;
577 first = a + 1;
578 if((l -= r + 1) <= r) { break; }
579 a += 1, b = middle;
580 t = *a;
581 }
582 } while(1);
583 }
584 }
585 }
586
587
588 /*---------------------------------------------------------------------------*/
589
590 static
591 void
ss_inplacemerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int depth)592 ss_inplacemerge(const unsigned char *T, const int *PA,
593 int *first, int *middle, int *last,
594 int depth) {
595 const int *p;
596 int *a, *b;
597 int len, half;
598 int q, r;
599 int x;
600
601 for(;;) {
602 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603 else { x = 0; p = PA + *(last - 1); }
604 for(a = first, len = middle - first, half = len >> 1, r = -1;
605 0 < len;
606 len = half, half >>= 1) {
607 b = a + half;
608 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
609 if(q < 0) {
610 a = b + 1;
611 half -= (len & 1) ^ 1;
612 } else {
613 r = q;
614 }
615 }
616 if(a < middle) {
617 if(r == 0) { *a = ~*a; }
618 ss_rotate(a, middle, last);
619 last -= middle - a;
620 middle = a;
621 if(first == middle) { break; }
622 }
623 --last;
624 if(x != 0) { while(*--last < 0) { } }
625 if(middle == last) { break; }
626 }
627 }
628
629
630 /*---------------------------------------------------------------------------*/
631
632 /* Merge-forward with internal buffer. */
633 static
634 void
ss_mergeforward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)635 ss_mergeforward(const unsigned char *T, const int *PA,
636 int *first, int *middle, int *last,
637 int *buf, int depth) {
638 int *a, *b, *c, *bufend;
639 int t;
640 int r;
641
642 bufend = buf + (middle - first) - 1;
643 ss_blockswap(buf, first, middle - first);
644
645 for(t = *(a = first), b = buf, c = middle;;) {
646 r = ss_compare(T, PA + *b, PA + *c, depth);
647 if(r < 0) {
648 do {
649 *a++ = *b;
650 if(bufend <= b) { *bufend = t; return; }
651 *b++ = *a;
652 } while(*b < 0);
653 } else if(r > 0) {
654 do {
655 *a++ = *c, *c++ = *a;
656 if(last <= c) {
657 while(b < bufend) { *a++ = *b, *b++ = *a; }
658 *a = *b, *b = t;
659 return;
660 }
661 } while(*c < 0);
662 } else {
663 *c = ~*c;
664 do {
665 *a++ = *b;
666 if(bufend <= b) { *bufend = t; return; }
667 *b++ = *a;
668 } while(*b < 0);
669
670 do {
671 *a++ = *c, *c++ = *a;
672 if(last <= c) {
673 while(b < bufend) { *a++ = *b, *b++ = *a; }
674 *a = *b, *b = t;
675 return;
676 }
677 } while(*c < 0);
678 }
679 }
680 }
681
682 /* Merge-backward with internal buffer. */
683 static
684 void
ss_mergebackward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)685 ss_mergebackward(const unsigned char *T, const int *PA,
686 int *first, int *middle, int *last,
687 int *buf, int depth) {
688 const int *p1, *p2;
689 int *a, *b, *c, *bufend;
690 int t;
691 int r;
692 int x;
693
694 bufend = buf + (last - middle) - 1;
695 ss_blockswap(buf, middle, last - middle);
696
697 x = 0;
698 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
699 else { p1 = PA + *bufend; }
700 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701 else { p2 = PA + *(middle - 1); }
702 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703 r = ss_compare(T, p1, p2, depth);
704 if(0 < r) {
705 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
706 *a-- = *b;
707 if(b <= buf) { *buf = t; break; }
708 *b-- = *a;
709 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710 else { p1 = PA + *b; }
711 } else if(r < 0) {
712 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713 *a-- = *c, *c-- = *a;
714 if(c < first) {
715 while(buf < b) { *a-- = *b, *b-- = *a; }
716 *a = *b, *b = t;
717 break;
718 }
719 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720 else { p2 = PA + *c; }
721 } else {
722 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
723 *a-- = ~*b;
724 if(b <= buf) { *buf = t; break; }
725 *b-- = *a;
726 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727 *a-- = *c, *c-- = *a;
728 if(c < first) {
729 while(buf < b) { *a-- = *b, *b-- = *a; }
730 *a = *b, *b = t;
731 break;
732 }
733 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734 else { p1 = PA + *b; }
735 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736 else { p2 = PA + *c; }
737 }
738 }
739 }
740
741 /* D&C based merge. */
742 static
743 void
ss_swapmerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int bufsize,int depth)744 ss_swapmerge(const unsigned char *T, const int *PA,
745 int *first, int *middle, int *last,
746 int *buf, int bufsize, int depth) {
747 #define STACK_SIZE SS_SMERGE_STACKSIZE
748 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749 #define MERGE_CHECK(a, b, c)\
750 do {\
751 if(((c) & 1) ||\
752 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
753 *(a) = ~*(a);\
754 }\
755 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
756 *(b) = ~*(b);\
757 }\
758 } while(0)
759 struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760 int *l, *r, *lm, *rm;
761 int m, len, half;
762 int ssize;
763 int check, next;
764
765 for(check = 0, ssize = 0;;) {
766 if((last - middle) <= bufsize) {
767 if((first < middle) && (middle < last)) {
768 ss_mergebackward(T, PA, first, middle, last, buf, depth);
769 }
770 MERGE_CHECK(first, last, check);
771 STACK_POP(first, middle, last, check);
772 continue;
773 }
774
775 if((middle - first) <= bufsize) {
776 if(first < middle) {
777 ss_mergeforward(T, PA, first, middle, last, buf, depth);
778 }
779 MERGE_CHECK(first, last, check);
780 STACK_POP(first, middle, last, check);
781 continue;
782 }
783
784 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
785 0 < len;
786 len = half, half >>= 1) {
787 if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
789 m += half + 1;
790 half -= (len & 1) ^ 1;
791 }
792 }
793
794 if(0 < m) {
795 lm = middle - m, rm = middle + m;
796 ss_blockswap(lm, middle, m);
797 l = r = middle, next = 0;
798 if(rm < last) {
799 if(*rm < 0) {
800 *rm = ~*rm;
801 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
802 next |= 1;
803 } else if(first < lm) {
804 for(; *r < 0; ++r) { }
805 next |= 2;
806 }
807 }
808
809 if((l - first) <= (last - r)) {
810 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811 middle = lm, last = l, check = (check & 3) | (next & 4);
812 } else {
813 if((next & 2) && (r == middle)) { next ^= 6; }
814 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815 first = r, middle = rm, check = (next & 3) | (check & 4);
816 }
817 } else {
818 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
819 *middle = ~*middle;
820 }
821 MERGE_CHECK(first, last, check);
822 STACK_POP(first, middle, last, check);
823 }
824 }
825 #undef STACK_SIZE
826 }
827
828 #endif /* SS_BLOCKSIZE != 0 */
829
830
831 /*---------------------------------------------------------------------------*/
832
833 /* Substring sort */
834 static
835 void
sssort(const unsigned char * T,const int * PA,int * first,int * last,int * buf,int bufsize,int depth,int n,int lastsuffix)836 sssort(const unsigned char *T, const int *PA,
837 int *first, int *last,
838 int *buf, int bufsize,
839 int depth, int n, int lastsuffix) {
840 int *a;
841 #if SS_BLOCKSIZE != 0
842 int *b, *middle, *curbuf;
843 int j, k, curbufsize, limit;
844 #endif
845 int i;
846
847 if(lastsuffix != 0) { ++first; }
848
849 #if SS_BLOCKSIZE == 0
850 ss_mintrosort(T, PA, first, last, depth);
851 #else
852 if((bufsize < SS_BLOCKSIZE) &&
853 (bufsize < (last - first)) &&
854 (bufsize < (limit = ss_isqrt(last - first)))) {
855 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856 buf = middle = last - limit, bufsize = limit;
857 } else {
858 middle = last, limit = 0;
859 }
860 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863 #elif 1 < SS_BLOCKSIZE
864 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
865 #endif
866 curbufsize = last - (a + SS_BLOCKSIZE);
867 curbuf = a + SS_BLOCKSIZE;
868 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
871 }
872 }
873 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874 ss_mintrosort(T, PA, a, middle, depth);
875 #elif 1 < SS_BLOCKSIZE
876 ss_insertionsort(T, PA, a, middle, depth);
877 #endif
878 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
879 if(i & 1) {
880 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
881 a -= k;
882 }
883 }
884 if(limit != 0) {
885 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886 ss_mintrosort(T, PA, middle, last, depth);
887 #elif 1 < SS_BLOCKSIZE
888 ss_insertionsort(T, PA, middle, last, depth);
889 #endif
890 ss_inplacemerge(T, PA, first, middle, last, depth);
891 }
892 #endif
893
894 if(lastsuffix != 0) {
895 /* Insert last type B* suffix. */
896 int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897 for(a = first, i = *(first - 1);
898 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
899 ++a) {
900 *(a - 1) = *a;
901 }
902 *(a - 1) = i;
903 }
904 }
905
906
907 /*---------------------------------------------------------------------------*/
908
909 static INLINE
910 int
tr_ilg(int n)911 tr_ilg(int n) {
912 return (n & 0xffff0000) ?
913 ((n & 0xff000000) ?
914 24 + lg_table[(n >> 24) & 0xff] :
915 16 + lg_table[(n >> 16) & 0xff]) :
916 ((n & 0x0000ff00) ?
917 8 + lg_table[(n >> 8) & 0xff] :
918 0 + lg_table[(n >> 0) & 0xff]);
919 }
920
921
922 /*---------------------------------------------------------------------------*/
923
924 /* Simple insertionsort for small size groups. */
925 static
926 void
tr_insertionsort(const int * ISAd,int * first,int * last)927 tr_insertionsort(const int *ISAd, int *first, int *last) {
928 int *a, *b;
929 int t, r;
930
931 for(a = first + 1; a < last; ++a) {
932 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934 if(b < first) { break; }
935 }
936 if(r == 0) { *b = ~*b; }
937 *(b + 1) = t;
938 }
939 }
940
941
942 /*---------------------------------------------------------------------------*/
943
944 static INLINE
945 void
tr_fixdown(const int * ISAd,int * SA,int i,int size)946 tr_fixdown(const int *ISAd, int *SA, int i, int size) {
947 int j, k;
948 int v;
949 int c, d, e;
950
951 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952 d = ISAd[SA[k = j++]];
953 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954 if(d <= c) { break; }
955 }
956 SA[i] = v;
957 }
958
959 /* Simple top-down heapsort. */
960 static
961 void
tr_heapsort(const int * ISAd,int * SA,int size)962 tr_heapsort(const int *ISAd, int *SA, int size) {
963 int i, m;
964 int t;
965
966 m = size;
967 if((size % 2) == 0) {
968 m--;
969 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
970 }
971
972 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973 if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974 for(i = m - 1; 0 < i; --i) {
975 t = SA[0], SA[0] = SA[i];
976 tr_fixdown(ISAd, SA, 0, i);
977 SA[i] = t;
978 }
979 }
980
981
982 /*---------------------------------------------------------------------------*/
983
984 /* Returns the median of three elements. */
985 static INLINE
986 int *
tr_median3(const int * ISAd,int * v1,int * v2,int * v3)987 tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
988 int *t;
989 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990 if(ISAd[*v2] > ISAd[*v3]) {
991 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
992 else { return v3; }
993 }
994 return v2;
995 }
996
997 /* Returns the median of five elements. */
998 static INLINE
999 int *
tr_median5(const int * ISAd,int * v1,int * v2,int * v3,int * v4,int * v5)1000 tr_median5(const int *ISAd,
1001 int *v1, int *v2, int *v3, int *v4, int *v5) {
1002 int *t;
1003 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005 if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1009 return v3;
1010 }
1011
1012 /* Returns the pivot element. */
1013 static INLINE
1014 int *
tr_pivot(const int * ISAd,int * first,int * last)1015 tr_pivot(const int *ISAd, int *first, int *last) {
1016 int *middle;
1017 int t;
1018
1019 t = last - first;
1020 middle = first + t / 2;
1021
1022 if(t <= 512) {
1023 if(t <= 32) {
1024 return tr_median3(ISAd, first, middle, last - 1);
1025 } else {
1026 t >>= 2;
1027 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1028 }
1029 }
1030 t >>= 3;
1031 first = tr_median3(ISAd, first, first + t, first + (t << 1));
1032 middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034 return tr_median3(ISAd, first, middle, last);
1035 }
1036
1037
1038 /*---------------------------------------------------------------------------*/
1039
1040 typedef struct _trbudget_t trbudget_t;
1041 struct _trbudget_t {
1042 int chance;
1043 int remain;
1044 int incval;
1045 int count;
1046 };
1047
1048 static INLINE
1049 void
trbudget_init(trbudget_t * budget,int chance,int incval)1050 trbudget_init(trbudget_t *budget, int chance, int incval) {
1051 budget->chance = chance;
1052 budget->remain = budget->incval = incval;
1053 }
1054
1055 static INLINE
1056 int
trbudget_check(trbudget_t * budget,int size)1057 trbudget_check(trbudget_t *budget, int size) {
1058 if(size <= budget->remain) { budget->remain -= size; return 1; }
1059 if(budget->chance == 0) { budget->count += size; return 0; }
1060 budget->remain += budget->incval - size;
1061 budget->chance -= 1;
1062 return 1;
1063 }
1064
1065
1066 /*---------------------------------------------------------------------------*/
1067
1068 static INLINE
1069 void
tr_partition(const int * ISAd,int * first,int * middle,int * last,int ** pa,int ** pb,int v)1070 tr_partition(const int *ISAd,
1071 int *first, int *middle, int *last,
1072 int **pa, int **pb, int v) {
1073 int *a, *b, *c, *d, *e, *f;
1074 int t, s;
1075 int x = 0;
1076
1077 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078 if(((a = b) < last) && (x < v)) {
1079 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080 if(x == v) { SWAP(*b, *a); ++a; }
1081 }
1082 }
1083 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084 if((b < (d = c)) && (x > v)) {
1085 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086 if(x == v) { SWAP(*c, *d); --d; }
1087 }
1088 }
1089 for(; b < c;) {
1090 SWAP(*b, *c);
1091 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092 if(x == v) { SWAP(*b, *a); ++a; }
1093 }
1094 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095 if(x == v) { SWAP(*c, *d); --d; }
1096 }
1097 }
1098
1099 if(a <= d) {
1100 c = b - 1;
1101 if((s = a - first) > (t = b - a)) { s = t; }
1102 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103 if((s = d - c) > (t = last - d - 1)) { s = t; }
1104 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105 first += (b - a), last -= (d - c);
1106 }
1107 *pa = first, *pb = last;
1108 }
1109
1110 static
1111 void
tr_copy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1112 tr_copy(int *ISA, const int *SA,
1113 int *first, int *a, int *b, int *last,
1114 int depth) {
1115 /* sort suffixes of middle partition
1116 by using sorted order of suffixes of left and right partition. */
1117 int *c, *d, *e;
1118 int s, v;
1119
1120 v = b - SA - 1;
1121 for(c = first, d = a - 1; c <= d; ++c) {
1122 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123 *++d = s;
1124 ISA[s] = d - SA;
1125 }
1126 }
1127 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129 *--d = s;
1130 ISA[s] = d - SA;
1131 }
1132 }
1133 }
1134
1135 static
1136 void
tr_partialcopy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1137 tr_partialcopy(int *ISA, const int *SA,
1138 int *first, int *a, int *b, int *last,
1139 int depth) {
1140 int *c, *d, *e;
1141 int s, v;
1142 int rank, lastrank, newrank = -1;
1143
1144 v = b - SA - 1;
1145 lastrank = -1;
1146 for(c = first, d = a - 1; c <= d; ++c) {
1147 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1148 *++d = s;
1149 rank = ISA[s + depth];
1150 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1151 ISA[s] = newrank;
1152 }
1153 }
1154
1155 lastrank = -1;
1156 for(e = d; first <= e; --e) {
1157 rank = ISA[*e];
1158 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159 if(newrank != rank) { ISA[*e] = newrank; }
1160 }
1161
1162 lastrank = -1;
1163 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1165 *--d = s;
1166 rank = ISA[s + depth];
1167 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1168 ISA[s] = newrank;
1169 }
1170 }
1171 }
1172
1173 static
1174 void
tr_introsort(int * ISA,const int * ISAd,int * SA,int * first,int * last,trbudget_t * budget)1175 tr_introsort(int *ISA, const int *ISAd,
1176 int *SA, int *first, int *last,
1177 trbudget_t *budget) {
1178 #define STACK_SIZE TR_STACKSIZE
1179 struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1180 int *a, *b, *c;
1181 int t;
1182 int v, x = 0;
1183 int incr = ISAd - ISA;
1184 int limit, next;
1185 int ssize, trlink = -1;
1186
1187 for(ssize = 0, limit = tr_ilg(last - first);;) {
1188
1189 if(limit < 0) {
1190 if(limit == -1) {
1191 /* tandem repeat partition */
1192 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1193
1194 /* update ranks */
1195 if(a < last) {
1196 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1197 }
1198 if(b < last) {
1199 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1200 }
1201
1202 /* push */
1203 if(1 < (b - a)) {
1204 STACK_PUSH5(NULL, a, b, 0, 0);
1205 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1206 trlink = ssize - 2;
1207 }
1208 if((a - first) <= (last - b)) {
1209 if(1 < (a - first)) {
1210 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211 last = a, limit = tr_ilg(a - first);
1212 } else if(1 < (last - b)) {
1213 first = b, limit = tr_ilg(last - b);
1214 } else {
1215 STACK_POP5(ISAd, first, last, limit, trlink);
1216 }
1217 } else {
1218 if(1 < (last - b)) {
1219 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220 first = b, limit = tr_ilg(last - b);
1221 } else if(1 < (a - first)) {
1222 last = a, limit = tr_ilg(a - first);
1223 } else {
1224 STACK_POP5(ISAd, first, last, limit, trlink);
1225 }
1226 }
1227 } else if(limit == -2) {
1228 /* tandem repeat copy */
1229 a = stack[--ssize].b, b = stack[ssize].c;
1230 if(stack[ssize].d == 0) {
1231 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1232 } else {
1233 if(0 <= trlink) { stack[trlink].d = -1; }
1234 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1235 }
1236 STACK_POP5(ISAd, first, last, limit, trlink);
1237 } else {
1238 /* sorted partition */
1239 if(0 <= *first) {
1240 a = first;
1241 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1242 first = a;
1243 }
1244 if(first < last) {
1245 a = first; do { *a = ~*a; } while(*++a < 0);
1246 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1248
1249 /* push */
1250 if(trbudget_check(budget, a - first)) {
1251 if((a - first) <= (last - a)) {
1252 STACK_PUSH5(ISAd, a, last, -3, trlink);
1253 ISAd += incr, last = a, limit = next;
1254 } else {
1255 if(1 < (last - a)) {
1256 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257 first = a, limit = -3;
1258 } else {
1259 ISAd += incr, last = a, limit = next;
1260 }
1261 }
1262 } else {
1263 if(0 <= trlink) { stack[trlink].d = -1; }
1264 if(1 < (last - a)) {
1265 first = a, limit = -3;
1266 } else {
1267 STACK_POP5(ISAd, first, last, limit, trlink);
1268 }
1269 }
1270 } else {
1271 STACK_POP5(ISAd, first, last, limit, trlink);
1272 }
1273 }
1274 continue;
1275 }
1276
1277 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278 tr_insertionsort(ISAd, first, last);
1279 limit = -3;
1280 continue;
1281 }
1282
1283 if(limit-- == 0) {
1284 tr_heapsort(ISAd, first, last - first);
1285 for(a = last - 1; first < a; a = b) {
1286 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1287 }
1288 limit = -3;
1289 continue;
1290 }
1291
1292 /* choose pivot */
1293 a = tr_pivot(ISAd, first, last);
1294 SWAP(*first, *a);
1295 v = ISAd[*first];
1296
1297 /* partition */
1298 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299 if((last - first) != (b - a)) {
1300 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1301
1302 /* update ranks */
1303 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1305
1306 /* push */
1307 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308 if((a - first) <= (last - b)) {
1309 if((last - b) <= (b - a)) {
1310 if(1 < (a - first)) {
1311 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312 STACK_PUSH5(ISAd, b, last, limit, trlink);
1313 last = a;
1314 } else if(1 < (last - b)) {
1315 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1316 first = b;
1317 } else {
1318 ISAd += incr, first = a, last = b, limit = next;
1319 }
1320 } else if((a - first) <= (b - a)) {
1321 if(1 < (a - first)) {
1322 STACK_PUSH5(ISAd, b, last, limit, trlink);
1323 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324 last = a;
1325 } else {
1326 STACK_PUSH5(ISAd, b, last, limit, trlink);
1327 ISAd += incr, first = a, last = b, limit = next;
1328 }
1329 } else {
1330 STACK_PUSH5(ISAd, b, last, limit, trlink);
1331 STACK_PUSH5(ISAd, first, a, limit, trlink);
1332 ISAd += incr, first = a, last = b, limit = next;
1333 }
1334 } else {
1335 if((a - first) <= (b - a)) {
1336 if(1 < (last - b)) {
1337 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338 STACK_PUSH5(ISAd, first, a, limit, trlink);
1339 first = b;
1340 } else if(1 < (a - first)) {
1341 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1342 last = a;
1343 } else {
1344 ISAd += incr, first = a, last = b, limit = next;
1345 }
1346 } else if((last - b) <= (b - a)) {
1347 if(1 < (last - b)) {
1348 STACK_PUSH5(ISAd, first, a, limit, trlink);
1349 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1350 first = b;
1351 } else {
1352 STACK_PUSH5(ISAd, first, a, limit, trlink);
1353 ISAd += incr, first = a, last = b, limit = next;
1354 }
1355 } else {
1356 STACK_PUSH5(ISAd, first, a, limit, trlink);
1357 STACK_PUSH5(ISAd, b, last, limit, trlink);
1358 ISAd += incr, first = a, last = b, limit = next;
1359 }
1360 }
1361 } else {
1362 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363 if((a - first) <= (last - b)) {
1364 if(1 < (a - first)) {
1365 STACK_PUSH5(ISAd, b, last, limit, trlink);
1366 last = a;
1367 } else if(1 < (last - b)) {
1368 first = b;
1369 } else {
1370 STACK_POP5(ISAd, first, last, limit, trlink);
1371 }
1372 } else {
1373 if(1 < (last - b)) {
1374 STACK_PUSH5(ISAd, first, a, limit, trlink);
1375 first = b;
1376 } else if(1 < (a - first)) {
1377 last = a;
1378 } else {
1379 STACK_POP5(ISAd, first, last, limit, trlink);
1380 }
1381 }
1382 }
1383 } else {
1384 if(trbudget_check(budget, last - first)) {
1385 limit = tr_ilg(last - first), ISAd += incr;
1386 } else {
1387 if(0 <= trlink) { stack[trlink].d = -1; }
1388 STACK_POP5(ISAd, first, last, limit, trlink);
1389 }
1390 }
1391 }
1392 #undef STACK_SIZE
1393 }
1394
1395
1396
1397 /*---------------------------------------------------------------------------*/
1398
1399 /* Tandem repeat sort */
1400 static
1401 void
trsort(int * ISA,int * SA,int n,int depth)1402 trsort(int *ISA, int *SA, int n, int depth) {
1403 int *ISAd;
1404 int *first, *last;
1405 trbudget_t budget;
1406 int t, skip, unsorted;
1407
1408 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1411 first = SA;
1412 skip = 0;
1413 unsorted = 0;
1414 do {
1415 if((t = *first) < 0) { first -= t; skip += t; }
1416 else {
1417 if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418 last = SA + ISA[t] + 1;
1419 if(1 < (last - first)) {
1420 budget.count = 0;
1421 tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422 if(budget.count != 0) { unsorted += budget.count; }
1423 else { skip = first - last; }
1424 } else if((last - first) == 1) {
1425 skip = -1;
1426 }
1427 first = last;
1428 }
1429 } while(first < (SA + n));
1430 if(skip != 0) { *(first + skip) = skip; }
1431 if(unsorted == 0) { break; }
1432 }
1433 }
1434
1435
1436 /*---------------------------------------------------------------------------*/
1437
1438 /* Sorts suffixes of type B*. */
1439 static
1440 int
sort_typeBstar(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int openMP)1441 sort_typeBstar(const unsigned char *T, int *SA,
1442 int *bucket_A, int *bucket_B,
1443 int n, int openMP) {
1444 int *PAb, *ISAb, *buf;
1445 #ifdef LIBBSC_OPENMP
1446 int *curbuf;
1447 int l;
1448 #endif
1449 int i, j, k, t, m, bufsize;
1450 int c0, c1;
1451 #ifdef LIBBSC_OPENMP
1452 int d0, d1;
1453 #endif
1454 (void)openMP;
1455
1456 /* Initialize bucket arrays. */
1457 for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458 for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1459
1460 /* Count the number of occurrences of the first one or two characters of each
1461 type A, B and B* suffix. Moreover, store the beginning position of all
1462 type B* suffixes into the array SA. */
1463 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464 /* type A suffix. */
1465 do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1466 if(0 <= i) {
1467 /* type B* suffix. */
1468 ++BUCKET_BSTAR(c0, c1);
1469 SA[--m] = i;
1470 /* type B suffix. */
1471 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1472 ++BUCKET_B(c0, c1);
1473 }
1474 }
1475 }
1476 m = n - m;
1477 /*
1478 note:
1479 A type B* suffix is lexicographically smaller than a type B suffix that
1480 begins with the same first two characters.
1481 */
1482
1483 /* Calculate the index of start/end point of each bucket. */
1484 for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485 t = i + BUCKET_A(c0);
1486 BUCKET_A(c0) = i + j; /* start point */
1487 i = t + BUCKET_B(c0, c0);
1488 for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489 j += BUCKET_BSTAR(c0, c1);
1490 BUCKET_BSTAR(c0, c1) = j; /* end point */
1491 i += BUCKET_B(c0, c1);
1492 }
1493 }
1494
1495 if(0 < m) {
1496 /* Sort the type B* suffixes by their first two characters. */
1497 PAb = SA + n - m; ISAb = SA + m;
1498 for(i = m - 2; 0 <= i; --i) {
1499 t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500 SA[--BUCKET_BSTAR(c0, c1)] = i;
1501 }
1502 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503 SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1504
1505 /* Sort the type B* substrings using sssort. */
1506 #ifdef LIBBSC_OPENMP
1507 if (openMP)
1508 {
1509 buf = SA + m;
1510 c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511 #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1512 {
1513 bufsize = (n - (2 * m)) / omp_get_num_threads();
1514 curbuf = buf + omp_get_thread_num() * bufsize;
1515 k = 0;
1516 for(;;) {
1517 #pragma omp critical(sssort_lock)
1518 {
1519 if(0 < (l = j)) {
1520 d0 = c0, d1 = c1;
1521 do {
1522 k = BUCKET_BSTAR(d0, d1);
1523 if(--d1 <= d0) {
1524 d1 = ALPHABET_SIZE - 1;
1525 if(--d0 < 0) { break; }
1526 }
1527 } while(((l - k) <= 1) && (0 < (l = k)));
1528 c0 = d0, c1 = d1, j = k;
1529 }
1530 }
1531 if(l == 0) { break; }
1532 sssort(T, PAb, SA + k, SA + l,
1533 curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1534 }
1535 }
1536 }
1537 else
1538 {
1539 buf = SA + m, bufsize = n - (2 * m);
1540 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542 i = BUCKET_BSTAR(c0, c1);
1543 if(1 < (j - i)) {
1544 sssort(T, PAb, SA + i, SA + j,
1545 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1546 }
1547 }
1548 }
1549 }
1550 #else
1551 buf = SA + m, bufsize = n - (2 * m);
1552 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554 i = BUCKET_BSTAR(c0, c1);
1555 if(1 < (j - i)) {
1556 sssort(T, PAb, SA + i, SA + j,
1557 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1558 }
1559 }
1560 }
1561 #endif
1562
1563 /* Compute ranks of type B* substrings. */
1564 for(i = m - 1; 0 <= i; --i) {
1565 if(0 <= SA[i]) {
1566 j = i;
1567 do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1568 SA[i + 1] = i - j;
1569 if(i <= 0) { break; }
1570 }
1571 j = i;
1572 do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1573 ISAb[SA[i]] = j;
1574 }
1575
1576 /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577 trsort(ISAb, SA, m, 1);
1578
1579 /* Set the sorted order of type B* suffixes. */
1580 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582 if(0 <= i) {
1583 t = i;
1584 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1586 }
1587 }
1588
1589 /* Calculate the index of start/end point of each bucket. */
1590 BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591 for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592 i = BUCKET_A(c0 + 1) - 1;
1593 for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594 t = i - BUCKET_B(c0, c1);
1595 BUCKET_B(c0, c1) = i; /* end point */
1596
1597 /* Move all type B* suffixes to the correct position. */
1598 for(i = t, j = BUCKET_BSTAR(c0, c1);
1599 j <= k;
1600 --i, --k) { SA[i] = SA[k]; }
1601 }
1602 BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603 BUCKET_B(c0, c0) = i; /* end point */
1604 }
1605 }
1606
1607 return m;
1608 }
1609
1610 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1611 static
1612 void
construct_SA(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1613 construct_SA(const unsigned char *T, int *SA,
1614 int *bucket_A, int *bucket_B,
1615 int n, int m) {
1616 int *i, *j, *k;
1617 int s;
1618 int c0, c1, c2;
1619
1620 if(0 < m) {
1621 /* Construct the sorted order of type B suffixes by using
1622 the sorted order of type B* suffixes. */
1623 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624 /* Scan the suffix array from right to left. */
1625 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1627 i <= j;
1628 --j) {
1629 if(0 < (s = *j)) {
1630 assert(T[s] == c1);
1631 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632 assert(T[s - 1] <= T[s]);
1633 *j = ~s;
1634 c0 = T[--s];
1635 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1636 if(c0 != c2) {
1637 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638 k = SA + BUCKET_B(c2 = c0, c1);
1639 }
1640 assert(k < j); assert(k != NULL);
1641 *k-- = s;
1642 } else {
1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
1644 *j = ~s;
1645 }
1646 }
1647 }
1648 }
1649
1650 /* Construct the suffix array by using
1651 the sorted order of type B suffixes. */
1652 k = SA + BUCKET_A(c2 = T[n - 1]);
1653 *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654 /* Scan the suffix array from left to right. */
1655 for(i = SA, j = SA + n; i < j; ++i) {
1656 if(0 < (s = *i)) {
1657 assert(T[s - 1] >= T[s]);
1658 c0 = T[--s];
1659 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1660 if(c0 != c2) {
1661 BUCKET_A(c2) = k - SA;
1662 k = SA + BUCKET_A(c2 = c0);
1663 }
1664 assert(i < k);
1665 *k++ = s;
1666 } else {
1667 assert(s < 0);
1668 *i = ~s;
1669 }
1670 }
1671 }
1672
1673 /* Constructs the burrows-wheeler transformed string directly
1674 by using the sorted order of type B* suffixes. */
1675 static
1676 int
construct_BWT(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1677 construct_BWT(const unsigned char *T, int *SA,
1678 int *bucket_A, int *bucket_B,
1679 int n, int m) {
1680 int *i, *j, *k, *orig;
1681 int s;
1682 int c0, c1, c2;
1683
1684 if(0 < m) {
1685 /* Construct the sorted order of type B suffixes by using
1686 the sorted order of type B* suffixes. */
1687 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688 /* Scan the suffix array from right to left. */
1689 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1691 i <= j;
1692 --j) {
1693 if(0 < (s = *j)) {
1694 assert(T[s] == c1);
1695 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696 assert(T[s - 1] <= T[s]);
1697 c0 = T[--s];
1698 *j = ~((int)c0);
1699 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1700 if(c0 != c2) {
1701 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702 k = SA + BUCKET_B(c2 = c0, c1);
1703 }
1704 assert(k < j); assert(k != NULL);
1705 *k-- = s;
1706 } else if(s != 0) {
1707 *j = ~s;
1708 #ifndef NDEBUG
1709 } else {
1710 assert(T[s] == c1);
1711 #endif
1712 }
1713 }
1714 }
1715 }
1716
1717 /* Construct the BWTed string by using
1718 the sorted order of type B suffixes. */
1719 k = SA + BUCKET_A(c2 = T[n - 1]);
1720 *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721 /* Scan the suffix array from left to right. */
1722 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1723 if(0 < (s = *i)) {
1724 assert(T[s - 1] >= T[s]);
1725 c0 = T[--s];
1726 *i = c0;
1727 if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1728 if(c0 != c2) {
1729 BUCKET_A(c2) = k - SA;
1730 k = SA + BUCKET_A(c2 = c0);
1731 }
1732 assert(i < k);
1733 *k++ = s;
1734 } else if(s != 0) {
1735 *i = ~s;
1736 } else {
1737 orig = i;
1738 }
1739 }
1740
1741 return orig - SA;
1742 }
1743
1744 /* Constructs the burrows-wheeler transformed string directly
1745 by using the sorted order of type B* suffixes. */
1746 static
1747 int
construct_BWT_indexes(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m,unsigned char * num_indexes,int * indexes)1748 construct_BWT_indexes(const unsigned char *T, int *SA,
1749 int *bucket_A, int *bucket_B,
1750 int n, int m,
1751 unsigned char * num_indexes, int * indexes) {
1752 int *i, *j, *k, *orig;
1753 int s;
1754 int c0, c1, c2;
1755
1756 int mod = n / 8;
1757 {
1758 mod |= mod >> 1; mod |= mod >> 2;
1759 mod |= mod >> 4; mod |= mod >> 8;
1760 mod |= mod >> 16; mod >>= 1;
1761
1762 *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1763 }
1764
1765 if(0 < m) {
1766 /* Construct the sorted order of type B suffixes by using
1767 the sorted order of type B* suffixes. */
1768 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769 /* Scan the suffix array from right to left. */
1770 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1772 i <= j;
1773 --j) {
1774 if(0 < (s = *j)) {
1775 assert(T[s] == c1);
1776 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777 assert(T[s - 1] <= T[s]);
1778
1779 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1780
1781 c0 = T[--s];
1782 *j = ~((int)c0);
1783 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1784 if(c0 != c2) {
1785 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786 k = SA + BUCKET_B(c2 = c0, c1);
1787 }
1788 assert(k < j); assert(k != NULL);
1789 *k-- = s;
1790 } else if(s != 0) {
1791 *j = ~s;
1792 #ifndef NDEBUG
1793 } else {
1794 assert(T[s] == c1);
1795 #endif
1796 }
1797 }
1798 }
1799 }
1800
1801 /* Construct the BWTed string by using
1802 the sorted order of type B suffixes. */
1803 k = SA + BUCKET_A(c2 = T[n - 1]);
1804 if (T[n - 2] < c2) {
1805 if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806 *k++ = ~((int)T[n - 2]);
1807 }
1808 else {
1809 *k++ = n - 1;
1810 }
1811
1812 /* Scan the suffix array from left to right. */
1813 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1814 if(0 < (s = *i)) {
1815 assert(T[s - 1] >= T[s]);
1816
1817 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1818
1819 c0 = T[--s];
1820 *i = c0;
1821 if(c0 != c2) {
1822 BUCKET_A(c2) = k - SA;
1823 k = SA + BUCKET_A(c2 = c0);
1824 }
1825 assert(i < k);
1826 if((0 < s) && (T[s - 1] < c0)) {
1827 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828 *k++ = ~((int)T[s - 1]);
1829 } else
1830 *k++ = s;
1831 } else if(s != 0) {
1832 *i = ~s;
1833 } else {
1834 orig = i;
1835 }
1836 }
1837
1838 return orig - SA;
1839 }
1840
1841
1842 /*---------------------------------------------------------------------------*/
1843
1844 /*- Function -*/
1845
1846 int
divsufsort(const unsigned char * T,int * SA,int n,int openMP)1847 divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848 int *bucket_A, *bucket_B;
1849 int m;
1850 int err = 0;
1851
1852 /* Check arguments. */
1853 if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854 else if(n == 0) { return 0; }
1855 else if(n == 1) { SA[0] = 0; return 0; }
1856 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1857
1858 bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859 bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1860
1861 /* Suffixsort. */
1862 if((bucket_A != NULL) && (bucket_B != NULL)) {
1863 m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864 construct_SA(T, SA, bucket_A, bucket_B, n, m);
1865 } else {
1866 err = -2;
1867 }
1868
1869 free(bucket_B);
1870 free(bucket_A);
1871
1872 return err;
1873 }
1874
1875 int
divbwt(const unsigned char * T,unsigned char * U,int * A,int n,unsigned char * num_indexes,int * indexes,int openMP)1876 divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1877 int *B;
1878 int *bucket_A, *bucket_B;
1879 int m, pidx, i;
1880
1881 /* Check arguments. */
1882 if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883 else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1884
1885 if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886 bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887 bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1888
1889 /* Burrows-Wheeler Transform. */
1890 if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891 m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1892
1893 if (num_indexes == NULL || indexes == NULL) {
1894 pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1895 } else {
1896 pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1897 }
1898
1899 /* Copy to output string. */
1900 U[0] = T[n - 1];
1901 for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902 for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1903 pidx += 1;
1904 } else {
1905 pidx = -2;
1906 }
1907
1908 free(bucket_B);
1909 free(bucket_A);
1910 if(A == NULL) { free(B); }
1911
1912 return pidx;
1913 }
1914