xref: /freebsd/contrib/libdivsufsort/lib/trsort.c (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1 /*
2  * trsort.c for libdivsufsort
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "divsufsort_private.h"
28 
29 
30 /*- Private Functions -*/
31 
32 static const saint_t lg_table[256]= {
33  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41 };
42 
43 static INLINE
44 saint_t
45 tr_ilg(saidx_t n) {
46 #if defined(BUILD_DIVSUFSORT64)
47   return (n >> 32) ?
48           ((n >> 48) ?
49             ((n >> 56) ?
50               56 + lg_table[(n >> 56) & 0xff] :
51               48 + lg_table[(n >> 48) & 0xff]) :
52             ((n >> 40) ?
53               40 + lg_table[(n >> 40) & 0xff] :
54               32 + lg_table[(n >> 32) & 0xff])) :
55           ((n & 0xffff0000) ?
56             ((n & 0xff000000) ?
57               24 + lg_table[(n >> 24) & 0xff] :
58               16 + lg_table[(n >> 16) & 0xff]) :
59             ((n & 0x0000ff00) ?
60                8 + lg_table[(n >>  8) & 0xff] :
61                0 + lg_table[(n >>  0) & 0xff]));
62 #else
63   return (n & 0xffff0000) ?
64           ((n & 0xff000000) ?
65             24 + lg_table[(n >> 24) & 0xff] :
66             16 + lg_table[(n >> 16) & 0xff]) :
67           ((n & 0x0000ff00) ?
68              8 + lg_table[(n >>  8) & 0xff] :
69              0 + lg_table[(n >>  0) & 0xff]);
70 #endif
71 }
72 
73 
74 /*---------------------------------------------------------------------------*/
75 
76 /* Simple insertionsort for small size groups. */
77 static
78 void
79 tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
80   saidx_t *a, *b;
81   saidx_t t, r;
82 
83   for(a = first + 1; a < last; ++a) {
84     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
85       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
86       if(b < first) { break; }
87     }
88     if(r == 0) { *b = ~*b; }
89     *(b + 1) = t;
90   }
91 }
92 
93 
94 /*---------------------------------------------------------------------------*/
95 
96 static INLINE
97 void
98 tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
99   saidx_t j, k;
100   saidx_t v;
101   saidx_t c, d, e;
102 
103   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
104     d = ISAd[SA[k = j++]];
105     if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
106     if(d <= c) { break; }
107   }
108   SA[i] = v;
109 }
110 
111 /* Simple top-down heapsort. */
112 static
113 void
114 tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
115   saidx_t i, m;
116   saidx_t t;
117 
118   m = size;
119   if((size % 2) == 0) {
120     m--;
121     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
122   }
123 
124   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
125   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
126   for(i = m - 1; 0 < i; --i) {
127     t = SA[0], SA[0] = SA[i];
128     tr_fixdown(ISAd, SA, 0, i);
129     SA[i] = t;
130   }
131 }
132 
133 
134 /*---------------------------------------------------------------------------*/
135 
136 /* Returns the median of three elements. */
137 static INLINE
138 saidx_t *
139 tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
140   saidx_t *t;
141   if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
142   if(ISAd[*v2] > ISAd[*v3]) {
143     if(ISAd[*v1] > ISAd[*v3]) { return v1; }
144     else { return v3; }
145   }
146   return v2;
147 }
148 
149 /* Returns the median of five elements. */
150 static INLINE
151 saidx_t *
152 tr_median5(const saidx_t *ISAd,
153            saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
154   saidx_t *t;
155   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
156   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
157   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
158   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
159   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
160   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
161   return v3;
162 }
163 
164 /* Returns the pivot element. */
165 static INLINE
166 saidx_t *
167 tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
168   saidx_t *middle;
169   saidx_t t;
170 
171   t = last - first;
172   middle = first + t / 2;
173 
174   if(t <= 512) {
175     if(t <= 32) {
176       return tr_median3(ISAd, first, middle, last - 1);
177     } else {
178       t >>= 2;
179       return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
180     }
181   }
182   t >>= 3;
183   first  = tr_median3(ISAd, first, first + t, first + (t << 1));
184   middle = tr_median3(ISAd, middle - t, middle, middle + t);
185   last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
186   return tr_median3(ISAd, first, middle, last);
187 }
188 
189 
190 /*---------------------------------------------------------------------------*/
191 
192 typedef struct _trbudget_t trbudget_t;
193 struct _trbudget_t {
194   saidx_t chance;
195   saidx_t remain;
196   saidx_t incval;
197   saidx_t count;
198 };
199 
200 static INLINE
201 void
202 trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
203   budget->chance = chance;
204   budget->remain = budget->incval = incval;
205 }
206 
207 static INLINE
208 saint_t
209 trbudget_check(trbudget_t *budget, saidx_t size) {
210   if(size <= budget->remain) { budget->remain -= size; return 1; }
211   if(budget->chance == 0) { budget->count += size; return 0; }
212   budget->remain += budget->incval - size;
213   budget->chance -= 1;
214   return 1;
215 }
216 
217 
218 /*---------------------------------------------------------------------------*/
219 
220 static INLINE
221 void
222 tr_partition(const saidx_t *ISAd,
223              saidx_t *first, saidx_t *middle, saidx_t *last,
224              saidx_t **pa, saidx_t **pb, saidx_t v) {
225   saidx_t *a, *b, *c, *d, *e, *f;
226   saidx_t t, s;
227   saidx_t x = 0;
228 
229   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
230   if(((a = b) < last) && (x < v)) {
231     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
232       if(x == v) { SWAP(*b, *a); ++a; }
233     }
234   }
235   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
236   if((b < (d = c)) && (x > v)) {
237     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
238       if(x == v) { SWAP(*c, *d); --d; }
239     }
240   }
241   for(; b < c;) {
242     SWAP(*b, *c);
243     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
244       if(x == v) { SWAP(*b, *a); ++a; }
245     }
246     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
247       if(x == v) { SWAP(*c, *d); --d; }
248     }
249   }
250 
251   if(a <= d) {
252     c = b - 1;
253     if((s = a - first) > (t = b - a)) { s = t; }
254     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
255     if((s = d - c) > (t = last - d - 1)) { s = t; }
256     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
257     first += (b - a), last -= (d - c);
258   }
259   *pa = first, *pb = last;
260 }
261 
262 static
263 void
264 tr_copy(saidx_t *ISA, const saidx_t *SA,
265         saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
266         saidx_t depth) {
267   /* sort suffixes of middle partition
268      by using sorted order of suffixes of left and right partition. */
269   saidx_t *c, *d, *e;
270   saidx_t s, v;
271 
272   v = b - SA - 1;
273   for(c = first, d = a - 1; c <= d; ++c) {
274     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
275       *++d = s;
276       ISA[s] = d - SA;
277     }
278   }
279   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
280     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
281       *--d = s;
282       ISA[s] = d - SA;
283     }
284   }
285 }
286 
287 static
288 void
289 tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
290                saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
291                saidx_t depth) {
292   saidx_t *c, *d, *e;
293   saidx_t s, v;
294   saidx_t rank, lastrank, newrank = -1;
295 
296   v = b - SA - 1;
297   lastrank = -1;
298   for(c = first, d = a - 1; c <= d; ++c) {
299     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
300       *++d = s;
301       rank = ISA[s + depth];
302       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
303       ISA[s] = newrank;
304     }
305   }
306 
307   lastrank = -1;
308   for(e = d; first <= e; --e) {
309     rank = ISA[*e];
310     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
311     if(newrank != rank) { ISA[*e] = newrank; }
312   }
313 
314   lastrank = -1;
315   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
316     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
317       *--d = s;
318       rank = ISA[s + depth];
319       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
320       ISA[s] = newrank;
321     }
322   }
323 }
324 
325 static
326 void
327 tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
328              saidx_t *SA, saidx_t *first, saidx_t *last,
329              trbudget_t *budget) {
330 #define STACK_SIZE TR_STACKSIZE
331   struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
332   saidx_t *a, *b, *c;
333   saidx_t t;
334   saidx_t v, x = 0;
335   saidx_t incr = ISAd - ISA;
336   saint_t limit, next;
337   saint_t ssize, trlink = -1;
338 
339   for(ssize = 0, limit = tr_ilg(last - first);;) {
340 
341     if(limit < 0) {
342       if(limit == -1) {
343         /* tandem repeat partition */
344         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
345 
346         /* update ranks */
347         if(a < last) {
348           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
349         }
350         if(b < last) {
351           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
352         }
353 
354         /* push */
355         if(1 < (b - a)) {
356           STACK_PUSH5(NULL, a, b, 0, 0);
357           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
358           trlink = ssize - 2;
359         }
360         if((a - first) <= (last - b)) {
361           if(1 < (a - first)) {
362             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
363             last = a, limit = tr_ilg(a - first);
364           } else if(1 < (last - b)) {
365             first = b, limit = tr_ilg(last - b);
366           } else {
367             STACK_POP5(ISAd, first, last, limit, trlink);
368           }
369         } else {
370           if(1 < (last - b)) {
371             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
372             first = b, limit = tr_ilg(last - b);
373           } else if(1 < (a - first)) {
374             last = a, limit = tr_ilg(a - first);
375           } else {
376             STACK_POP5(ISAd, first, last, limit, trlink);
377           }
378         }
379       } else if(limit == -2) {
380         /* tandem repeat copy */
381         a = stack[--ssize].b, b = stack[ssize].c;
382         if(stack[ssize].d == 0) {
383           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
384         } else {
385           if(0 <= trlink) { stack[trlink].d = -1; }
386           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
387         }
388         STACK_POP5(ISAd, first, last, limit, trlink);
389       } else {
390         /* sorted partition */
391         if(0 <= *first) {
392           a = first;
393           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
394           first = a;
395         }
396         if(first < last) {
397           a = first; do { *a = ~*a; } while(*++a < 0);
398           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
399           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
400 
401           /* push */
402           if(trbudget_check(budget, a - first)) {
403             if((a - first) <= (last - a)) {
404               STACK_PUSH5(ISAd, a, last, -3, trlink);
405               ISAd += incr, last = a, limit = next;
406             } else {
407               if(1 < (last - a)) {
408                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
409                 first = a, limit = -3;
410               } else {
411                 ISAd += incr, last = a, limit = next;
412               }
413             }
414           } else {
415             if(0 <= trlink) { stack[trlink].d = -1; }
416             if(1 < (last - a)) {
417               first = a, limit = -3;
418             } else {
419               STACK_POP5(ISAd, first, last, limit, trlink);
420             }
421           }
422         } else {
423           STACK_POP5(ISAd, first, last, limit, trlink);
424         }
425       }
426       continue;
427     }
428 
429     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
430       tr_insertionsort(ISAd, first, last);
431       limit = -3;
432       continue;
433     }
434 
435     if(limit-- == 0) {
436       tr_heapsort(ISAd, first, last - first);
437       for(a = last - 1; first < a; a = b) {
438         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
439       }
440       limit = -3;
441       continue;
442     }
443 
444     /* choose pivot */
445     a = tr_pivot(ISAd, first, last);
446     SWAP(*first, *a);
447     v = ISAd[*first];
448 
449     /* partition */
450     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
451     if((last - first) != (b - a)) {
452       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
453 
454       /* update ranks */
455       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
456       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
457 
458       /* push */
459       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
460         if((a - first) <= (last - b)) {
461           if((last - b) <= (b - a)) {
462             if(1 < (a - first)) {
463               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
464               STACK_PUSH5(ISAd, b, last, limit, trlink);
465               last = a;
466             } else if(1 < (last - b)) {
467               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
468               first = b;
469             } else {
470               ISAd += incr, first = a, last = b, limit = next;
471             }
472           } else if((a - first) <= (b - a)) {
473             if(1 < (a - first)) {
474               STACK_PUSH5(ISAd, b, last, limit, trlink);
475               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
476               last = a;
477             } else {
478               STACK_PUSH5(ISAd, b, last, limit, trlink);
479               ISAd += incr, first = a, last = b, limit = next;
480             }
481           } else {
482             STACK_PUSH5(ISAd, b, last, limit, trlink);
483             STACK_PUSH5(ISAd, first, a, limit, trlink);
484             ISAd += incr, first = a, last = b, limit = next;
485           }
486         } else {
487           if((a - first) <= (b - a)) {
488             if(1 < (last - b)) {
489               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
490               STACK_PUSH5(ISAd, first, a, limit, trlink);
491               first = b;
492             } else if(1 < (a - first)) {
493               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
494               last = a;
495             } else {
496               ISAd += incr, first = a, last = b, limit = next;
497             }
498           } else if((last - b) <= (b - a)) {
499             if(1 < (last - b)) {
500               STACK_PUSH5(ISAd, first, a, limit, trlink);
501               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
502               first = b;
503             } else {
504               STACK_PUSH5(ISAd, first, a, limit, trlink);
505               ISAd += incr, first = a, last = b, limit = next;
506             }
507           } else {
508             STACK_PUSH5(ISAd, first, a, limit, trlink);
509             STACK_PUSH5(ISAd, b, last, limit, trlink);
510             ISAd += incr, first = a, last = b, limit = next;
511           }
512         }
513       } else {
514         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
515         if((a - first) <= (last - b)) {
516           if(1 < (a - first)) {
517             STACK_PUSH5(ISAd, b, last, limit, trlink);
518             last = a;
519           } else if(1 < (last - b)) {
520             first = b;
521           } else {
522             STACK_POP5(ISAd, first, last, limit, trlink);
523           }
524         } else {
525           if(1 < (last - b)) {
526             STACK_PUSH5(ISAd, first, a, limit, trlink);
527             first = b;
528           } else if(1 < (a - first)) {
529             last = a;
530           } else {
531             STACK_POP5(ISAd, first, last, limit, trlink);
532           }
533         }
534       }
535     } else {
536       if(trbudget_check(budget, last - first)) {
537         limit = tr_ilg(last - first), ISAd += incr;
538       } else {
539         if(0 <= trlink) { stack[trlink].d = -1; }
540         STACK_POP5(ISAd, first, last, limit, trlink);
541       }
542     }
543   }
544 #undef STACK_SIZE
545 }
546 
547 
548 
549 /*---------------------------------------------------------------------------*/
550 
551 /*- Function -*/
552 
553 /* Tandem repeat sort */
554 void
555 trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
556   saidx_t *ISAd;
557   saidx_t *first, *last;
558   trbudget_t budget;
559   saidx_t t, skip, unsorted;
560 
561   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
562 /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
563   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
564     first = SA;
565     skip = 0;
566     unsorted = 0;
567     do {
568       if((t = *first) < 0) { first -= t; skip += t; }
569       else {
570         if(skip != 0) { *(first + skip) = skip; skip = 0; }
571         last = SA + ISA[t] + 1;
572         if(1 < (last - first)) {
573           budget.count = 0;
574           tr_introsort(ISA, ISAd, SA, first, last, &budget);
575           if(budget.count != 0) { unsorted += budget.count; }
576           else { skip = first - last; }
577         } else if((last - first) == 1) {
578           skip = -1;
579         }
580         first = last;
581       }
582     } while(first < (SA + n));
583     if(skip != 0) { *(first + skip) = skip; }
584     if(unsorted == 0) { break; }
585   }
586 }
587