1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2020 Oxide Computer Company
26 */
27
28 #if !defined(_KERNEL) && !defined(_KMDB)
29 #include "lint.h"
30 #endif /* !_KERNEL && !_KMDB */
31
32 #include <sys/types.h>
33
34 #if !defined(_KERNEL) && !defined(_KMDB)
35 #include <stdlib.h>
36 #include <synch.h>
37 #endif /* !_KERNEL && !_KMDB */
38
39 #include "qsort.h"
40
41 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
42 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
43 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
44 static void swapb(char *r1, char *r2, size_t cnt);
45
46 typedef int (*cmp_f)(const void *, const void *, void *);
47
48 /*
49 * choose a median of 3 values
50 */
51 static __GNU_INLINE char *
med3(char * a,char * b,char * c,cmp_f cmp,void * arg)52 med3(char *a, char *b, char *c, cmp_f cmp, void *arg)
53 {
54 if (cmp(a, b, arg) < 0) {
55 if (cmp(b, c, arg) < 0) {
56 return (b);
57 } else if (cmp(a, c, arg) < 0) {
58 return (c);
59 } else {
60 return (a);
61 }
62 } else {
63 if (cmp(b, c, arg) > 0) {
64 return (b);
65 } else if (cmp(a, c, arg) > 0) {
66 return (c);
67 } else {
68 return (a);
69 }
70 }
71 }
72
73 #define THRESH_L 5 /* threshold for insertion sort */
74 #define THRESH_M3 20 /* threshold for median of 3 */
75 #define THRESH_M9 50 /* threshold for median of 9 */
76
77 typedef struct {
78 char *b_lim;
79 size_t nrec;
80 } stk_t;
81
82 /*
83 * qsort() is a general purpose, in-place sorting routine using a
84 * user provided call back function for comparisons. This implementation
85 * utilizes a ternary quicksort algorithm, and cuts over to an
86 * insertion sort for partitions involving fewer than THRESH_L records.
87 *
88 * Potential User Errors
89 * There is no return value from qsort, this function has no method
90 * of alerting the user that a sort did not work or could not work.
91 * We do not print an error message or exit the process or thread,
92 * Even if we can detect an error, We CANNOT silently return without
93 * sorting the data, if we did so the user could never be sure the
94 * sort completed successfully.
95 * It is possible we could change the return value of sort from void
96 * to int and return success or some error codes, but this gets into
97 * standards and compatibility issues.
98 *
99 * Examples of qsort parameter errors might be
100 * 1) record size (rsiz) equal to 0
101 * qsort will loop and never return.
102 * 2) record size (rsiz) less than 0
103 * rsiz is unsigned, so a negative value is insanely large
104 * 3) number of records (nrec) is 0
105 * This is legal - qsort will return without examining any records
106 * 4) number of records (nrec) is less than 0
107 * nrec is unsigned, so a negative value is insanely large.
108 * 5) nrec * rsiz > memory allocation for sort array
109 * a segment violation may occur
110 * corruption of other memory may occur
111 * 6) The base address of the sort array is invalid
112 * a segment violation may occur
113 * corruption of other memory may occur
114 * 7) The user call back function is invalid
115 * we may get alignment errors or segment violations
116 * we may jump into never-never land
117 *
118 * Some less obvious errors might be
119 * 8) The user compare function is not comparing correctly
120 * 9) The user compare function modifies the data records
121 */
122
123 static int
qsort_r_wrapper(const void * a,const void * b,void * arg)124 qsort_r_wrapper(const void *a, const void *b, void *arg)
125 {
126 int (*cmp)(const void *, const void *) =
127 (int(*)(const void *, const void *))(uintptr_t)arg;
128 return (cmp(a, b));
129 }
130
131 void
qsort_r(void * basep,size_t nrec,size_t rsiz,cmp_f cmp,void * arg)132 qsort_r(void *basep, size_t nrec, size_t rsiz,
133 cmp_f cmp, void *arg)
134 {
135 size_t i; /* temporary variable */
136
137 /* variables used by swap */
138 void (*swapf)(char *, char *, size_t);
139 size_t loops;
140
141 /* variables used by sort */
142 stk_t stack[8 * sizeof (nrec) + 1];
143 stk_t *sp;
144 char *b_lim; /* bottom limit */
145 char *b_dup; /* bottom duplicate */
146 char *b_par; /* bottom partition */
147 char *t_lim; /* top limit */
148 char *t_dup; /* top duplicate */
149 char *t_par; /* top partition */
150 char *m1, *m2, *m3; /* median pointers */
151 uintptr_t d_bytelength; /* byte length of duplicate records */
152 int b_nrec;
153 int t_nrec;
154 int cv; /* results of compare (bottom / top) */
155
156 /*
157 * choose a swap function based on alignment and size
158 *
159 * The qsort function sorts an array of fixed length records.
160 * We have very limited knowledge about the data record itself.
161 * It may be that the data record is in the array we are sorting
162 * or it may be that the array contains pointers or indexes to
163 * the actual data record and all that we are sorting is the indexes.
164 *
165 * The following decision will choose an optimal swap function
166 * based on the size and alignment of the data records
167 * swapp64 will swap 64 bit pointers
168 * swapp32 will swap 32 bit pointers
169 * swapi will swap an array of 32 bit integers
170 * swapb will swap an array of 8 bit characters
171 *
172 * swapi and swapb will also require the variable loops to be set
173 * to control the length of the array being swapped
174 */
175 if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
176 (rsiz == sizeof (uint64_t))) {
177 loops = 1;
178 swapf = (void (*)(char *, char *, size_t))swapp64;
179 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
180 (rsiz == sizeof (uint32_t))) {
181 loops = 1;
182 swapf = (void (*)(char *, char *, size_t))swapp32;
183 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
184 ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
185 loops = rsiz / sizeof (int);
186 swapf = (void (*)(char *, char *, size_t))swapi;
187 } else {
188 loops = rsiz;
189 swapf = swapb;
190 }
191
192 /*
193 * qsort is a partitioning sort
194 *
195 * the stack is the bookkeeping mechanism to keep track of all
196 * the partitions.
197 *
198 * each sort pass takes one partition and sorts it into two partitions.
199 * at the top of the loop we simply take the partition on the top
200 * of the stack and sort it. See the comments at the bottom
201 * of the loop regarding which partitions to add in what order.
202 *
203 * initially put the whole partition on the stack
204 */
205 sp = stack;
206 sp->b_lim = (char *)basep;
207 sp->nrec = nrec;
208 sp++;
209 while (sp > stack) {
210 sp--;
211 b_lim = sp->b_lim;
212 nrec = sp->nrec;
213
214 /*
215 * a linear insertion sort i faster than a qsort for
216 * very small number of records (THRESH_L)
217 *
218 * if number records < threshold use linear insertion sort
219 *
220 * this also handles the special case where the partition
221 * 0 or 1 records length.
222 */
223 if (nrec < THRESH_L) {
224 /*
225 * Linear insertion sort
226 */
227 t_par = b_lim;
228 for (i = 1; i < nrec; i++) {
229 t_par += rsiz;
230 b_par = t_par;
231 while (b_par > b_lim) {
232 b_par -= rsiz;
233 if ((*cmp)(b_par, b_par + rsiz, arg) <=
234 0) {
235 break;
236 }
237 (*swapf)(b_par, b_par + rsiz, loops);
238 }
239 }
240
241 /*
242 * a linear insertion sort will put all records
243 * in their final position and will not create
244 * subpartitions.
245 *
246 * therefore when the insertion sort is complete
247 * just go to the top of the loop and get the
248 * next partition to sort.
249 */
250 continue;
251 }
252
253 /* quicksort */
254
255 /*
256 * choose a pivot record
257 *
258 * Ideally the pivot record will divide the partition
259 * into two equal parts. however we have to balance the
260 * work involved in selecting the pivot record with the
261 * expected benefit.
262 *
263 * The choice of pivot record depends on the number of
264 * records in the partition
265 *
266 * for small partitions (nrec < THRESH_M3)
267 * we just select the record in the middle of the partition
268 *
269 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
270 * we select three values and choose the median of 3
271 *
272 * if (nrec >= THRESH_M9)
273 * then we use an approximate median of 9
274 * 9 records are selected and grouped in 3 groups of 3
275 * the median of each of these 3 groups is fed into another
276 * median of 3 decision.
277 *
278 * Each median of 3 decision is 2 or 3 compares,
279 * so median of 9 costs between 8 and 12 compares.
280 *
281 * i is byte distance between two consecutive samples
282 * m2 will point to the pivot record
283 */
284 if (nrec < THRESH_M3) {
285 m2 = b_lim + (nrec / 2) * rsiz;
286 } else if (nrec < THRESH_M9) {
287 /* use median of 3 */
288 i = ((nrec - 1) / 2) * rsiz;
289 m2 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg);
290 } else {
291 /* approx median of 9 */
292 i = ((nrec - 1) / 8) * rsiz;
293 m1 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg);
294 m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i,
295 cmp, arg);
296 m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i,
297 cmp, arg);
298 m2 = med3(m1, m2, m3, cmp, arg);
299 }
300
301 /*
302 * quick sort partitioning
303 *
304 * The partition limits are defined by bottom and top pointers
305 * b_lim and t_lim.
306 *
307 * qsort uses a fairly standard method of moving the
308 * partitioning pointers, b_par and t_par, to the middle of
309 * the partition and exchanging records that are in the
310 * wrong part of the partition.
311 *
312 * Two enhancements have been made to the basic algorithm.
313 * One for handling duplicate records and one to minimize
314 * the number of swaps.
315 *
316 * Two duplicate records pointers are (b_dup and t_dup) are
317 * initially set to b_lim and t_lim. Each time a record
318 * whose sort key value is equal to the pivot record is found
319 * it will be swapped with the record pointed to by
320 * b_dup or t_dup and the duplicate pointer will be
321 * incremented toward the center.
322 * When partitioning is complete, all the duplicate records
323 * will have been collected at the upper and lower limits of
324 * the partition and can easily be moved adjacent to the
325 * pivot record.
326 *
327 * The second optimization is to minimize the number of swaps.
328 * The pointer m2 points to the pivot record.
329 * During partitioning, if m2 is ever equal to the partitioning
330 * pointers, b_par or t_par, then b_par or t_par just moves
331 * onto the next record without doing a compare.
332 * If as a result of duplicate record detection,
333 * b_dup or t_dup is ever equal to m2,
334 * then m2 is changed to point to the duplicate record and
335 * b_dup or t_dup is incremented with out swapping records.
336 *
337 * When partitioning is done, we may not have the same pivot
338 * record that we started with, but we will have one with
339 * an equal sort key.
340 */
341 b_dup = b_par = b_lim;
342 t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1);
343 for (;;) {
344
345 /* move bottom pointer up */
346 for (; b_par <= t_par; b_par += rsiz) {
347 if (b_par == m2) {
348 continue;
349 }
350 cv = cmp(b_par, m2, arg);
351 if (cv > 0) {
352 break;
353 }
354 if (cv == 0) {
355 if (b_dup == m2) {
356 m2 = b_par;
357 } else if (b_dup != b_par) {
358 (*swapf)(b_dup, b_par, loops);
359 }
360 b_dup += rsiz;
361 }
362 }
363
364 /* move top pointer down */
365 for (; b_par < t_par; t_par -= rsiz) {
366 if (t_par == m2) {
367 continue;
368 }
369 cv = cmp(t_par, m2, arg);
370 if (cv < 0) {
371 break;
372 }
373 if (cv == 0) {
374 if (t_dup == m2) {
375 m2 = t_par;
376 } else if (t_dup != t_par) {
377 (*swapf)(t_dup, t_par, loops);
378 }
379 t_dup -= rsiz;
380 }
381 }
382
383 /* break if we are done partitioning */
384 if (b_par >= t_par) {
385 break;
386 }
387
388 /* exchange records at upper and lower break points */
389 (*swapf)(b_par, t_par, loops);
390 b_par += rsiz;
391 t_par -= rsiz;
392 }
393
394 /*
395 * partitioning is now complete
396 *
397 * there are two termination conditions from the partitioning
398 * loop above. Either b_par or t_par have crossed or
399 * they are equal.
400 *
401 * we need to swap the pivot record to its final position
402 * m2 could be in either the upper or lower partitions
403 * or it could already be in its final position
404 */
405 /*
406 * R[b_par] > R[m2]
407 * R[t_par] < R[m2]
408 */
409 if (t_par < b_par) {
410 if (m2 < t_par) {
411 (*swapf)(m2, t_par, loops);
412 m2 = b_par = t_par;
413 } else if (m2 > b_par) {
414 (*swapf)(m2, b_par, loops);
415 m2 = t_par = b_par;
416 } else {
417 b_par = t_par = m2;
418 }
419 } else {
420 if (m2 < t_par) {
421 t_par = b_par = t_par - rsiz;
422 }
423 if (m2 != b_par) {
424 (*swapf)(m2, b_par, loops);
425 }
426 m2 = t_par;
427 }
428
429 /*
430 * move bottom duplicates next to pivot
431 * optimized to eliminate overlap
432 */
433 d_bytelength = b_dup - b_lim;
434 if (b_par - b_dup < d_bytelength) {
435 b_dup = b_lim + (b_par - b_dup);
436 }
437 while (b_dup > b_lim) {
438 b_dup -= rsiz;
439 b_par -= rsiz;
440 (*swapf)(b_dup, b_par, loops);
441 }
442 b_par = m2 - d_bytelength;
443
444 /*
445 * move top duplicates next to pivot
446 */
447 d_bytelength = t_lim - t_dup;
448 if (t_dup - t_par < d_bytelength) {
449 t_dup = t_lim - (t_dup - t_par);
450 }
451 while (t_dup < t_lim) {
452 t_dup += rsiz;
453 t_par += rsiz;
454 (*swapf)(t_dup, t_par, loops);
455 }
456 t_par = m2 + d_bytelength;
457
458 /*
459 * when a qsort pass completes there are three partitions
460 * 1) the lower contains all records less than pivot
461 * 2) the upper contains all records greater than pivot
462 * 3) the pivot partition contains all record equal to pivot
463 *
464 * all records in the pivot partition are in their final
465 * position and do not need to be accounted for by the stack
466 *
467 * when adding partitions to the stack
468 * it is important to add the largest partition first
469 * to prevent stack overflow.
470 *
471 * calculate number of unsorted records in top and bottom
472 * push resulting partitions on stack
473 */
474 b_nrec = (b_par - b_lim) / rsiz;
475 t_nrec = (t_lim - t_par) / rsiz;
476 if (b_nrec < t_nrec) {
477 sp->b_lim = t_par + rsiz;
478 sp->nrec = t_nrec;
479 sp++;
480 sp->b_lim = b_lim;
481 sp->nrec = b_nrec;
482 sp++;
483 } else {
484 sp->b_lim = b_lim;
485 sp->nrec = b_nrec;
486 sp++;
487 sp->b_lim = t_par + rsiz;
488 sp->nrec = t_nrec;
489 sp++;
490 }
491 }
492 }
493
494 void
qsort(void * basep,size_t nrec,size_t rsiz,int (* cmp)(const void *,const void *))495 qsort(void *basep, size_t nrec, size_t rsiz,
496 int (*cmp)(const void *, const void *))
497 {
498 return (qsort_r(basep, nrec, rsiz, qsort_r_wrapper,
499 (void *)(uintptr_t)cmp));
500 }
501
502 /*
503 * The following swap functions should not create a stack frame
504 * the SPARC call / return instruction will be executed
505 * but the a save / restore will not be executed
506 * which means we won't do a window turn with the spill / fill overhead
507 * verify this by examining the assembly code
508 */
509
510 /* ARGSUSED */
511 static void
swapp32(uint32_t * r1,uint32_t * r2,size_t cnt)512 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
513 {
514 uint32_t temp;
515
516 temp = *r1;
517 *r1++ = *r2;
518 *r2++ = temp;
519 }
520
521 /* ARGSUSED */
522 static void
swapp64(uint64_t * r1,uint64_t * r2,size_t cnt)523 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
524 {
525 uint64_t temp;
526
527 temp = *r1;
528 *r1++ = *r2;
529 *r2++ = temp;
530 }
531
532 static void
swapi(uint32_t * r1,uint32_t * r2,size_t cnt)533 swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
534 {
535 uint32_t temp;
536
537 /* character by character */
538 while (cnt--) {
539 temp = *r1;
540 *r1++ = *r2;
541 *r2++ = temp;
542 }
543 }
544
545 static void
swapb(char * r1,char * r2,size_t cnt)546 swapb(char *r1, char *r2, size_t cnt)
547 {
548 char temp;
549
550 /* character by character */
551 while (cnt--) {
552 temp = *r1;
553 *r1++ = *r2;
554 *r2++ = temp;
555 }
556 }
557