xref: /illumos-gate/usr/src/common/util/qsort.c (revision c72d926233e1335d5c1752f6ac1d5851d0d91857)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2020 Oxide Computer Company
26  */
27 
28 #if !defined(_KERNEL) && !defined(_KMDB)
29 #include "lint.h"
30 #endif /* !_KERNEL && !_KMDB */
31 
32 #include <sys/types.h>
33 
34 #if !defined(_KERNEL) && !defined(_KMDB)
35 #include <stdlib.h>
36 #include <synch.h>
37 #endif /* !_KERNEL && !_KMDB */
38 
39 #include "qsort.h"
40 
41 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
42 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
43 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
44 static void swapb(char *r1, char *r2, size_t cnt);
45 
46 typedef int (*cmp_f)(const void *, const void *, void *);
47 
48 /*
49  * choose a median of 3 values
50  */
51 static __GNU_INLINE char *
52 med3(char *a, char *b, char *c, cmp_f cmp, void *arg)
53 {
54 	if (cmp(a, b, arg) < 0) {
55 		if (cmp(b, c, arg) < 0) {
56 			return (b);
57 		} else if (cmp(a, c, arg) < 0) {
58 			return (c);
59 		} else {
60 			return (a);
61 		}
62 	} else {
63 		if (cmp(b, c, arg) > 0) {
64 			return (b);
65 		} else if (cmp(a, c, arg) > 0) {
66 			return (c);
67 		} else {
68 			return (a);
69 		}
70 	}
71 }
72 
73 #define	THRESH_L	5	/* threshold for insertion sort */
74 #define	THRESH_M3	20	/* threshold for median of 3 */
75 #define	THRESH_M9	50	/* threshold for median of 9 */
76 
77 typedef struct {
78 	char	*b_lim;
79 	size_t	nrec;
80 } stk_t;
81 
82 /*
83  * qsort() is a general purpose, in-place sorting routine using a
84  * user provided call back function for comparisons.  This implementation
85  * utilizes a ternary quicksort algorithm, and cuts over to an
86  * insertion sort for partitions involving fewer than THRESH_L records.
87  *
88  * Potential User Errors
89  *   There is no return value from qsort, this function has no method
90  *   of alerting the user that a sort did not work or could not work.
91  *   We do not print an error message or exit the process or thread,
92  *   Even if we can detect an error, We CANNOT silently return without
93  *   sorting the data, if we did so the user could never be sure the
94  *   sort completed successfully.
95  *   It is possible we could change the return value of sort from void
96  *   to int and return success or some error codes, but this gets into
97  *   standards  and compatibility issues.
98  *
99  *   Examples of qsort parameter errors might be
100  *   1) record size (rsiz) equal to 0
101  *      qsort will loop and never return.
102  *   2) record size (rsiz) less than 0
103  *      rsiz is unsigned, so a negative value is insanely large
104  *   3) number of records (nrec) is 0
105  *      This is legal - qsort will return without examining any records
106  *   4) number of records (nrec) is less than 0
107  *      nrec is unsigned, so a negative value is insanely large.
108  *   5) nrec * rsiz > memory allocation for sort array
109  *      a segment violation may occur
110  *      corruption of other memory may occur
111  *   6) The base address of the sort array is invalid
112  *      a segment violation may occur
113  *      corruption of other memory may occur
114  *   7) The user call back function is invalid
115  *      we may get alignment errors or segment violations
116  *      we may jump into never-never land
117  *
118  *   Some less obvious errors might be
119  *   8) The user compare function is not comparing correctly
120  *   9) The user compare function modifies the data records
121  */
122 
123 static int
124 qsort_r_wrapper(const void *a, const void *b, void *arg)
125 {
126 	int (*cmp)(const void *, const void *) =
127 	    (int(*)(const void *, const void *))(uintptr_t)arg;
128 	return (cmp(a, b));
129 }
130 
131 void
132 qsort_r(void *basep, size_t nrec, size_t rsiz,
133     cmp_f cmp, void *arg)
134 {
135 	size_t		i;		/* temporary variable */
136 
137 	/* variables used by swap */
138 	void		(*swapf)(char *, char *, size_t);
139 	size_t		loops;
140 
141 	/* variables used by sort */
142 	stk_t		stack[8 * sizeof (nrec) + 1];
143 	stk_t		*sp;
144 	char		*b_lim;		/* bottom limit */
145 	char		*b_dup;		/* bottom duplicate */
146 	char		*b_par;		/* bottom partition */
147 	char		*t_lim;		/* top limit */
148 	char		*t_dup;		/* top duplicate */
149 	char		*t_par;		/* top partition */
150 	char		*m1, *m2, *m3;	/* median pointers */
151 	uintptr_t	d_bytelength;	/* byte length of duplicate records */
152 	int		b_nrec;
153 	int		t_nrec;
154 	int		cv;		/* results of compare (bottom / top) */
155 
156 	/*
157 	 * choose a swap function based on alignment and size
158 	 *
159 	 * The qsort function sorts an array of fixed length records.
160 	 * We have very limited knowledge about the data record itself.
161 	 * It may be that the data record is in the array we are sorting
162 	 * or it may be that the array contains pointers or indexes to
163 	 * the actual data record and all that we are sorting is the indexes.
164 	 *
165 	 * The following decision will choose an optimal swap function
166 	 * based on the size and alignment of the data records
167 	 *   swapp64	will swap 64 bit pointers
168 	 *   swapp32	will swap 32 bit pointers
169 	 *   swapi	will swap an array of 32 bit integers
170 	 *   swapb	will swap an array of 8 bit characters
171 	 *
172 	 * swapi and swapb will also require the variable loops to be set
173 	 * to control the length of the array being swapped
174 	 */
175 	if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
176 	    (rsiz == sizeof (uint64_t))) {
177 		loops = 1;
178 		swapf = (void (*)(char *, char *, size_t))swapp64;
179 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
180 	    (rsiz == sizeof (uint32_t))) {
181 		loops = 1;
182 		swapf = (void (*)(char *, char *, size_t))swapp32;
183 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
184 	    ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
185 		loops = rsiz / sizeof (int);
186 		swapf = (void (*)(char *, char *, size_t))swapi;
187 	} else {
188 		loops = rsiz;
189 		swapf = swapb;
190 	}
191 
192 	/*
193 	 * qsort is a partitioning sort
194 	 *
195 	 * the stack is the bookkeeping mechanism to keep track of all
196 	 * the partitions.
197 	 *
198 	 * each sort pass takes one partition and sorts it into two partitions.
199 	 * at the top of the loop we simply take the partition on the top
200 	 * of the stack and sort it. See the comments at the bottom
201 	 * of the loop regarding which partitions to add in what order.
202 	 *
203 	 * initially put the whole partition on the stack
204 	 */
205 	sp = stack;
206 	sp->b_lim = (char *)basep;
207 	sp->nrec = nrec;
208 	sp++;
209 	while (sp > stack) {
210 		sp--;
211 		b_lim = sp->b_lim;
212 		nrec = sp->nrec;
213 
214 		/*
215 		 * a linear insertion sort i faster than a qsort for
216 		 * very small number of records (THRESH_L)
217 		 *
218 		 * if number records < threshold use linear insertion sort
219 		 *
220 		 * this also handles the special case where the partition
221 		 * 0 or 1 records length.
222 		 */
223 		if (nrec < THRESH_L) {
224 			/*
225 			 * Linear insertion sort
226 			 */
227 			t_par = b_lim;
228 			for (i = 1; i < nrec; i++) {
229 				t_par += rsiz;
230 				b_par = t_par;
231 				while (b_par > b_lim) {
232 					b_par -= rsiz;
233 					if ((*cmp)(b_par, b_par + rsiz, arg) <=
234 					    0) {
235 						break;
236 					}
237 					(*swapf)(b_par, b_par + rsiz, loops);
238 				}
239 			}
240 
241 			/*
242 			 * a linear insertion sort will put all records
243 			 * in their final position and will not create
244 			 * subpartitions.
245 			 *
246 			 * therefore when the insertion sort is complete
247 			 * just go to the top of the loop and get the
248 			 * next partition to sort.
249 			 */
250 			continue;
251 		}
252 
253 		/* quicksort */
254 
255 		/*
256 		 * choose a pivot record
257 		 *
258 		 * Ideally the pivot record will divide the partition
259 		 * into two equal parts. however we have to balance the
260 		 * work involved in selecting the pivot record with the
261 		 * expected benefit.
262 		 *
263 		 * The choice of pivot record depends on the number of
264 		 * records in the partition
265 		 *
266 		 * for small partitions (nrec < THRESH_M3)
267 		 *   we just select the record in the middle of the partition
268 		 *
269 		 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
270 		 *   we select three values and choose the median of 3
271 		 *
272 		 * if (nrec >= THRESH_M9)
273 		 *   then we use an approximate median of 9
274 		 *   9 records are selected and grouped in 3 groups of 3
275 		 *   the median of each of these 3 groups is fed into another
276 		 *   median of 3 decision.
277 		 *
278 		 * Each median of 3 decision is 2 or 3 compares,
279 		 * so median of 9 costs between 8 and 12 compares.
280 		 *
281 		 * i is byte distance between two consecutive samples
282 		 * m2 will point to the pivot record
283 		 */
284 		if (nrec < THRESH_M3) {
285 			m2 = b_lim + (nrec / 2) * rsiz;
286 		} else if (nrec < THRESH_M9) {
287 			/* use median of 3 */
288 			i = ((nrec - 1) / 2) * rsiz;
289 			m2 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg);
290 		} else {
291 			/* approx median of 9 */
292 			i = ((nrec - 1) / 8) * rsiz;
293 			m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i, cmp, arg);
294 			m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i,
295 			    cmp, arg);
296 			m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i,
297 			    cmp, arg);
298 			m2 = med3(m1, m2, m3, cmp, arg);
299 		}
300 
301 		/*
302 		 * quick sort partitioning
303 		 *
304 		 * The partition limits are defined by bottom and top pointers
305 		 * b_lim and t_lim.
306 		 *
307 		 * qsort uses a fairly standard method of moving the
308 		 * partitioning pointers, b_par and t_par, to the middle of
309 		 * the partition and exchanging records that are in the
310 		 * wrong part of the partition.
311 		 *
312 		 * Two enhancements have been made to the basic algorithm.
313 		 * One for handling duplicate records and one to minimize
314 		 * the number of swaps.
315 		 *
316 		 * Two duplicate records pointers are (b_dup and t_dup) are
317 		 * initially set to b_lim and t_lim.  Each time a record
318 		 * whose sort key value is equal to the pivot record is found
319 		 * it will be swapped with the record pointed to by
320 		 * b_dup or t_dup and the duplicate pointer will be
321 		 * incremented toward the center.
322 		 * When partitioning is complete, all the duplicate records
323 		 * will have been collected at the upper and lower limits of
324 		 * the partition and can easily be moved adjacent to the
325 		 * pivot record.
326 		 *
327 		 * The second optimization is to minimize the number of swaps.
328 		 * The pointer m2 points to the pivot record.
329 		 * During partitioning, if m2 is ever equal to the partitioning
330 		 * pointers, b_par or t_par, then b_par or t_par just moves
331 		 * onto the next record without doing a compare.
332 		 * If as a result of duplicate record detection,
333 		 * b_dup or t_dup is ever equal to m2,
334 		 * then m2 is changed to point to the duplicate record and
335 		 * b_dup or t_dup is incremented with out swapping records.
336 		 *
337 		 * When partitioning is done, we may not have the same pivot
338 		 * record that we started with, but we will have one with
339 		 * an equal sort key.
340 		 */
341 		b_dup = b_par		= b_lim;
342 		t_dup = t_par = t_lim	= b_lim + rsiz * (nrec - 1);
343 		for (;;) {
344 
345 			/* move bottom pointer up */
346 			for (; b_par <= t_par; b_par += rsiz) {
347 				if (b_par == m2) {
348 					continue;
349 				}
350 				cv = cmp(b_par, m2, arg);
351 				if (cv > 0) {
352 					break;
353 				}
354 				if (cv == 0) {
355 					if (b_dup == m2) {
356 						m2 = b_par;
357 					} else if (b_dup != b_par) {
358 						(*swapf)(b_dup, b_par, loops);
359 					}
360 					b_dup += rsiz;
361 				}
362 			}
363 
364 			/* move top pointer down */
365 			for (; b_par < t_par; t_par -= rsiz) {
366 				if (t_par == m2) {
367 					continue;
368 				}
369 				cv = cmp(t_par, m2, arg);
370 				if (cv < 0) {
371 					break;
372 				}
373 				if (cv == 0) {
374 					if (t_dup == m2) {
375 						m2 = t_par;
376 					} else if (t_dup != t_par) {
377 						(*swapf)(t_dup, t_par, loops);
378 					}
379 					t_dup -= rsiz;
380 				}
381 			}
382 
383 			/* break if we are done partitioning */
384 			if (b_par >= t_par) {
385 				break;
386 			}
387 
388 			/* exchange records at upper and lower break points */
389 			(*swapf)(b_par, t_par, loops);
390 			b_par += rsiz;
391 			t_par -= rsiz;
392 		}
393 
394 		/*
395 		 * partitioning is now complete
396 		 *
397 		 * there are two termination conditions from the partitioning
398 		 * loop above.  Either b_par or t_par have crossed or
399 		 * they are equal.
400 		 *
401 		 * we need to swap the pivot record to its final position
402 		 * m2 could be in either the upper or lower partitions
403 		 * or it could already be in its final position
404 		 */
405 		/*
406 		 * R[b_par] > R[m2]
407 		 * R[t_par] < R[m2]
408 		 */
409 		if (t_par < b_par) {
410 			if (m2 < t_par) {
411 				(*swapf)(m2, t_par, loops);
412 				m2 = b_par = t_par;
413 			} else if (m2 > b_par) {
414 				(*swapf)(m2, b_par, loops);
415 				m2 = t_par = b_par;
416 			} else {
417 				b_par = t_par = m2;
418 			}
419 		} else {
420 			if (m2 < t_par) {
421 				t_par = b_par = t_par - rsiz;
422 			}
423 			if (m2 != b_par) {
424 				(*swapf)(m2, b_par, loops);
425 			}
426 			m2 = t_par;
427 		}
428 
429 		/*
430 		 * move bottom duplicates next to pivot
431 		 * optimized to eliminate overlap
432 		 */
433 		d_bytelength = b_dup - b_lim;
434 		if (b_par - b_dup < d_bytelength) {
435 			b_dup = b_lim + (b_par - b_dup);
436 		}
437 		while (b_dup > b_lim) {
438 			b_dup -= rsiz;
439 			b_par -= rsiz;
440 			(*swapf)(b_dup, b_par, loops);
441 		}
442 		b_par = m2 - d_bytelength;
443 
444 		/*
445 		 * move top duplicates next to pivot
446 		 */
447 		d_bytelength = t_lim - t_dup;
448 		if (t_dup - t_par < d_bytelength) {
449 			t_dup = t_lim - (t_dup - t_par);
450 		}
451 		while (t_dup < t_lim) {
452 			t_dup += rsiz;
453 			t_par += rsiz;
454 			(*swapf)(t_dup, t_par, loops);
455 		}
456 		t_par = m2 + d_bytelength;
457 
458 		/*
459 		 * when a qsort pass completes there are three partitions
460 		 * 1) the lower contains all records less than pivot
461 		 * 2) the upper contains all records greater than pivot
462 		 * 3) the pivot partition contains all record equal to pivot
463 		 *
464 		 * all records in the pivot partition are in their final
465 		 * position and do not need to be accounted for by the stack
466 		 *
467 		 * when adding partitions to the stack
468 		 * it is important to add the largest partition first
469 		 * to prevent stack overflow.
470 		 *
471 		 * calculate number of unsorted records in top and bottom
472 		 * push resulting partitions on stack
473 		 */
474 		b_nrec = (b_par - b_lim) / rsiz;
475 		t_nrec = (t_lim - t_par) / rsiz;
476 		if (b_nrec < t_nrec) {
477 			sp->b_lim = t_par + rsiz;
478 			sp->nrec = t_nrec;
479 			sp++;
480 			sp->b_lim = b_lim;
481 			sp->nrec = b_nrec;
482 			sp++;
483 		} else {
484 			sp->b_lim = b_lim;
485 			sp->nrec = b_nrec;
486 			sp++;
487 			sp->b_lim = t_par + rsiz;
488 			sp->nrec = t_nrec;
489 			sp++;
490 		}
491 	}
492 }
493 
494 void
495 qsort(void *basep, size_t nrec, size_t rsiz,
496     int (*cmp)(const void *, const void *))
497 {
498 	return (qsort_r(basep, nrec, rsiz, qsort_r_wrapper,
499 	    (void *)(uintptr_t)cmp));
500 }
501 
502 /*
503  * The following swap functions should not create a stack frame
504  * the SPARC call / return instruction will be executed
505  * but the a save / restore will not be executed
506  * which means we won't do a window turn with the spill / fill overhead
507  * verify this by examining the assembly code
508  */
509 
510 /* ARGSUSED */
511 static void
512 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
513 {
514 	uint32_t temp;
515 
516 	temp = *r1;
517 	*r1++ = *r2;
518 	*r2++ = temp;
519 }
520 
521 /* ARGSUSED */
522 static void
523 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
524 {
525 	uint64_t temp;
526 
527 	temp = *r1;
528 	*r1++ = *r2;
529 	*r2++ = temp;
530 }
531 
532 static void
533 swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
534 {
535 	uint32_t temp;
536 
537 	/* character by character */
538 	while (cnt--) {
539 		temp = *r1;
540 		*r1++ = *r2;
541 		*r2++ = temp;
542 	}
543 }
544 
545 static void
546 swapb(char *r1, char *r2, size_t cnt)
547 {
548 	char	temp;
549 
550 	/* character by character */
551 	while (cnt--) {
552 		temp = *r1;
553 		*r1++ = *r2;
554 		*r2++ = temp;
555 	}
556 }
557