xref: /illumos-gate/usr/src/common/util/qsort.c (revision 34a0f871d192b33b865455a8812a3d34c1866315)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 
31 #if !defined(_KERNEL) && !defined(_KMDB)
32 #include "synonyms.h"
33 #endif /* !_KERNEL && !_KMDB */
34 
35 #include <sys/types.h>
36 
37 #if !defined(_KERNEL) && !defined(_KMDB)
38 #include <stdlib.h>
39 #include <synch.h>
40 #endif /* !_KERNEL && !_KMDB */
41 
42 #include "qsort.h"
43 
44 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
45 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
46 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
47 static void swapb(char *r1, char *r2, size_t cnt);
48 
49 /*
50  * choose a median of 3 values
51  *
52  * note: cstyle specifically prohibits nested conditional operators
53  * but this is the only way to do the median of 3 function in-line
54  */
55 #define	med3(a, b, c) (cmp((a), (b)) < 0) \
56 	? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
57 	: ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
58 
59 #define	THRESH_L	5	/* threshold for insertion sort */
60 #define	THRESH_M3	20	/* threshold for median of 3 */
61 #define	THRESH_M9	50	/* threshold for median of 9 */
62 
63 typedef struct {
64 	char	*b_lim;
65 	size_t	nrec;
66 } stk_t;
67 
68 /*
69  * qsort() is a general purpose, in-place sorting routine using a
70  * user provided call back function for comparisons.  This implementation
71  * utilizes a ternary quicksort algorithm, and cuts over to an
72  * insertion sort for partitions involving fewer than THRESH_L records.
73  *
74  * Potential User Errors
75  *   There is no return value from qsort, this function has no method
76  *   of alerting the user that a sort did not work or could not work.
77  *   We do not print an error message or exit the process or thread,
78  *   Even if we can detect an error, We CANNOT silently return without
79  *   sorting the data, if we did so the user could never be sure the
80  *   sort completed successfully.
81  *   It is possible we could change the return value of sort from void
82  *   to int and return success or some error codes, but this gets into
83  *   standards  and compatibility issues.
84  *
85  *   Examples of qsort parameter errors might be
86  *   1) record size (rsiz) equal to 0
87  *      qsort will loop and never return.
88  *   2) record size (rsiz) less than 0
89  *      rsiz is unsigned, so a negative value is insanely large
90  *   3) number of records (nrec) is 0
91  *      This is legal - qsort will return without examining any records
92  *   4) number of records (nrec) is less than 0
93  *      nrec is unsigned, so a negative value is insanely large.
94  *   5) nrec * rsiz > memory allocation for sort array
95  *      a segment violation may occur
96  *      corruption of other memory may occur
97  *   6) The base address of the sort array is invalid
98  *      a segment violation may occur
99  *      corruption of other memory may occur
100  *   7) The user call back function is invalid
101  *      we may get alignment errors or segment violations
102  *      we may jump into never-never land
103  *
104  *   Some less obvious errors might be
105  *   8) The user compare function is not comparing correctly
106  *   9) The user compare function modifies the data records
107  */
108 
109 void
110 qsort(
111 	void		*basep,
112 	size_t		nrec,
113 	size_t		rsiz,
114 	int		(*cmp)(const void *, const void *))
115 {
116 	size_t		i;		/* temporary variable */
117 
118 	/* variables used by swap */
119 	void		(*swapf)(char *, char *, size_t);
120 	size_t		loops;
121 
122 	/* variables used by sort */
123 	stk_t		stack[8 * sizeof (nrec) + 1];
124 	stk_t		*sp;
125 	char		*b_lim;		/* bottom limit */
126 	char		*b_dup;		/* bottom duplicate */
127 	char		*b_par;		/* bottom partition */
128 	char		*t_lim;		/* top limit */
129 	char		*t_dup;		/* top duplicate */
130 	char		*t_par;		/* top partition */
131 	char		*m1, *m2, *m3;	/* median pointers */
132 	uintptr_t	d_bytelength;	/* byte length of duplicate records */
133 	int		b_nrec;
134 	int		t_nrec;
135 	int		cv;		/* results of compare (bottom / top) */
136 
137 	/*
138 	 * choose a swap function based on alignment and size
139 	 *
140 	 * The qsort function sorts an array of fixed length records.
141 	 * We have very limited knowledge about the data record itself.
142 	 * It may be that the data record is in the array we are sorting
143 	 * or it may be that the array contains pointers or indexes to
144 	 * the actual data record and all that we are sorting is the indexes.
145 	 *
146 	 * The following decision will choose an optimal swap function
147 	 * based on the size and alignment of the data records
148 	 *   swapp64	will swap 64 bit pointers
149 	 *   swapp32	will swap 32 bit pointers
150 	 *   swapi	will swap an array of 32 bit integers
151 	 *   swapb	will swap an array of 8 bit characters
152 	 *
153 	 * swapi and swapb will also require the variable loops to be set
154 	 * to control the length of the array being swapped
155 	 */
156 	if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
157 	    (rsiz == sizeof (uint64_t))) {
158 		loops = 1;
159 		swapf = (void (*)(char *, char *, size_t))swapp64;
160 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
161 	    (rsiz == sizeof (uint32_t))) {
162 		loops = 1;
163 		swapf = (void (*)(char *, char *, size_t))swapp32;
164 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
165 	    ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
166 		loops = rsiz / sizeof (int);
167 		swapf = (void (*)(char *, char *, size_t))swapi;
168 	} else {
169 		loops = rsiz;
170 		swapf = swapb;
171 	}
172 
173 	/*
174 	 * qsort is a partitioning sort
175 	 *
176 	 * the stack is the bookkeeping mechanism to keep track of all
177 	 * the partitions.
178 	 *
179 	 * each sort pass takes one partition and sorts it into two partitions.
180 	 * at the top of the loop we simply take the partition on the top
181 	 * of the stack and sort it. See the comments at the bottom
182 	 * of the loop regarding which partitions to add in what order.
183 	 *
184 	 * initially put the whole partition on the stack
185 	 */
186 	sp = stack;
187 	sp->b_lim = (char *)basep;
188 	sp->nrec = nrec;
189 	sp++;
190 	while (sp > stack) {
191 		sp--;
192 		b_lim = sp->b_lim;
193 		nrec = sp->nrec;
194 
195 		/*
196 		 * a linear insertion sort i faster than a qsort for
197 		 * very small number of records (THRESH_L)
198 		 *
199 		 * if number records < threshold use linear insertion sort
200 		 *
201 		 * this also handles the special case where the partition
202 		 * 0 or 1 records length.
203 		 */
204 		if (nrec < THRESH_L) {
205 			/*
206 			 * Linear insertion sort
207 			 */
208 			t_par = b_lim;
209 			for (i = 1; i < nrec; i++) {
210 				t_par += rsiz;
211 				b_par = t_par;
212 				while (b_par > b_lim) {
213 					b_par -= rsiz;
214 					if ((*cmp)(b_par, b_par + rsiz) <= 0) {
215 						break;
216 					}
217 					(*swapf)(b_par, b_par + rsiz, loops);
218 				}
219 			}
220 
221 			/*
222 			 * a linear insertion sort will put all records
223 			 * in their final position and will not create
224 			 * subpartitions.
225 			 *
226 			 * therefore when the insertion sort is complete
227 			 * just go to the top of the loop and get the
228 			 * next partition to sort.
229 			 */
230 			continue;
231 		}
232 
233 		/* quicksort */
234 
235 		/*
236 		 * choose a pivot record
237 		 *
238 		 * Ideally the pivot record will divide the partition
239 		 * into two equal parts. however we have to balance the
240 		 * work involved in selecting the pivot record with the
241 		 * expected benefit.
242 		 *
243 		 * The choice of pivot record depends on the number of
244 		 * records in the partition
245 		 *
246 		 * for small partitions (nrec < THRESH_M3)
247 		 *   we just select the record in the middle of the partition
248 		 *
249 		 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
250 		 *   we select three values and choose the median of 3
251 		 *
252 		 * if (nrec >= THRESH_M9)
253 		 *   then we use an approximate median of 9
254 		 *   9 records are selected and grouped in 3 groups of 3
255 		 *   the median of each of these 3 groups is fed into another
256 		 *   median of 3 decision.
257 		 *
258 		 * Each median of 3 decision is 2 or 3 compares,
259 		 * so median of 9 costs between 8 and 12 compares.
260 		 *
261 		 * i is byte distance between two consecutive samples
262 		 * m2 will point to the pivot record
263 		 */
264 		if (nrec < THRESH_M3) {
265 			m2 = b_lim + (nrec / 2) * rsiz;
266 		} else if (nrec < THRESH_M9) {
267 			/* use median of 3 */
268 			i = ((nrec - 1) / 2) * rsiz;
269 			m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
270 		} else {
271 			/* approx median of 9 */
272 			i = ((nrec - 1) / 8) * rsiz;
273 			m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i);
274 			m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
275 			m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
276 			m2 = med3(m1, m2, m3);
277 		}
278 
279 		/*
280 		 * quick sort partitioning
281 		 *
282 		 * The partition limits are defined by bottom and top pointers
283 		 * b_lim and t_lim.
284 		 *
285 		 * qsort uses a fairly standard method of moving the
286 		 * partitioning pointers, b_par and t_par, to the middle of
287 		 * the partition and exchanging records that are in the
288 		 * wrong part of the partition.
289 		 *
290 		 * Two enhancements have been made to the basic algorithm.
291 		 * One for handling duplicate records and one to minimize
292 		 * the number of swaps.
293 		 *
294 		 * Two duplicate records pointers are (b_dup and t_dup) are
295 		 * initially set to b_lim and t_lim.  Each time a record
296 		 * whose sort key value is equal to the pivot record is found
297 		 * it will be swapped with the record pointed to by
298 		 * b_dup or t_dup and the duplicate pointer will be
299 		 * incremented toward the center.
300 		 * When partitioning is complete, all the duplicate records
301 		 * will have been collected at the upper and lower limits of
302 		 * the partition and can easily be moved adjacent to the
303 		 * pivot record.
304 		 *
305 		 * The second optimization is to minimize the number of swaps.
306 		 * The pointer m2 points to the pivot record.
307 		 * During partitioning, if m2 is ever equal to the partitioning
308 		 * pointers, b_par or t_par, then b_par or t_par just moves
309 		 * onto the next record without doing a compare.
310 		 * If as a result of duplicate record detection,
311 		 * b_dup or t_dup is ever equal to m2,
312 		 * then m2 is changed to point to the duplicate record and
313 		 * b_dup or t_dup is incremented with out swapping records.
314 		 *
315 		 * When partitioning is done, we may not have the same pivot
316 		 * record that we started with, but we will have one with
317 		 * an equal sort key.
318 		 */
319 		b_dup = b_par		= b_lim;
320 		t_dup = t_par = t_lim	= b_lim + rsiz * (nrec - 1);
321 		for (;;) {
322 
323 			/* move bottom pointer up */
324 			for (; b_par <= t_par; b_par += rsiz) {
325 				if (b_par == m2) {
326 					continue;
327 				}
328 				cv = cmp(b_par, m2);
329 				if (cv > 0) {
330 					break;
331 				}
332 				if (cv == 0) {
333 					if (b_dup == m2) {
334 						m2 = b_par;
335 					} else if (b_dup != b_par) {
336 						(*swapf)(b_dup, b_par, loops);
337 					}
338 					b_dup += rsiz;
339 				}
340 			}
341 
342 			/* move top pointer down */
343 			for (; b_par < t_par; t_par -= rsiz) {
344 				if (t_par == m2) {
345 					continue;
346 				}
347 				cv = cmp(t_par, m2);
348 				if (cv < 0) {
349 					break;
350 				}
351 				if (cv == 0) {
352 					if (t_dup == m2) {
353 						m2 = t_par;
354 					} else if (t_dup != t_par) {
355 						(*swapf)(t_dup, t_par, loops);
356 					}
357 					t_dup -= rsiz;
358 				}
359 			}
360 
361 			/* break if we are done partitioning */
362 			if (b_par >= t_par) {
363 				break;
364 			}
365 
366 			/* exchange records at upper and lower break points */
367 			(*swapf)(b_par, t_par, loops);
368 			b_par += rsiz;
369 			t_par -= rsiz;
370 		}
371 
372 		/*
373 		 * partitioning is now complete
374 		 *
375 		 * there are two termination conditions from the partitioning
376 		 * loop above.  Either b_par or t_par have crossed or
377 		 * they are equal.
378 		 *
379 		 * we need to swap the pivot record to its final position
380 		 * m2 could be in either the upper or lower partitions
381 		 * or it could already be in its final position
382 		 */
383 		/*
384 		 * R[b_par] > R[m2]
385 		 * R[t_par] < R[m2]
386 		 */
387 		if (t_par < b_par) {
388 			if (m2 < t_par) {
389 				(*swapf)(m2, t_par, loops);
390 				m2 = b_par = t_par;
391 			} else if (m2 > b_par) {
392 				(*swapf)(m2, b_par, loops);
393 				m2 = t_par = b_par;
394 			} else {
395 				b_par = t_par = m2;
396 			}
397 		} else {
398 			if (m2 < t_par) {
399 				t_par = b_par = t_par - rsiz;
400 			}
401 			if (m2 != b_par) {
402 				(*swapf)(m2, b_par, loops);
403 			}
404 			m2 = t_par;
405 		}
406 
407 		/*
408 		 * move bottom duplicates next to pivot
409 		 * optimized to eliminate overlap
410 		 */
411 		d_bytelength = b_dup - b_lim;
412 		if (b_par - b_dup < d_bytelength) {
413 			b_dup = b_lim + (b_par - b_dup);
414 		}
415 		while (b_dup > b_lim) {
416 			b_dup -= rsiz;
417 			b_par -= rsiz;
418 			(*swapf)(b_dup, b_par, loops);
419 		}
420 		b_par = m2 - d_bytelength;
421 
422 		/*
423 		 * move top duplicates next to pivot
424 		 */
425 		d_bytelength = t_lim - t_dup;
426 		if (t_dup - t_par < d_bytelength) {
427 			t_dup = t_lim - (t_dup - t_par);
428 		}
429 		while (t_dup < t_lim) {
430 			t_dup += rsiz;
431 			t_par += rsiz;
432 			(*swapf)(t_dup, t_par, loops);
433 		}
434 		t_par = m2 + d_bytelength;
435 
436 		/*
437 		 * when a qsort pass completes there are three partitions
438 		 * 1) the lower contains all records less than pivot
439 		 * 2) the upper contains all records greater than pivot
440 		 * 3) the pivot partition contains all record equal to pivot
441 		 *
442 		 * all records in the pivot partition are in their final
443 		 * position and do not need to be accounted for by the stack
444 		 *
445 		 * when adding partitions to the stack
446 		 * it is important to add the largest partition first
447 		 * to prevent stack overflow.
448 		 *
449 		 * calculate number of unsorted records in top and bottom
450 		 * push resulting partitions on stack
451 		 */
452 		b_nrec = (b_par - b_lim) / rsiz;
453 		t_nrec = (t_lim - t_par) / rsiz;
454 		if (b_nrec < t_nrec) {
455 			sp->b_lim = t_par + rsiz;
456 			sp->nrec = t_nrec;
457 			sp++;
458 			sp->b_lim = b_lim;
459 			sp->nrec = b_nrec;
460 			sp++;
461 		} else {
462 			sp->b_lim = b_lim;
463 			sp->nrec = b_nrec;
464 			sp++;
465 			sp->b_lim = t_par + rsiz;
466 			sp->nrec = t_nrec;
467 			sp++;
468 		}
469 	}
470 }
471 
472 /*
473  * The following swap functions should not create a stack frame
474  * the SPARC call / return instruction will be executed
475  * but the a save / restore will not be executed
476  * which means we won't do a window turn with the spill / fill overhead
477  * verify this by examining the assembly code
478  */
479 
480 /* ARGSUSED */
481 static void
482 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
483 {
484 	uint32_t temp;
485 
486 	temp = *r1;
487 	*r1++ = *r2;
488 	*r2++ = temp;
489 }
490 
491 /* ARGSUSED */
492 static void
493 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
494 {
495 	uint64_t temp;
496 
497 	temp = *r1;
498 	*r1++ = *r2;
499 	*r2++ = temp;
500 }
501 
502 static void
503 swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
504 {
505 	uint32_t temp;
506 
507 	/* character by character */
508 	while (cnt--) {
509 		temp = *r1;
510 		*r1++ = *r2;
511 		*r2++ = temp;
512 	}
513 }
514 
515 static void
516 swapb(char *r1, char *r2, size_t cnt)
517 {
518 	char	temp;
519 
520 	/* character by character */
521 	while (cnt--) {
522 		temp = *r1;
523 		*r1++ = *r2;
524 		*r2++ = temp;
525 	}
526 }
527