1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #if !defined(_KERNEL) && !defined(_KMDB) 30 #include "lint.h" 31 #endif /* !_KERNEL && !_KMDB */ 32 33 #include <sys/types.h> 34 35 #if !defined(_KERNEL) && !defined(_KMDB) 36 #include <stdlib.h> 37 #include <synch.h> 38 #endif /* !_KERNEL && !_KMDB */ 39 40 #include "qsort.h" 41 42 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt); 43 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt); 44 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt); 45 static void swapb(char *r1, char *r2, size_t cnt); 46 47 /* 48 * choose a median of 3 values 49 * 50 * note: cstyle specifically prohibits nested conditional operators 51 * but this is the only way to do the median of 3 function in-line 52 */ 53 #define med3(a, b, c) (cmp((a), (b)) < 0) \ 54 ? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \ 55 : ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a)) 56 57 #define THRESH_L 5 /* threshold for insertion sort */ 58 #define THRESH_M3 20 /* threshold for median of 3 */ 59 #define THRESH_M9 50 /* threshold for median of 9 */ 60 61 typedef struct { 62 char *b_lim; 63 size_t nrec; 64 } stk_t; 65 66 /* 67 * qsort() is a general purpose, in-place sorting routine using a 68 * user provided call back function for comparisons. This implementation 69 * utilizes a ternary quicksort algorithm, and cuts over to an 70 * insertion sort for partitions involving fewer than THRESH_L records. 71 * 72 * Potential User Errors 73 * There is no return value from qsort, this function has no method 74 * of alerting the user that a sort did not work or could not work. 75 * We do not print an error message or exit the process or thread, 76 * Even if we can detect an error, We CANNOT silently return without 77 * sorting the data, if we did so the user could never be sure the 78 * sort completed successfully. 79 * It is possible we could change the return value of sort from void 80 * to int and return success or some error codes, but this gets into 81 * standards and compatibility issues. 82 * 83 * Examples of qsort parameter errors might be 84 * 1) record size (rsiz) equal to 0 85 * qsort will loop and never return. 86 * 2) record size (rsiz) less than 0 87 * rsiz is unsigned, so a negative value is insanely large 88 * 3) number of records (nrec) is 0 89 * This is legal - qsort will return without examining any records 90 * 4) number of records (nrec) is less than 0 91 * nrec is unsigned, so a negative value is insanely large. 92 * 5) nrec * rsiz > memory allocation for sort array 93 * a segment violation may occur 94 * corruption of other memory may occur 95 * 6) The base address of the sort array is invalid 96 * a segment violation may occur 97 * corruption of other memory may occur 98 * 7) The user call back function is invalid 99 * we may get alignment errors or segment violations 100 * we may jump into never-never land 101 * 102 * Some less obvious errors might be 103 * 8) The user compare function is not comparing correctly 104 * 9) The user compare function modifies the data records 105 */ 106 107 void 108 qsort( 109 void *basep, 110 size_t nrec, 111 size_t rsiz, 112 int (*cmp)(const void *, const void *)) 113 { 114 size_t i; /* temporary variable */ 115 116 /* variables used by swap */ 117 void (*swapf)(char *, char *, size_t); 118 size_t loops; 119 120 /* variables used by sort */ 121 stk_t stack[8 * sizeof (nrec) + 1]; 122 stk_t *sp; 123 char *b_lim; /* bottom limit */ 124 char *b_dup; /* bottom duplicate */ 125 char *b_par; /* bottom partition */ 126 char *t_lim; /* top limit */ 127 char *t_dup; /* top duplicate */ 128 char *t_par; /* top partition */ 129 char *m1, *m2, *m3; /* median pointers */ 130 uintptr_t d_bytelength; /* byte length of duplicate records */ 131 int b_nrec; 132 int t_nrec; 133 int cv; /* results of compare (bottom / top) */ 134 135 /* 136 * choose a swap function based on alignment and size 137 * 138 * The qsort function sorts an array of fixed length records. 139 * We have very limited knowledge about the data record itself. 140 * It may be that the data record is in the array we are sorting 141 * or it may be that the array contains pointers or indexes to 142 * the actual data record and all that we are sorting is the indexes. 143 * 144 * The following decision will choose an optimal swap function 145 * based on the size and alignment of the data records 146 * swapp64 will swap 64 bit pointers 147 * swapp32 will swap 32 bit pointers 148 * swapi will swap an array of 32 bit integers 149 * swapb will swap an array of 8 bit characters 150 * 151 * swapi and swapb will also require the variable loops to be set 152 * to control the length of the array being swapped 153 */ 154 if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) && 155 (rsiz == sizeof (uint64_t))) { 156 loops = 1; 157 swapf = (void (*)(char *, char *, size_t))swapp64; 158 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && 159 (rsiz == sizeof (uint32_t))) { 160 loops = 1; 161 swapf = (void (*)(char *, char *, size_t))swapp32; 162 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && 163 ((rsiz & (sizeof (uint32_t) - 1)) == 0)) { 164 loops = rsiz / sizeof (int); 165 swapf = (void (*)(char *, char *, size_t))swapi; 166 } else { 167 loops = rsiz; 168 swapf = swapb; 169 } 170 171 /* 172 * qsort is a partitioning sort 173 * 174 * the stack is the bookkeeping mechanism to keep track of all 175 * the partitions. 176 * 177 * each sort pass takes one partition and sorts it into two partitions. 178 * at the top of the loop we simply take the partition on the top 179 * of the stack and sort it. See the comments at the bottom 180 * of the loop regarding which partitions to add in what order. 181 * 182 * initially put the whole partition on the stack 183 */ 184 sp = stack; 185 sp->b_lim = (char *)basep; 186 sp->nrec = nrec; 187 sp++; 188 while (sp > stack) { 189 sp--; 190 b_lim = sp->b_lim; 191 nrec = sp->nrec; 192 193 /* 194 * a linear insertion sort i faster than a qsort for 195 * very small number of records (THRESH_L) 196 * 197 * if number records < threshold use linear insertion sort 198 * 199 * this also handles the special case where the partition 200 * 0 or 1 records length. 201 */ 202 if (nrec < THRESH_L) { 203 /* 204 * Linear insertion sort 205 */ 206 t_par = b_lim; 207 for (i = 1; i < nrec; i++) { 208 t_par += rsiz; 209 b_par = t_par; 210 while (b_par > b_lim) { 211 b_par -= rsiz; 212 if ((*cmp)(b_par, b_par + rsiz) <= 0) { 213 break; 214 } 215 (*swapf)(b_par, b_par + rsiz, loops); 216 } 217 } 218 219 /* 220 * a linear insertion sort will put all records 221 * in their final position and will not create 222 * subpartitions. 223 * 224 * therefore when the insertion sort is complete 225 * just go to the top of the loop and get the 226 * next partition to sort. 227 */ 228 continue; 229 } 230 231 /* quicksort */ 232 233 /* 234 * choose a pivot record 235 * 236 * Ideally the pivot record will divide the partition 237 * into two equal parts. however we have to balance the 238 * work involved in selecting the pivot record with the 239 * expected benefit. 240 * 241 * The choice of pivot record depends on the number of 242 * records in the partition 243 * 244 * for small partitions (nrec < THRESH_M3) 245 * we just select the record in the middle of the partition 246 * 247 * if (nrec >= THRESH_M3 && nrec < THRESH_M9) 248 * we select three values and choose the median of 3 249 * 250 * if (nrec >= THRESH_M9) 251 * then we use an approximate median of 9 252 * 9 records are selected and grouped in 3 groups of 3 253 * the median of each of these 3 groups is fed into another 254 * median of 3 decision. 255 * 256 * Each median of 3 decision is 2 or 3 compares, 257 * so median of 9 costs between 8 and 12 compares. 258 * 259 * i is byte distance between two consecutive samples 260 * m2 will point to the pivot record 261 */ 262 if (nrec < THRESH_M3) { 263 m2 = b_lim + (nrec / 2) * rsiz; 264 } else if (nrec < THRESH_M9) { 265 /* use median of 3 */ 266 i = ((nrec - 1) / 2) * rsiz; 267 m2 = med3(b_lim, b_lim + i, b_lim + 2 * i); 268 } else { 269 /* approx median of 9 */ 270 i = ((nrec - 1) / 8) * rsiz; 271 m1 = med3(b_lim, b_lim + i, b_lim + 2 * i); 272 m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i); 273 m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i); 274 m2 = med3(m1, m2, m3); 275 } 276 277 /* 278 * quick sort partitioning 279 * 280 * The partition limits are defined by bottom and top pointers 281 * b_lim and t_lim. 282 * 283 * qsort uses a fairly standard method of moving the 284 * partitioning pointers, b_par and t_par, to the middle of 285 * the partition and exchanging records that are in the 286 * wrong part of the partition. 287 * 288 * Two enhancements have been made to the basic algorithm. 289 * One for handling duplicate records and one to minimize 290 * the number of swaps. 291 * 292 * Two duplicate records pointers are (b_dup and t_dup) are 293 * initially set to b_lim and t_lim. Each time a record 294 * whose sort key value is equal to the pivot record is found 295 * it will be swapped with the record pointed to by 296 * b_dup or t_dup and the duplicate pointer will be 297 * incremented toward the center. 298 * When partitioning is complete, all the duplicate records 299 * will have been collected at the upper and lower limits of 300 * the partition and can easily be moved adjacent to the 301 * pivot record. 302 * 303 * The second optimization is to minimize the number of swaps. 304 * The pointer m2 points to the pivot record. 305 * During partitioning, if m2 is ever equal to the partitioning 306 * pointers, b_par or t_par, then b_par or t_par just moves 307 * onto the next record without doing a compare. 308 * If as a result of duplicate record detection, 309 * b_dup or t_dup is ever equal to m2, 310 * then m2 is changed to point to the duplicate record and 311 * b_dup or t_dup is incremented with out swapping records. 312 * 313 * When partitioning is done, we may not have the same pivot 314 * record that we started with, but we will have one with 315 * an equal sort key. 316 */ 317 b_dup = b_par = b_lim; 318 t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1); 319 for (;;) { 320 321 /* move bottom pointer up */ 322 for (; b_par <= t_par; b_par += rsiz) { 323 if (b_par == m2) { 324 continue; 325 } 326 cv = cmp(b_par, m2); 327 if (cv > 0) { 328 break; 329 } 330 if (cv == 0) { 331 if (b_dup == m2) { 332 m2 = b_par; 333 } else if (b_dup != b_par) { 334 (*swapf)(b_dup, b_par, loops); 335 } 336 b_dup += rsiz; 337 } 338 } 339 340 /* move top pointer down */ 341 for (; b_par < t_par; t_par -= rsiz) { 342 if (t_par == m2) { 343 continue; 344 } 345 cv = cmp(t_par, m2); 346 if (cv < 0) { 347 break; 348 } 349 if (cv == 0) { 350 if (t_dup == m2) { 351 m2 = t_par; 352 } else if (t_dup != t_par) { 353 (*swapf)(t_dup, t_par, loops); 354 } 355 t_dup -= rsiz; 356 } 357 } 358 359 /* break if we are done partitioning */ 360 if (b_par >= t_par) { 361 break; 362 } 363 364 /* exchange records at upper and lower break points */ 365 (*swapf)(b_par, t_par, loops); 366 b_par += rsiz; 367 t_par -= rsiz; 368 } 369 370 /* 371 * partitioning is now complete 372 * 373 * there are two termination conditions from the partitioning 374 * loop above. Either b_par or t_par have crossed or 375 * they are equal. 376 * 377 * we need to swap the pivot record to its final position 378 * m2 could be in either the upper or lower partitions 379 * or it could already be in its final position 380 */ 381 /* 382 * R[b_par] > R[m2] 383 * R[t_par] < R[m2] 384 */ 385 if (t_par < b_par) { 386 if (m2 < t_par) { 387 (*swapf)(m2, t_par, loops); 388 m2 = b_par = t_par; 389 } else if (m2 > b_par) { 390 (*swapf)(m2, b_par, loops); 391 m2 = t_par = b_par; 392 } else { 393 b_par = t_par = m2; 394 } 395 } else { 396 if (m2 < t_par) { 397 t_par = b_par = t_par - rsiz; 398 } 399 if (m2 != b_par) { 400 (*swapf)(m2, b_par, loops); 401 } 402 m2 = t_par; 403 } 404 405 /* 406 * move bottom duplicates next to pivot 407 * optimized to eliminate overlap 408 */ 409 d_bytelength = b_dup - b_lim; 410 if (b_par - b_dup < d_bytelength) { 411 b_dup = b_lim + (b_par - b_dup); 412 } 413 while (b_dup > b_lim) { 414 b_dup -= rsiz; 415 b_par -= rsiz; 416 (*swapf)(b_dup, b_par, loops); 417 } 418 b_par = m2 - d_bytelength; 419 420 /* 421 * move top duplicates next to pivot 422 */ 423 d_bytelength = t_lim - t_dup; 424 if (t_dup - t_par < d_bytelength) { 425 t_dup = t_lim - (t_dup - t_par); 426 } 427 while (t_dup < t_lim) { 428 t_dup += rsiz; 429 t_par += rsiz; 430 (*swapf)(t_dup, t_par, loops); 431 } 432 t_par = m2 + d_bytelength; 433 434 /* 435 * when a qsort pass completes there are three partitions 436 * 1) the lower contains all records less than pivot 437 * 2) the upper contains all records greater than pivot 438 * 3) the pivot partition contains all record equal to pivot 439 * 440 * all records in the pivot partition are in their final 441 * position and do not need to be accounted for by the stack 442 * 443 * when adding partitions to the stack 444 * it is important to add the largest partition first 445 * to prevent stack overflow. 446 * 447 * calculate number of unsorted records in top and bottom 448 * push resulting partitions on stack 449 */ 450 b_nrec = (b_par - b_lim) / rsiz; 451 t_nrec = (t_lim - t_par) / rsiz; 452 if (b_nrec < t_nrec) { 453 sp->b_lim = t_par + rsiz; 454 sp->nrec = t_nrec; 455 sp++; 456 sp->b_lim = b_lim; 457 sp->nrec = b_nrec; 458 sp++; 459 } else { 460 sp->b_lim = b_lim; 461 sp->nrec = b_nrec; 462 sp++; 463 sp->b_lim = t_par + rsiz; 464 sp->nrec = t_nrec; 465 sp++; 466 } 467 } 468 } 469 470 /* 471 * The following swap functions should not create a stack frame 472 * the SPARC call / return instruction will be executed 473 * but the a save / restore will not be executed 474 * which means we won't do a window turn with the spill / fill overhead 475 * verify this by examining the assembly code 476 */ 477 478 /* ARGSUSED */ 479 static void 480 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt) 481 { 482 uint32_t temp; 483 484 temp = *r1; 485 *r1++ = *r2; 486 *r2++ = temp; 487 } 488 489 /* ARGSUSED */ 490 static void 491 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt) 492 { 493 uint64_t temp; 494 495 temp = *r1; 496 *r1++ = *r2; 497 *r2++ = temp; 498 } 499 500 static void 501 swapi(uint32_t *r1, uint32_t *r2, size_t cnt) 502 { 503 uint32_t temp; 504 505 /* character by character */ 506 while (cnt--) { 507 temp = *r1; 508 *r1++ = *r2; 509 *r2++ = temp; 510 } 511 } 512 513 static void 514 swapb(char *r1, char *r2, size_t cnt) 515 { 516 char temp; 517 518 /* character by character */ 519 while (cnt--) { 520 temp = *r1; 521 *r1++ = *r2; 522 *r2++ = temp; 523 } 524 } 525