1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2020 Oxide Computer Company 26 */ 27 28 #if !defined(_KERNEL) && !defined(_KMDB) 29 #include "lint.h" 30 #endif /* !_KERNEL && !_KMDB */ 31 32 #include <sys/types.h> 33 34 #if !defined(_KERNEL) && !defined(_KMDB) 35 #include <stdlib.h> 36 #include <synch.h> 37 #endif /* !_KERNEL && !_KMDB */ 38 39 #include "qsort.h" 40 41 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt); 42 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt); 43 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt); 44 static void swapb(char *r1, char *r2, size_t cnt); 45 46 typedef int (*cmp_f)(const void *, const void *, void *); 47 48 /* 49 * choose a median of 3 values 50 */ 51 static __GNU_INLINE char * 52 med3(char *a, char *b, char *c, cmp_f cmp, void *arg) 53 { 54 if (cmp(a, b, arg) < 0) { 55 if (cmp(b, c, arg) < 0) { 56 return (b); 57 } else if (cmp(a, c, arg) < 0) { 58 return (c); 59 } else { 60 return (a); 61 } 62 } else { 63 if (cmp(b, c, arg) > 0) { 64 return (b); 65 } else if (cmp(a, c, arg) > 0) { 66 return (c); 67 } else { 68 return (a); 69 } 70 } 71 } 72 73 #define THRESH_L 5 /* threshold for insertion sort */ 74 #define THRESH_M3 20 /* threshold for median of 3 */ 75 #define THRESH_M9 50 /* threshold for median of 9 */ 76 77 typedef struct { 78 char *b_lim; 79 size_t nrec; 80 } stk_t; 81 82 /* 83 * qsort() is a general purpose, in-place sorting routine using a 84 * user provided call back function for comparisons. This implementation 85 * utilizes a ternary quicksort algorithm, and cuts over to an 86 * insertion sort for partitions involving fewer than THRESH_L records. 87 * 88 * Potential User Errors 89 * There is no return value from qsort, this function has no method 90 * of alerting the user that a sort did not work or could not work. 91 * We do not print an error message or exit the process or thread, 92 * Even if we can detect an error, We CANNOT silently return without 93 * sorting the data, if we did so the user could never be sure the 94 * sort completed successfully. 95 * It is possible we could change the return value of sort from void 96 * to int and return success or some error codes, but this gets into 97 * standards and compatibility issues. 98 * 99 * Examples of qsort parameter errors might be 100 * 1) record size (rsiz) equal to 0 101 * qsort will loop and never return. 102 * 2) record size (rsiz) less than 0 103 * rsiz is unsigned, so a negative value is insanely large 104 * 3) number of records (nrec) is 0 105 * This is legal - qsort will return without examining any records 106 * 4) number of records (nrec) is less than 0 107 * nrec is unsigned, so a negative value is insanely large. 108 * 5) nrec * rsiz > memory allocation for sort array 109 * a segment violation may occur 110 * corruption of other memory may occur 111 * 6) The base address of the sort array is invalid 112 * a segment violation may occur 113 * corruption of other memory may occur 114 * 7) The user call back function is invalid 115 * we may get alignment errors or segment violations 116 * we may jump into never-never land 117 * 118 * Some less obvious errors might be 119 * 8) The user compare function is not comparing correctly 120 * 9) The user compare function modifies the data records 121 */ 122 123 static int 124 qsort_r_wrapper(const void *a, const void *b, void *arg) 125 { 126 int (*cmp)(const void *, const void *) = 127 (int(*)(const void *, const void *))(uintptr_t)arg; 128 return (cmp(a, b)); 129 } 130 131 void 132 qsort_r(void *basep, size_t nrec, size_t rsiz, 133 cmp_f cmp, void *arg) 134 { 135 size_t i; /* temporary variable */ 136 137 /* variables used by swap */ 138 void (*swapf)(char *, char *, size_t); 139 size_t loops; 140 141 /* variables used by sort */ 142 stk_t stack[8 * sizeof (nrec) + 1]; 143 stk_t *sp; 144 char *b_lim; /* bottom limit */ 145 char *b_dup; /* bottom duplicate */ 146 char *b_par; /* bottom partition */ 147 char *t_lim; /* top limit */ 148 char *t_dup; /* top duplicate */ 149 char *t_par; /* top partition */ 150 char *m1, *m2, *m3; /* median pointers */ 151 uintptr_t d_bytelength; /* byte length of duplicate records */ 152 int b_nrec; 153 int t_nrec; 154 int cv; /* results of compare (bottom / top) */ 155 156 /* 157 * choose a swap function based on alignment and size 158 * 159 * The qsort function sorts an array of fixed length records. 160 * We have very limited knowledge about the data record itself. 161 * It may be that the data record is in the array we are sorting 162 * or it may be that the array contains pointers or indexes to 163 * the actual data record and all that we are sorting is the indexes. 164 * 165 * The following decision will choose an optimal swap function 166 * based on the size and alignment of the data records 167 * swapp64 will swap 64 bit pointers 168 * swapp32 will swap 32 bit pointers 169 * swapi will swap an array of 32 bit integers 170 * swapb will swap an array of 8 bit characters 171 * 172 * swapi and swapb will also require the variable loops to be set 173 * to control the length of the array being swapped 174 */ 175 if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) && 176 (rsiz == sizeof (uint64_t))) { 177 loops = 1; 178 swapf = (void (*)(char *, char *, size_t))swapp64; 179 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && 180 (rsiz == sizeof (uint32_t))) { 181 loops = 1; 182 swapf = (void (*)(char *, char *, size_t))swapp32; 183 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && 184 ((rsiz & (sizeof (uint32_t) - 1)) == 0)) { 185 loops = rsiz / sizeof (int); 186 swapf = (void (*)(char *, char *, size_t))swapi; 187 } else { 188 loops = rsiz; 189 swapf = swapb; 190 } 191 192 /* 193 * qsort is a partitioning sort 194 * 195 * the stack is the bookkeeping mechanism to keep track of all 196 * the partitions. 197 * 198 * each sort pass takes one partition and sorts it into two partitions. 199 * at the top of the loop we simply take the partition on the top 200 * of the stack and sort it. See the comments at the bottom 201 * of the loop regarding which partitions to add in what order. 202 * 203 * initially put the whole partition on the stack 204 */ 205 sp = stack; 206 sp->b_lim = (char *)basep; 207 sp->nrec = nrec; 208 sp++; 209 while (sp > stack) { 210 sp--; 211 b_lim = sp->b_lim; 212 nrec = sp->nrec; 213 214 /* 215 * a linear insertion sort i faster than a qsort for 216 * very small number of records (THRESH_L) 217 * 218 * if number records < threshold use linear insertion sort 219 * 220 * this also handles the special case where the partition 221 * 0 or 1 records length. 222 */ 223 if (nrec < THRESH_L) { 224 /* 225 * Linear insertion sort 226 */ 227 t_par = b_lim; 228 for (i = 1; i < nrec; i++) { 229 t_par += rsiz; 230 b_par = t_par; 231 while (b_par > b_lim) { 232 b_par -= rsiz; 233 if ((*cmp)(b_par, b_par + rsiz, arg) <= 234 0) { 235 break; 236 } 237 (*swapf)(b_par, b_par + rsiz, loops); 238 } 239 } 240 241 /* 242 * a linear insertion sort will put all records 243 * in their final position and will not create 244 * subpartitions. 245 * 246 * therefore when the insertion sort is complete 247 * just go to the top of the loop and get the 248 * next partition to sort. 249 */ 250 continue; 251 } 252 253 /* quicksort */ 254 255 /* 256 * choose a pivot record 257 * 258 * Ideally the pivot record will divide the partition 259 * into two equal parts. however we have to balance the 260 * work involved in selecting the pivot record with the 261 * expected benefit. 262 * 263 * The choice of pivot record depends on the number of 264 * records in the partition 265 * 266 * for small partitions (nrec < THRESH_M3) 267 * we just select the record in the middle of the partition 268 * 269 * if (nrec >= THRESH_M3 && nrec < THRESH_M9) 270 * we select three values and choose the median of 3 271 * 272 * if (nrec >= THRESH_M9) 273 * then we use an approximate median of 9 274 * 9 records are selected and grouped in 3 groups of 3 275 * the median of each of these 3 groups is fed into another 276 * median of 3 decision. 277 * 278 * Each median of 3 decision is 2 or 3 compares, 279 * so median of 9 costs between 8 and 12 compares. 280 * 281 * i is byte distance between two consecutive samples 282 * m2 will point to the pivot record 283 */ 284 if (nrec < THRESH_M3) { 285 m2 = b_lim + (nrec / 2) * rsiz; 286 } else if (nrec < THRESH_M9) { 287 /* use median of 3 */ 288 i = ((nrec - 1) / 2) * rsiz; 289 m2 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg); 290 } else { 291 /* approx median of 9 */ 292 i = ((nrec - 1) / 8) * rsiz; 293 m1 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg); 294 m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i, 295 cmp, arg); 296 m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i, 297 cmp, arg); 298 m2 = med3(m1, m2, m3, cmp, arg); 299 } 300 301 /* 302 * quick sort partitioning 303 * 304 * The partition limits are defined by bottom and top pointers 305 * b_lim and t_lim. 306 * 307 * qsort uses a fairly standard method of moving the 308 * partitioning pointers, b_par and t_par, to the middle of 309 * the partition and exchanging records that are in the 310 * wrong part of the partition. 311 * 312 * Two enhancements have been made to the basic algorithm. 313 * One for handling duplicate records and one to minimize 314 * the number of swaps. 315 * 316 * Two duplicate records pointers are (b_dup and t_dup) are 317 * initially set to b_lim and t_lim. Each time a record 318 * whose sort key value is equal to the pivot record is found 319 * it will be swapped with the record pointed to by 320 * b_dup or t_dup and the duplicate pointer will be 321 * incremented toward the center. 322 * When partitioning is complete, all the duplicate records 323 * will have been collected at the upper and lower limits of 324 * the partition and can easily be moved adjacent to the 325 * pivot record. 326 * 327 * The second optimization is to minimize the number of swaps. 328 * The pointer m2 points to the pivot record. 329 * During partitioning, if m2 is ever equal to the partitioning 330 * pointers, b_par or t_par, then b_par or t_par just moves 331 * onto the next record without doing a compare. 332 * If as a result of duplicate record detection, 333 * b_dup or t_dup is ever equal to m2, 334 * then m2 is changed to point to the duplicate record and 335 * b_dup or t_dup is incremented with out swapping records. 336 * 337 * When partitioning is done, we may not have the same pivot 338 * record that we started with, but we will have one with 339 * an equal sort key. 340 */ 341 b_dup = b_par = b_lim; 342 t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1); 343 for (;;) { 344 345 /* move bottom pointer up */ 346 for (; b_par <= t_par; b_par += rsiz) { 347 if (b_par == m2) { 348 continue; 349 } 350 cv = cmp(b_par, m2, arg); 351 if (cv > 0) { 352 break; 353 } 354 if (cv == 0) { 355 if (b_dup == m2) { 356 m2 = b_par; 357 } else if (b_dup != b_par) { 358 (*swapf)(b_dup, b_par, loops); 359 } 360 b_dup += rsiz; 361 } 362 } 363 364 /* move top pointer down */ 365 for (; b_par < t_par; t_par -= rsiz) { 366 if (t_par == m2) { 367 continue; 368 } 369 cv = cmp(t_par, m2, arg); 370 if (cv < 0) { 371 break; 372 } 373 if (cv == 0) { 374 if (t_dup == m2) { 375 m2 = t_par; 376 } else if (t_dup != t_par) { 377 (*swapf)(t_dup, t_par, loops); 378 } 379 t_dup -= rsiz; 380 } 381 } 382 383 /* break if we are done partitioning */ 384 if (b_par >= t_par) { 385 break; 386 } 387 388 /* exchange records at upper and lower break points */ 389 (*swapf)(b_par, t_par, loops); 390 b_par += rsiz; 391 t_par -= rsiz; 392 } 393 394 /* 395 * partitioning is now complete 396 * 397 * there are two termination conditions from the partitioning 398 * loop above. Either b_par or t_par have crossed or 399 * they are equal. 400 * 401 * we need to swap the pivot record to its final position 402 * m2 could be in either the upper or lower partitions 403 * or it could already be in its final position 404 */ 405 /* 406 * R[b_par] > R[m2] 407 * R[t_par] < R[m2] 408 */ 409 if (t_par < b_par) { 410 if (m2 < t_par) { 411 (*swapf)(m2, t_par, loops); 412 m2 = b_par = t_par; 413 } else if (m2 > b_par) { 414 (*swapf)(m2, b_par, loops); 415 m2 = t_par = b_par; 416 } else { 417 b_par = t_par = m2; 418 } 419 } else { 420 if (m2 < t_par) { 421 t_par = b_par = t_par - rsiz; 422 } 423 if (m2 != b_par) { 424 (*swapf)(m2, b_par, loops); 425 } 426 m2 = t_par; 427 } 428 429 /* 430 * move bottom duplicates next to pivot 431 * optimized to eliminate overlap 432 */ 433 d_bytelength = b_dup - b_lim; 434 if (b_par - b_dup < d_bytelength) { 435 b_dup = b_lim + (b_par - b_dup); 436 } 437 while (b_dup > b_lim) { 438 b_dup -= rsiz; 439 b_par -= rsiz; 440 (*swapf)(b_dup, b_par, loops); 441 } 442 b_par = m2 - d_bytelength; 443 444 /* 445 * move top duplicates next to pivot 446 */ 447 d_bytelength = t_lim - t_dup; 448 if (t_dup - t_par < d_bytelength) { 449 t_dup = t_lim - (t_dup - t_par); 450 } 451 while (t_dup < t_lim) { 452 t_dup += rsiz; 453 t_par += rsiz; 454 (*swapf)(t_dup, t_par, loops); 455 } 456 t_par = m2 + d_bytelength; 457 458 /* 459 * when a qsort pass completes there are three partitions 460 * 1) the lower contains all records less than pivot 461 * 2) the upper contains all records greater than pivot 462 * 3) the pivot partition contains all record equal to pivot 463 * 464 * all records in the pivot partition are in their final 465 * position and do not need to be accounted for by the stack 466 * 467 * when adding partitions to the stack 468 * it is important to add the largest partition first 469 * to prevent stack overflow. 470 * 471 * calculate number of unsorted records in top and bottom 472 * push resulting partitions on stack 473 */ 474 b_nrec = (b_par - b_lim) / rsiz; 475 t_nrec = (t_lim - t_par) / rsiz; 476 if (b_nrec < t_nrec) { 477 sp->b_lim = t_par + rsiz; 478 sp->nrec = t_nrec; 479 sp++; 480 sp->b_lim = b_lim; 481 sp->nrec = b_nrec; 482 sp++; 483 } else { 484 sp->b_lim = b_lim; 485 sp->nrec = b_nrec; 486 sp++; 487 sp->b_lim = t_par + rsiz; 488 sp->nrec = t_nrec; 489 sp++; 490 } 491 } 492 } 493 494 void 495 qsort(void *basep, size_t nrec, size_t rsiz, 496 int (*cmp)(const void *, const void *)) 497 { 498 return (qsort_r(basep, nrec, rsiz, qsort_r_wrapper, 499 (void *)(uintptr_t)cmp)); 500 } 501 502 /* 503 * The following swap functions should not create a stack frame 504 * the SPARC call / return instruction will be executed 505 * but the a save / restore will not be executed 506 * which means we won't do a window turn with the spill / fill overhead 507 * verify this by examining the assembly code 508 */ 509 510 /* ARGSUSED */ 511 static void 512 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt) 513 { 514 uint32_t temp; 515 516 temp = *r1; 517 *r1++ = *r2; 518 *r2++ = temp; 519 } 520 521 /* ARGSUSED */ 522 static void 523 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt) 524 { 525 uint64_t temp; 526 527 temp = *r1; 528 *r1++ = *r2; 529 *r2++ = temp; 530 } 531 532 static void 533 swapi(uint32_t *r1, uint32_t *r2, size_t cnt) 534 { 535 uint32_t temp; 536 537 /* character by character */ 538 while (cnt--) { 539 temp = *r1; 540 *r1++ = *r2; 541 *r2++ = temp; 542 } 543 } 544 545 static void 546 swapb(char *r1, char *r2, size_t cnt) 547 { 548 char temp; 549 550 /* character by character */ 551 while (cnt--) { 552 temp = *r1; 553 *r1++ = *r2; 554 *r2++ = temp; 555 } 556 } 557