xref: /freebsd/lib/libc/tests/stdlib/qsort_bench.c (revision 88b8b7f0c4e9948667a2279e78e975a784049cba)
1 /*-
2  * Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org>
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  */
6 
7 #include <atf-c.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <time.h>
12 #include <unistd.h>
13 
14 /*-
15  * Measures qsort(3) runtime with pathological input and verify that it
16  * stays close to N * log2(N).
17  *
18  * Thanks to Vivian Hussey for the proof of concept.
19  *
20  * The input we construct is similar to a sweep from 0 to N where each
21  * half, except for the first element, has been reversed; for instance,
22  * with N = 8, we get { 0, 3, 2, 1, 4, 8, 7, 6 }.  This triggers a bug in
23  * the BSD qsort(3) where it will switch to insertion sort if the pivots
24  * are sorted.
25  *
26  * This article goes into more detail about the bug and its origin:
27  *
28  * https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3
29  *
30  * With this optimization (the `if (swap_cnt == 0)` block), qsort(3) needs
31  * roughly N * N / 4 comparisons to sort our pathological input.  Without
32  * it, it needs only a little more than N * log2(N) comparisons.
33  */
34 
35 /* we stop testing once a single takes longer than this */
36 #define MAXRUNSECS 10
37 
38 static bool debugging;
39 
40 static uintmax_t ncmp;
41 
42 static int
43 intcmp(const void *a, const void *b)
44 {
45 	ncmp++;
46 	return ((*(int *)a > *(int *)b) - (*(int *)a < *(int *)b));
47 }
48 
49 static void
50 qsort_bench(int log2n)
51 {
52 	uintmax_t n = 1LLU << log2n;
53 	int *buf;
54 
55 	/* fill an array with a pathological pattern */
56 	ATF_REQUIRE(buf = malloc(n * sizeof(*buf)));
57 	buf[0] = 0;
58 	buf[n / 2] = n / 2;
59 	for (unsigned int i = 1; i < n / 2; i++) {
60 		buf[i] = n / 2 - i;
61 		buf[n / 2 + i] = n - i;
62 	}
63 
64 	ncmp = 0;
65 	qsort(buf, n, sizeof(*buf), intcmp);
66 
67 	/* check result and free array */
68 	if (debugging) {
69 		for (unsigned int i = 1; i < n; i++) {
70 			ATF_REQUIRE_MSG(buf[i] > buf[i - 1],
71 			    "array is not sorted");
72 		}
73 	}
74 	free(buf);
75 
76 	/* check that runtime does not exceed N² */
77 	ATF_CHECK_MSG(ncmp / n < n,
78 	    "runtime %ju exceeds N² for N = %ju", ncmp, n);
79 
80 	/* check that runtime does not exceed N log N by much */
81 	ATF_CHECK_MSG(ncmp / n <= log2n + 1,
82 	    "runtime %ju exceeds N log N for N = %ju", ncmp, n);
83 }
84 
85 ATF_TC_WITHOUT_HEAD(qsort_bench);
86 ATF_TC_BODY(qsort_bench, tc)
87 {
88 	struct timespec t0, t1;
89 	uintmax_t tus;
90 
91 	for (int i = 10; i <= 30; i++) {
92 		clock_gettime(CLOCK_UPTIME, &t0);
93 		qsort_bench(i);
94 		clock_gettime(CLOCK_UPTIME, &t1);
95 		tus = t1.tv_sec * 1000000 + t1.tv_nsec / 1000;
96 		tus -= t0.tv_sec * 1000000 + t0.tv_nsec / 1000;
97 		if (debugging) {
98 			fprintf(stderr, "N = 2^%d in %ju.%06jus\n",
99 			    i, tus / 1000000, tus % 1000000);
100 		}
101 		/* stop once an individual run exceeds our limit */
102 		if (tus / 1000000 >= MAXRUNSECS)
103 			break;
104 	}
105 }
106 
107 ATF_TP_ADD_TCS(tp)
108 {
109 	debugging = !getenv("__RUNNING_INSIDE_ATF_RUN") &&
110 	    isatty(STDERR_FILENO);
111 	ATF_TP_ADD_TC(tp, qsort_bench);
112 	return (atf_no_error());
113 }
114