1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org> 5 * 6 * biastest.c -- bias test for arc4random_uniform(). 7 * 8 * The default configuration of this test has an upper bound of 9 * (3/4) * UINT32_MAX, which should give a high amount of bias in 10 * an incorrect implementation. If the range reduction is 11 * implemented correctly, the parameters of the statistic should 12 * closely match the expected values. If not, they'll differ. 13 * 14 * For memory usage reasons, we use an uchar to track the number of 15 * observations per bucket. If the number of tries is much larger 16 * than upper_bound, the buckets likely overflow. This is detected 17 * by the test, but will lead to incorrect results. 18 */ 19 20 #include <assert.h> 21 #include <limits.h> 22 #include <math.h> 23 #include <signal.h> 24 #include <stdatomic.h> 25 #include <stdbool.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <unistd.h> 30 31 static void collect_sample(unsigned char *, long long, uint32_t); 32 static void analyze_sample(const unsigned char *, long long, uint32_t); 33 34 static atomic_bool complete = false; 35 static long long tries = 5ULL << 32; 36 static atomic_llong tries_done = 0; 37 38 static void 39 usage(const char *argv0) 40 { 41 fprintf(stderr, "usage: %s [-n tries] [-t upper_bound]\n", argv0); 42 exit(EXIT_FAILURE); 43 } 44 45 int 46 main(int argc, char *argv[]) 47 { 48 uint32_t threshold = 3UL << 30; 49 int ch; 50 unsigned char *sample; 51 52 while (ch = getopt(argc, argv, "n:t:"), ch != EOF) 53 switch (ch) { 54 case 'n': 55 tries = atoll(optarg); 56 break; 57 58 case 't': 59 threshold = (uint32_t)atoll(optarg); 60 break; 61 62 default: 63 usage(argv[0]); 64 } 65 66 if (optind != argc) 67 usage(argv[0]); 68 69 if (threshold == 0) { 70 fprintf(stderr, "threshold must be between 1 and %lu\n", (unsigned long)UINT32_MAX); 71 exit(EXIT_FAILURE); 72 } 73 74 sample = calloc(threshold, 1); 75 if (sample == NULL) { 76 perror("calloc(threshold, 1)"); 77 return (EXIT_FAILURE); 78 } 79 80 collect_sample(sample, tries, threshold); 81 analyze_sample(sample, tries, threshold); 82 } 83 84 static void 85 progress(int signo) 86 { 87 (void)signo; 88 89 if (!complete) { 90 fprintf(stderr, "\r%10lld of %10lld samples taken (%5.2f%% done)", 91 tries_done, tries, (tries_done * 100.0) / tries); 92 93 signal(SIGALRM, progress); 94 alarm(1); 95 } 96 } 97 98 static void 99 collect_sample(unsigned char *sample, long long tries, uint32_t threshold) 100 { 101 long long i; 102 uint32_t x; 103 bool overflowed = false; 104 105 progress(SIGALRM); 106 107 for (i = 0; i < tries; i++) { 108 x = arc4random_uniform(threshold); 109 tries_done++; 110 assert(x < threshold); 111 112 if (sample[x] == UCHAR_MAX) { 113 if (!overflowed) { 114 printf("sample table overflow, results will be incorrect\n"); 115 overflowed = true; 116 } 117 } else 118 sample[x]++; 119 } 120 121 progress(SIGALRM); 122 complete = true; 123 fputc('\n', stderr); 124 } 125 126 static void 127 analyze_sample(const unsigned char *sample, long long tries, uint32_t threshold) 128 { 129 double discrepancy, average, variance, total; 130 long long histogram[UCHAR_MAX + 1] = { 0 }, sum, n, median; 131 uint32_t i, i_min, i_max; 132 int min, max; 133 134 printf("distribution properties:\n"); 135 136 /* find median, average, deviation, smallest, and largest bucket */ 137 total = 0.0; 138 for (i = 0; i < threshold; i++) { 139 histogram[sample[i]]++; 140 total += (double)i * sample[i]; 141 } 142 143 average = total / tries; 144 145 variance = 0.0; 146 median = threshold; 147 n = 0; 148 i_min = 0; 149 i_max = 0; 150 min = sample[i_min]; 151 max = sample[i_max]; 152 153 for (i = 0; i < threshold; i++) { 154 discrepancy = i - average; 155 variance += sample[i] * discrepancy * discrepancy; 156 157 n += sample[i]; 158 if (median == threshold && n > tries / 2) 159 median = i; 160 161 if (sample[i] < min) { 162 i_min = i; 163 min = sample[i_min]; 164 } else if (sample[i] > max) { 165 i_max = i; 166 max = sample[i_max]; 167 } 168 } 169 170 variance /= tries; 171 assert(median < threshold); 172 173 printf("\tthreshold: %lu\n", (unsigned long)threshold); 174 printf("\tobservations: %lld\n", tries); 175 printf("\tleast common: %lu (%d observations)\n", (unsigned long)i_min, min); 176 printf("\tmost common: %lu (%d observations)\n", (unsigned long)i_max, max); 177 printf("\tmedian: %lld (expected %lu)\n", median, (unsigned long)threshold / 2); 178 printf("\taverage: %f (expected %f)\n", average, 0.5 * (threshold - 1)); 179 printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance), 180 sqrt(((double)threshold * threshold - 1.0) / 12)); 181 182 /* build histogram and analyze it */ 183 printf("sample properties:\n"); 184 185 /* find median, average, and deviation */ 186 average = (double)tries / threshold; 187 188 variance = 0.0; 189 for (i = 0; i < UCHAR_MAX; i++) { 190 discrepancy = i - average; 191 variance += histogram[i] * discrepancy * discrepancy; 192 } 193 194 variance /= threshold; 195 196 n = 0; 197 median = UCHAR_MAX + 1; 198 for (i = 0; i <= UCHAR_MAX; i++) { 199 n += histogram[i]; 200 if (n >= threshold / 2) { 201 median = i; 202 break; 203 } 204 } 205 206 assert(median <= UCHAR_MAX); /* unreachable */ 207 208 printf("\tmedian: %lld\n", median); 209 printf("\taverage: %f\n", average); 210 printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance), sqrt(average * (1.0 - 1.0 / threshold))); 211 212 printf("histogram:\n"); 213 for (i = 0; i < 256; i++) 214 if (histogram[i] != 0) 215 printf("\t%3d:\t%lld\n", (int)i, histogram[i]); 216 } 217