/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * biastest.c -- bias test for arc4random_uniform().
 *
 * The default configuration of this test has an upper bound of
 * (3/4) * UINT32_MAX, which should give a high amount of bias in
 * an incorrect implementation.  If the range reduction is
 * implemented correctly, the parameters of the statistic should
 * closely match the expected values.  If not, they'll differ.
 *
 * For memory usage reasons, we use an unsigned char to track the
 * number of observations per bucket.  If the number of tries is much
 * larger than upper_bound, the buckets likely overflow.  This is
 * detected by the test, but will lead to incorrect results.
 */
19*4a0fc138SRobert Clausecker 
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
30*4a0fc138SRobert Clausecker 
/* Defined after main(); declared here so that main() can call them. */
static void	collect_sample(unsigned char *, long long, uint32_t);
static void	analyze_sample(const unsigned char *, long long, uint32_t);

/* Set once sampling has finished; progress() then stops re-arming its alarm. */
static atomic_bool complete = false;
/* Number of samples to draw; defaults to 5 * 2^32 and can be set with -n. */
static long long tries = 5ULL << 32;
/* Number of samples drawn so far; reported by progress(). */
static atomic_llong tries_done = 0;
37*4a0fc138SRobert Clausecker 
/*
 * Print a one-line synopsis to stderr and terminate the program with
 * EXIT_FAILURE.  argv0 is the program name shown in the synopsis.
 * This function does not return.
 */
static void
usage(const char *argv0)
{
	fprintf(stderr, "usage: %s [-n tries] [-t upper_bound]\n", argv0);
	exit(EXIT_FAILURE);
}
44*4a0fc138SRobert Clausecker 
45*4a0fc138SRobert Clausecker int
main(int argc,char * argv[])46*4a0fc138SRobert Clausecker main(int argc, char *argv[])
47*4a0fc138SRobert Clausecker {
48*4a0fc138SRobert Clausecker 	uint32_t threshold = 3UL << 30;
49*4a0fc138SRobert Clausecker 	int ch;
50*4a0fc138SRobert Clausecker 	unsigned char *sample;
51*4a0fc138SRobert Clausecker 
52*4a0fc138SRobert Clausecker 	while (ch = getopt(argc, argv, "n:t:"), ch != EOF)
53*4a0fc138SRobert Clausecker 		switch (ch) {
54*4a0fc138SRobert Clausecker 		case 'n':
55*4a0fc138SRobert Clausecker 			tries = atoll(optarg);
56*4a0fc138SRobert Clausecker 			break;
57*4a0fc138SRobert Clausecker 
58*4a0fc138SRobert Clausecker 		case 't':
59*4a0fc138SRobert Clausecker 			threshold = (uint32_t)atoll(optarg);
60*4a0fc138SRobert Clausecker 			break;
61*4a0fc138SRobert Clausecker 
62*4a0fc138SRobert Clausecker 		default:
63*4a0fc138SRobert Clausecker 			usage(argv[0]);
64*4a0fc138SRobert Clausecker 		}
65*4a0fc138SRobert Clausecker 
66*4a0fc138SRobert Clausecker 	if (optind != argc)
67*4a0fc138SRobert Clausecker 		usage(argv[0]);
68*4a0fc138SRobert Clausecker 
69*4a0fc138SRobert Clausecker 	if (threshold == 0) {
70*4a0fc138SRobert Clausecker 		fprintf(stderr, "threshold must be between 1 and %lu\n", (unsigned long)UINT32_MAX);
71*4a0fc138SRobert Clausecker 		exit(EXIT_FAILURE);
72*4a0fc138SRobert Clausecker 	}
73*4a0fc138SRobert Clausecker 
74*4a0fc138SRobert Clausecker 	sample = calloc(threshold, 1);
75*4a0fc138SRobert Clausecker 	if (sample == NULL) {
76*4a0fc138SRobert Clausecker 		perror("calloc(threshold, 1)");
77*4a0fc138SRobert Clausecker 		return (EXIT_FAILURE);
78*4a0fc138SRobert Clausecker 	}
79*4a0fc138SRobert Clausecker 
80*4a0fc138SRobert Clausecker 	collect_sample(sample, tries, threshold);
81*4a0fc138SRobert Clausecker 	analyze_sample(sample, tries, threshold);
82*4a0fc138SRobert Clausecker }
83*4a0fc138SRobert Clausecker 
84*4a0fc138SRobert Clausecker static void
progress(int signo)85*4a0fc138SRobert Clausecker progress(int signo)
86*4a0fc138SRobert Clausecker {
87*4a0fc138SRobert Clausecker 	(void)signo;
88*4a0fc138SRobert Clausecker 
89*4a0fc138SRobert Clausecker 	if (!complete) {
90*4a0fc138SRobert Clausecker 		fprintf(stderr, "\r%10lld of %10lld samples taken (%5.2f%% done)",
91*4a0fc138SRobert Clausecker 		    tries_done, tries, (tries_done * 100.0) / tries);
92*4a0fc138SRobert Clausecker 
93*4a0fc138SRobert Clausecker 		signal(SIGALRM, progress);
94*4a0fc138SRobert Clausecker 		alarm(1);
95*4a0fc138SRobert Clausecker 	}
96*4a0fc138SRobert Clausecker }
97*4a0fc138SRobert Clausecker 
98*4a0fc138SRobert Clausecker static void
collect_sample(unsigned char * sample,long long tries,uint32_t threshold)99*4a0fc138SRobert Clausecker collect_sample(unsigned char *sample, long long tries, uint32_t threshold)
100*4a0fc138SRobert Clausecker {
101*4a0fc138SRobert Clausecker 	long long i;
102*4a0fc138SRobert Clausecker 	uint32_t x;
103*4a0fc138SRobert Clausecker 	bool overflowed = false;
104*4a0fc138SRobert Clausecker 
105*4a0fc138SRobert Clausecker 	progress(SIGALRM);
106*4a0fc138SRobert Clausecker 
107*4a0fc138SRobert Clausecker 	for (i = 0; i < tries; i++) {
108*4a0fc138SRobert Clausecker 		x = arc4random_uniform(threshold);
109*4a0fc138SRobert Clausecker 		tries_done++;
110*4a0fc138SRobert Clausecker 		assert(x < threshold);
111*4a0fc138SRobert Clausecker 
112*4a0fc138SRobert Clausecker 		if (sample[x] == UCHAR_MAX) {
113*4a0fc138SRobert Clausecker 			if (!overflowed) {
114*4a0fc138SRobert Clausecker 				printf("sample table overflow, results will be incorrect\n");
115*4a0fc138SRobert Clausecker 				overflowed = true;
116*4a0fc138SRobert Clausecker 			}
117*4a0fc138SRobert Clausecker 		} else
118*4a0fc138SRobert Clausecker 			sample[x]++;
119*4a0fc138SRobert Clausecker 	}
120*4a0fc138SRobert Clausecker 
121*4a0fc138SRobert Clausecker 	progress(SIGALRM);
122*4a0fc138SRobert Clausecker 	complete = true;
123*4a0fc138SRobert Clausecker 	fputc('\n', stderr);
124*4a0fc138SRobert Clausecker }
125*4a0fc138SRobert Clausecker 
126*4a0fc138SRobert Clausecker static void
analyze_sample(const unsigned char * sample,long long tries,uint32_t threshold)127*4a0fc138SRobert Clausecker analyze_sample(const unsigned char *sample, long long tries,  uint32_t threshold)
128*4a0fc138SRobert Clausecker {
129*4a0fc138SRobert Clausecker 	double discrepancy, average, variance, total;
130*4a0fc138SRobert Clausecker 	long long histogram[UCHAR_MAX + 1] = { 0 }, sum, n, median;
131*4a0fc138SRobert Clausecker 	uint32_t i, i_min, i_max;
132*4a0fc138SRobert Clausecker 	int min, max;
133*4a0fc138SRobert Clausecker 
134*4a0fc138SRobert Clausecker 	printf("distribution properties:\n");
135*4a0fc138SRobert Clausecker 
136*4a0fc138SRobert Clausecker 	/* find median, average, deviation, smallest, and largest bucket */
137*4a0fc138SRobert Clausecker 	total = 0.0;
138*4a0fc138SRobert Clausecker 	for (i = 0; i < threshold; i++) {
139*4a0fc138SRobert Clausecker 		histogram[sample[i]]++;
140*4a0fc138SRobert Clausecker 		total += (double)i * sample[i];
141*4a0fc138SRobert Clausecker 	}
142*4a0fc138SRobert Clausecker 
143*4a0fc138SRobert Clausecker 	average = total / tries;
144*4a0fc138SRobert Clausecker 
145*4a0fc138SRobert Clausecker 	variance = 0.0;
146*4a0fc138SRobert Clausecker 	median = threshold;
147*4a0fc138SRobert Clausecker 	n = 0;
148*4a0fc138SRobert Clausecker 	i_min = 0;
149*4a0fc138SRobert Clausecker 	i_max = 0;
150*4a0fc138SRobert Clausecker 	min = sample[i_min];
151*4a0fc138SRobert Clausecker 	max = sample[i_max];
152*4a0fc138SRobert Clausecker 
153*4a0fc138SRobert Clausecker 	for (i = 0; i < threshold; i++) {
154*4a0fc138SRobert Clausecker 		discrepancy = i - average;
155*4a0fc138SRobert Clausecker 		variance += sample[i] * discrepancy * discrepancy;
156*4a0fc138SRobert Clausecker 
157*4a0fc138SRobert Clausecker 		n += sample[i];
158*4a0fc138SRobert Clausecker 		if (median == threshold && n > tries / 2)
159*4a0fc138SRobert Clausecker 			median = i;
160*4a0fc138SRobert Clausecker 
161*4a0fc138SRobert Clausecker 		if (sample[i] < min) {
162*4a0fc138SRobert Clausecker 			i_min = i;
163*4a0fc138SRobert Clausecker 			min = sample[i_min];
164*4a0fc138SRobert Clausecker 		} else if (sample[i] > max) {
165*4a0fc138SRobert Clausecker 			i_max = i;
166*4a0fc138SRobert Clausecker 			max = sample[i_max];
167*4a0fc138SRobert Clausecker 		}
168*4a0fc138SRobert Clausecker 	}
169*4a0fc138SRobert Clausecker 
170*4a0fc138SRobert Clausecker 	variance /= tries;
171*4a0fc138SRobert Clausecker 	assert(median < threshold);
172*4a0fc138SRobert Clausecker 
173*4a0fc138SRobert Clausecker 	printf("\tthreshold:	%lu\n", (unsigned long)threshold);
174*4a0fc138SRobert Clausecker 	printf("\tobservations:	%lld\n", tries);
175*4a0fc138SRobert Clausecker 	printf("\tleast common:	%lu (%d observations)\n", (unsigned long)i_min, min);
176*4a0fc138SRobert Clausecker 	printf("\tmost common:	%lu (%d observations)\n", (unsigned long)i_max, max);
177*4a0fc138SRobert Clausecker 	printf("\tmedian:		%lld (expected %lu)\n", median, (unsigned long)threshold / 2);
178*4a0fc138SRobert Clausecker 	printf("\taverage:	%f (expected %f)\n", average, 0.5 * (threshold - 1));
179*4a0fc138SRobert Clausecker 	printf("\tdeviation:	%f (expected %f)\n\n", sqrt(variance),
180*4a0fc138SRobert Clausecker 	    sqrt(((double)threshold * threshold - 1.0) / 12));
181*4a0fc138SRobert Clausecker 
182*4a0fc138SRobert Clausecker 	/* build histogram and analyze it */
183*4a0fc138SRobert Clausecker 	printf("sample properties:\n");
184*4a0fc138SRobert Clausecker 
185*4a0fc138SRobert Clausecker 	/* find median, average, and deviation */
186*4a0fc138SRobert Clausecker 	average = (double)tries / threshold;
187*4a0fc138SRobert Clausecker 
188*4a0fc138SRobert Clausecker 	variance = 0.0;
189*4a0fc138SRobert Clausecker 	for (i = 0; i < UCHAR_MAX; i++) {
190*4a0fc138SRobert Clausecker 		discrepancy = i - average;
191*4a0fc138SRobert Clausecker 		variance += histogram[i] * discrepancy * discrepancy;
192*4a0fc138SRobert Clausecker 	}
193*4a0fc138SRobert Clausecker 
194*4a0fc138SRobert Clausecker 	variance /= threshold;
195*4a0fc138SRobert Clausecker 
196*4a0fc138SRobert Clausecker 	n = 0;
197*4a0fc138SRobert Clausecker 	median = UCHAR_MAX + 1;
198*4a0fc138SRobert Clausecker 	for (i = 0; i <= UCHAR_MAX; i++) {
199*4a0fc138SRobert Clausecker 		n += histogram[i];
200*4a0fc138SRobert Clausecker 		if (n >= threshold / 2) {
201*4a0fc138SRobert Clausecker 			median = i;
202*4a0fc138SRobert Clausecker 			break;
203*4a0fc138SRobert Clausecker 		}
204*4a0fc138SRobert Clausecker 	}
205*4a0fc138SRobert Clausecker 
206*4a0fc138SRobert Clausecker 	assert(median <= UCHAR_MAX); /* unreachable */
207*4a0fc138SRobert Clausecker 
208*4a0fc138SRobert Clausecker 	printf("\tmedian:		%lld\n", median);
209*4a0fc138SRobert Clausecker 	printf("\taverage:	%f\n", average);
210*4a0fc138SRobert Clausecker 	printf("\tdeviation:	%f (expected %f)\n\n", sqrt(variance), sqrt(average * (1.0 - 1.0 / threshold)));
211*4a0fc138SRobert Clausecker 
212*4a0fc138SRobert Clausecker 	printf("histogram:\n");
213*4a0fc138SRobert Clausecker 	for (i = 0; i < 256; i++)
214*4a0fc138SRobert Clausecker 		if (histogram[i] != 0)
215*4a0fc138SRobert Clausecker 			printf("\t%3d:\t%lld\n", (int)i, histogram[i]);
216*4a0fc138SRobert Clausecker }
217