xref: /freebsd/usr.bin/ministat/ministat.c (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 
11 #include <sys/cdefs.h>
12 __FBSDID("$FreeBSD$");
13 
14 #include <stdio.h>
15 #include <math.h>
16 #include <err.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <sys/queue.h>
21 
22 #define NSTUDENT 100
23 #define NCONF 6
24 double const studentpct[] = { 80, 90, 95, 98, 99, 99.5 };
25 double student [NSTUDENT + 1][NCONF] = {
26 /* inf */	{	1.282,	1.645,	1.960,	2.326,	2.576,	3.090  },
27 /* 1. */	{	3.078,	6.314,	12.706,	31.821,	63.657,	318.313  },
28 /* 2. */	{	1.886,	2.920,	4.303,	6.965,	9.925,	22.327  },
29 /* 3. */	{	1.638,	2.353,	3.182,	4.541,	5.841,	10.215  },
30 /* 4. */	{	1.533,	2.132,	2.776,	3.747,	4.604,	7.173  },
31 /* 5. */	{	1.476,	2.015,	2.571,	3.365,	4.032,	5.893  },
32 /* 6. */	{	1.440,	1.943,	2.447,	3.143,	3.707,	5.208  },
33 /* 7. */	{	1.415,	1.895,	2.365,	2.998,	3.499,	4.782  },
34 /* 8. */	{	1.397,	1.860,	2.306,	2.896,	3.355,	4.499  },
35 /* 9. */	{	1.383,	1.833,	2.262,	2.821,	3.250,	4.296  },
36 /* 10. */	{	1.372,	1.812,	2.228,	2.764,	3.169,	4.143  },
37 /* 11. */	{	1.363,	1.796,	2.201,	2.718,	3.106,	4.024  },
38 /* 12. */	{	1.356,	1.782,	2.179,	2.681,	3.055,	3.929  },
39 /* 13. */	{	1.350,	1.771,	2.160,	2.650,	3.012,	3.852  },
40 /* 14. */	{	1.345,	1.761,	2.145,	2.624,	2.977,	3.787  },
41 /* 15. */	{	1.341,	1.753,	2.131,	2.602,	2.947,	3.733  },
42 /* 16. */	{	1.337,	1.746,	2.120,	2.583,	2.921,	3.686  },
43 /* 17. */	{	1.333,	1.740,	2.110,	2.567,	2.898,	3.646  },
44 /* 18. */	{	1.330,	1.734,	2.101,	2.552,	2.878,	3.610  },
45 /* 19. */	{	1.328,	1.729,	2.093,	2.539,	2.861,	3.579  },
46 /* 20. */	{	1.325,	1.725,	2.086,	2.528,	2.845,	3.552  },
47 /* 21. */	{	1.323,	1.721,	2.080,	2.518,	2.831,	3.527  },
48 /* 22. */	{	1.321,	1.717,	2.074,	2.508,	2.819,	3.505  },
49 /* 23. */	{	1.319,	1.714,	2.069,	2.500,	2.807,	3.485  },
50 /* 24. */	{	1.318,	1.711,	2.064,	2.492,	2.797,	3.467  },
51 /* 25. */	{	1.316,	1.708,	2.060,	2.485,	2.787,	3.450  },
52 /* 26. */	{	1.315,	1.706,	2.056,	2.479,	2.779,	3.435  },
53 /* 27. */	{	1.314,	1.703,	2.052,	2.473,	2.771,	3.421  },
54 /* 28. */	{	1.313,	1.701,	2.048,	2.467,	2.763,	3.408  },
55 /* 29. */	{	1.311,	1.699,	2.045,	2.462,	2.756,	3.396  },
56 /* 30. */	{	1.310,	1.697,	2.042,	2.457,	2.750,	3.385  },
57 /* 31. */	{	1.309,	1.696,	2.040,	2.453,	2.744,	3.375  },
58 /* 32. */	{	1.309,	1.694,	2.037,	2.449,	2.738,	3.365  },
59 /* 33. */	{	1.308,	1.692,	2.035,	2.445,	2.733,	3.356  },
60 /* 34. */	{	1.307,	1.691,	2.032,	2.441,	2.728,	3.348  },
61 /* 35. */	{	1.306,	1.690,	2.030,	2.438,	2.724,	3.340  },
62 /* 36. */	{	1.306,	1.688,	2.028,	2.434,	2.719,	3.333  },
63 /* 37. */	{	1.305,	1.687,	2.026,	2.431,	2.715,	3.326  },
64 /* 38. */	{	1.304,	1.686,	2.024,	2.429,	2.712,	3.319  },
65 /* 39. */	{	1.304,	1.685,	2.023,	2.426,	2.708,	3.313  },
66 /* 40. */	{	1.303,	1.684,	2.021,	2.423,	2.704,	3.307  },
67 /* 41. */	{	1.303,	1.683,	2.020,	2.421,	2.701,	3.301  },
68 /* 42. */	{	1.302,	1.682,	2.018,	2.418,	2.698,	3.296  },
69 /* 43. */	{	1.302,	1.681,	2.017,	2.416,	2.695,	3.291  },
70 /* 44. */	{	1.301,	1.680,	2.015,	2.414,	2.692,	3.286  },
71 /* 45. */	{	1.301,	1.679,	2.014,	2.412,	2.690,	3.281  },
72 /* 46. */	{	1.300,	1.679,	2.013,	2.410,	2.687,	3.277  },
73 /* 47. */	{	1.300,	1.678,	2.012,	2.408,	2.685,	3.273  },
74 /* 48. */	{	1.299,	1.677,	2.011,	2.407,	2.682,	3.269  },
75 /* 49. */	{	1.299,	1.677,	2.010,	2.405,	2.680,	3.265  },
76 /* 50. */	{	1.299,	1.676,	2.009,	2.403,	2.678,	3.261  },
77 /* 51. */	{	1.298,	1.675,	2.008,	2.402,	2.676,	3.258  },
78 /* 52. */	{	1.298,	1.675,	2.007,	2.400,	2.674,	3.255  },
79 /* 53. */	{	1.298,	1.674,	2.006,	2.399,	2.672,	3.251  },
80 /* 54. */	{	1.297,	1.674,	2.005,	2.397,	2.670,	3.248  },
81 /* 55. */	{	1.297,	1.673,	2.004,	2.396,	2.668,	3.245  },
82 /* 56. */	{	1.297,	1.673,	2.003,	2.395,	2.667,	3.242  },
83 /* 57. */	{	1.297,	1.672,	2.002,	2.394,	2.665,	3.239  },
84 /* 58. */	{	1.296,	1.672,	2.002,	2.392,	2.663,	3.237  },
85 /* 59. */	{	1.296,	1.671,	2.001,	2.391,	2.662,	3.234  },
86 /* 60. */	{	1.296,	1.671,	2.000,	2.390,	2.660,	3.232  },
87 /* 61. */	{	1.296,	1.670,	2.000,	2.389,	2.659,	3.229  },
88 /* 62. */	{	1.295,	1.670,	1.999,	2.388,	2.657,	3.227  },
89 /* 63. */	{	1.295,	1.669,	1.998,	2.387,	2.656,	3.225  },
90 /* 64. */	{	1.295,	1.669,	1.998,	2.386,	2.655,	3.223  },
91 /* 65. */	{	1.295,	1.669,	1.997,	2.385,	2.654,	3.220  },
92 /* 66. */	{	1.295,	1.668,	1.997,	2.384,	2.652,	3.218  },
93 /* 67. */	{	1.294,	1.668,	1.996,	2.383,	2.651,	3.216  },
94 /* 68. */	{	1.294,	1.668,	1.995,	2.382,	2.650,	3.214  },
95 /* 69. */	{	1.294,	1.667,	1.995,	2.382,	2.649,	3.213  },
96 /* 70. */	{	1.294,	1.667,	1.994,	2.381,	2.648,	3.211  },
97 /* 71. */	{	1.294,	1.667,	1.994,	2.380,	2.647,	3.209  },
98 /* 72. */	{	1.293,	1.666,	1.993,	2.379,	2.646,	3.207  },
99 /* 73. */	{	1.293,	1.666,	1.993,	2.379,	2.645,	3.206  },
100 /* 74. */	{	1.293,	1.666,	1.993,	2.378,	2.644,	3.204  },
101 /* 75. */	{	1.293,	1.665,	1.992,	2.377,	2.643,	3.202  },
102 /* 76. */	{	1.293,	1.665,	1.992,	2.376,	2.642,	3.201  },
103 /* 77. */	{	1.293,	1.665,	1.991,	2.376,	2.641,	3.199  },
104 /* 78. */	{	1.292,	1.665,	1.991,	2.375,	2.640,	3.198  },
105 /* 79. */	{	1.292,	1.664,	1.990,	2.374,	2.640,	3.197  },
106 /* 80. */	{	1.292,	1.664,	1.990,	2.374,	2.639,	3.195  },
107 /* 81. */	{	1.292,	1.664,	1.990,	2.373,	2.638,	3.194  },
108 /* 82. */	{	1.292,	1.664,	1.989,	2.373,	2.637,	3.193  },
109 /* 83. */	{	1.292,	1.663,	1.989,	2.372,	2.636,	3.191  },
110 /* 84. */	{	1.292,	1.663,	1.989,	2.372,	2.636,	3.190  },
111 /* 85. */	{	1.292,	1.663,	1.988,	2.371,	2.635,	3.189  },
112 /* 86. */	{	1.291,	1.663,	1.988,	2.370,	2.634,	3.188  },
113 /* 87. */	{	1.291,	1.663,	1.988,	2.370,	2.634,	3.187  },
114 /* 88. */	{	1.291,	1.662,	1.987,	2.369,	2.633,	3.185  },
115 /* 89. */	{	1.291,	1.662,	1.987,	2.369,	2.632,	3.184  },
116 /* 90. */	{	1.291,	1.662,	1.987,	2.368,	2.632,	3.183  },
117 /* 91. */	{	1.291,	1.662,	1.986,	2.368,	2.631,	3.182  },
118 /* 92. */	{	1.291,	1.662,	1.986,	2.368,	2.630,	3.181  },
119 /* 93. */	{	1.291,	1.661,	1.986,	2.367,	2.630,	3.180  },
120 /* 94. */	{	1.291,	1.661,	1.986,	2.367,	2.629,	3.179  },
121 /* 95. */	{	1.291,	1.661,	1.985,	2.366,	2.629,	3.178  },
122 /* 96. */	{	1.290,	1.661,	1.985,	2.366,	2.628,	3.177  },
123 /* 97. */	{	1.290,	1.661,	1.985,	2.365,	2.627,	3.176  },
124 /* 98. */	{	1.290,	1.661,	1.984,	2.365,	2.627,	3.175  },
125 /* 99. */	{	1.290,	1.660,	1.984,	2.365,	2.626,	3.175  },
126 /* 100. */	{	1.290,	1.660,	1.984,	2.364,	2.626,	3.174  }
127 };
128 
129 TAILQ_HEAD(pointlist, point);
130 
131 struct dataset {
132 	struct pointlist list;
133 	double sy, syy;
134 	int n;
135 };
136 
137 static struct dataset *
138 NewSet(void)
139 {
140 	struct dataset *ds;
141 
142 	ds = calloc(1, sizeof *ds);
143 	TAILQ_INIT(&ds->list);
144 	return(ds);
145 }
146 
147 struct point {
148 	TAILQ_ENTRY(point)	list;
149 	double			val;
150 };
151 
152 static void
153 AddPoint(struct dataset *ds, double a)
154 {
155 	struct point *pp, *pp2;
156 
157 	pp = calloc(1, sizeof *pp);
158 	pp->val = a;
159 
160 	ds->n++;
161 	ds->sy += a;
162 	ds->syy += a * a;
163 	if (TAILQ_EMPTY(&ds->list)) {
164 		TAILQ_INSERT_HEAD(&ds->list, pp, list);
165 		return;
166 	}
167 	TAILQ_FOREACH(pp2, &ds->list, list) {
168 		if (pp->val < pp2->val) {
169 			TAILQ_INSERT_BEFORE(pp2, pp, list);
170 			return;
171 		}
172 	}
173 	TAILQ_INSERT_TAIL(&ds->list, pp, list);
174 }
175 
176 static double
177 Min(struct dataset *ds)
178 {
179 
180 	return (TAILQ_FIRST(&ds->list)->val);
181 }
182 
183 static double
184 Max(struct dataset *ds)
185 {
186 
187 	return(TAILQ_LAST(&ds->list, pointlist)->val);
188 }
189 
190 static double
191 Avg(struct dataset *ds)
192 {
193 
194 	return(ds->sy / ds->n);
195 }
196 
197 static double
198 Median(struct dataset *ds)
199 {
200 	int i;
201 	struct point *pp;
202 
203 	i = ds->n / 2;
204 	TAILQ_FOREACH(pp, &ds->list, list) {
205 		if (i--)
206 			continue;
207 		return (pp->val);
208 	}
209 }
210 
211 static double
212 Var(struct dataset *ds)
213 {
214 
215 	return (ds->syy - ds->sy * ds->sy / ds->n) / (ds->n - 1.0);
216 }
217 
218 static double
219 Stddev(struct dataset *ds)
220 {
221 
222 	return sqrt(Var(ds));
223 }
224 
225 static void
226 VitalsHead(void)
227 {
228 
229 	printf("    N           Min           Max        Median           Avg        Stddev\n");
230 }
231 
232 static void
233 Vitals(struct dataset *ds, int flag)
234 {
235 	double a;
236 
237 	printf("%c %3d %13.8g %13.8g %13.8g %13.8g %13.8g",
238 	    flag == 1 ? 'x' : '+',
239 	    ds->n, Min(ds), Max(ds), Median(ds), Avg(ds), Stddev(ds));
240 	printf("\n");
241 }
242 
243 static void
244 Relative(struct dataset *ds, struct dataset *rs, int confidx)
245 {
246 	double spool, s, d, e, t;
247 	int i, c;
248 
249 	i = ds->n + rs->n - 2;
250 	if (i > NSTUDENT)
251 		t = student[0][confidx];
252 	else
253 		t = student[i][confidx];
254 	spool = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs);
255 	spool /= ds->n + rs->n - 2;
256 	spool = sqrt(spool);
257 	s = spool * sqrt(1.0 / ds->n + 1.0 / rs->n);
258 	d = Avg(ds) - Avg(rs);
259 	e = t * s;
260 
261 	if (fabs(d) > e) {
262 
263 		printf("Difference at %.1f%% confidence\n", studentpct[confidx]);
264 		printf("	%g +/- %g\n", d, e);
265 		printf("	%g%% +/- %g%%\n", d * 100 / Avg(rs), e * 100 / Avg(rs));
266 		printf("	(Student's t, pooled s = %g)\n", spool);
267 	} else {
268 		printf("No difference proven at %.1f%% confidence\n",
269 		    studentpct[confidx]);
270 	}
271 }
272 
273 struct plot {
274 	double		min;
275 	double		max;
276 	double		span;
277 	int		width;
278 
279 	double		x0, dx;
280 	int		height;
281 	char		*data;
282 	char		**bar;
283 	int		separate_bars;
284 };
285 
286 static struct plot plot;
287 
288 static void
289 SetupPlot(int width, int separate)
290 {
291 	struct plot *pl;
292 
293 	pl = &plot;
294 	pl->width = width;
295 	pl->height = 0;
296 	pl->data = NULL;
297 	pl->bar = NULL;
298 	pl->separate_bars = separate;
299 	pl->min = 999e99;
300 	pl->max = -999e99;
301 }
302 
303 static void
304 AdjPlot(double a)
305 {
306 	struct plot *pl;
307 
308 	pl = &plot;
309 	if (a < pl->min)
310 		pl->min = a;
311 	if (a > pl->max)
312 		pl->max = a;
313 	pl->span = pl->max - pl->min;
314 	pl->dx = pl->span / (pl->width - 1.0);
315 	pl->x0 = pl->min - .5 * pl->dx;
316 }
317 
318 static void
319 DimPlot(struct dataset *ds)
320 {
321 	AdjPlot(Min(ds));
322 	AdjPlot(Max(ds));
323 	AdjPlot(Avg(ds) - Stddev(ds));
324 	AdjPlot(Avg(ds) + Stddev(ds));
325 }
326 
327 static void
328 PlotSet(struct dataset *ds, int val)
329 {
330 	struct plot *pl;
331 	struct point *pp;
332 	int i, j, m, x;
333 	int bar;
334 
335 	pl = &plot;
336 	if (pl->span == 0)
337 		return;
338 
339 	if (pl->separate_bars)
340 		bar = val-1;
341 	else
342 		bar = 0;
343 
344 	if (pl->bar == NULL) {
345 		pl->bar = malloc(sizeof(char *) * 2);
346 		memset(pl->bar, 0, sizeof(char*) * 2);
347 	}
348 	if (pl->bar[bar] == NULL) {
349 		pl->bar[bar] = malloc(pl->width);
350 		memset(pl->bar[bar], 0, pl->width);
351 	}
352 
353 	m = 1;
354 	i = -1;
355 	j = 0;
356 	TAILQ_FOREACH(pp, &ds->list, list) {
357 		x = (pp->val - pl->x0) / pl->dx;
358 		if (x == i) {
359 			j++;
360 			if (j > m)
361 				m = j;
362 		} else {
363 			j = 1;
364 			i = x;
365 		}
366 	}
367 	m += 1;
368 	if (m > pl->height) {
369 		pl->data = realloc(pl->data, pl->width * m);
370 		memset(pl->data + pl->height * pl->width, 0,
371 		    (m - pl->height) * pl->width);
372 	}
373 	pl->height = m;
374 	i = -1;
375 	TAILQ_FOREACH(pp, &ds->list, list) {
376 		x = (pp->val - pl->x0) / pl->dx;
377 		if (x == i) {
378 			j++;
379 		} else {
380 			j = 1;
381 			i = x;
382 		}
383 		pl->data[j * pl->width + x] |= val;
384 	}
385 	x = ((Avg(ds) - Stddev(ds)) - pl->x0) / pl->dx;
386 	m = ((Avg(ds) + Stddev(ds)) - pl->x0) / pl->dx;
387 	pl->bar[bar][m] = '|';
388 	pl->bar[bar][x] = '|';
389 	for (i = x + 1; i < m; i++)
390 		if (pl->bar[bar][i] == 0)
391 			pl->bar[bar][i] = '_';
392 	x = (Median(ds) - pl->x0) / pl->dx;
393 	pl->bar[bar][x] = 'M';
394 	x = (Avg(ds) - pl->x0) / pl->dx;
395 	pl->bar[bar][x] = 'A';
396 }
397 
398 static void
399 DumpPlot(void)
400 {
401 	struct plot *pl;
402 	int i, j, k;
403 
404 	pl = &plot;
405 	if (pl->span == 0) {
406 		printf("[no plot, span is zero width]\n");
407 		return;
408 	}
409 
410 	putchar('+');
411 	for (i = 0; i < pl->width; i++)
412 		putchar('-');
413 	putchar('+');
414 	putchar('\n');
415 	for (i = 1; i < pl->height; i++) {
416 		putchar('|');
417 		for (j = 0; j < pl->width; j++) {
418 			k = pl->data[(pl->height - i) * pl->width + j];
419 			switch (k) {
420 			case 0: putchar(' '); break;
421 			case 1: putchar('x'); break;
422 			case 2: putchar('+'); break;
423 			case 3: putchar('*'); break;
424 			default: printf("[%02x]", k); break;
425 
426 			}
427 		}
428 		putchar('|');
429 		putchar('\n');
430 	}
431 	for (i = 0; i < 2; i++) {
432 		if (pl->bar[i] == NULL)
433 			continue;
434 		putchar('|');
435 		for (j = 0; j < pl->width; j++) {
436 			k = pl->bar[i][j];
437 			if (k == 0)
438 				k = ' ';
439 			putchar(k);
440 		}
441 		putchar('|');
442 		putchar('\n');
443 	}
444 	putchar('+');
445 	for (i = 0; i < pl->width; i++)
446 		putchar('-');
447 	putchar('+');
448 	putchar('\n');
449 }
450 
451 
452 static struct dataset *
453 ReadSet(char *n)
454 {
455 	FILE *f;
456 	char buf[BUFSIZ], *p;
457 	struct dataset *s;
458 	double d;
459 	int line;
460 
461 	if (n == NULL) {
462 		f = stdin;
463 		n = "<stdin>";
464 	} else if (!strcmp(n, "-")) {
465 		f = stdin;
466 		n = "<stdin>";
467 	} else {
468 		f = fopen(n, "r");
469 	}
470 	if (f == NULL)
471 		err(1, "Cannot open %s", n);
472 	s = NewSet();
473 	line = 0;
474 	while (fgets(buf, sizeof buf, f) != NULL) {
475 		line++;
476 		p = strchr(buf, '#');
477 		if (p != NULL)
478 			*p = '\0';
479 		p = buf + strlen(buf) - 1;
480 		while (p >= buf && isspace(*p)) {
481 			*p = '\0';
482 			p--;
483 		}
484 		d = strtod(buf, &p);
485 		if (p != NULL && *p != '\0')
486 			err(2, "Invalid data on line %d in %s\n", line, n);
487 		if (*buf != '\0')
488 			AddPoint(s, d);
489 	}
490 	fclose(f);
491 	if (s->n < 3) {
492 		fprintf(stderr,
493 		    "Dataset %s must contain at least 3 data points\n", n);
494 		exit (2);
495 	}
496 	return (s);
497 }
498 
499 static void
500 usage(char const *whine)
501 {
502 	int i;
503 
504 	fprintf(stderr, "%s\n", whine);
505 	fprintf(stderr,
506 	    "Usage: ministat [ -c confidence ] [-s] [file 1 [file 2]]\n");
507 	fprintf(stderr, "\tconfidence = {");
508 	for (i = 0; i < NCONF; i++) {
509 		fprintf(stderr, "%s%g%%",
510 		    i ? ", " : "",
511 		    studentpct[i]);
512 	}
513 	fprintf(stderr, "}\n");
514 	fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n");
515 	exit (2);
516 }
517 
518 int
519 main(int argc, char **argv)
520 {
521 	struct dataset *ds1, *ds2, *ds3;
522 	double a;
523 	char *p;
524 	int c, i, ci;
525 	int flag_s = 0;
526 
527 	ci = -1;
528 	while ((c = getopt(argc, argv, "c:s")) != -1)
529 		switch (c) {
530 		case 'c':
531 			a = strtod(optarg, &p);
532 			if (p != NULL && *p != '\0')
533 				usage("Not a floating point number");
534 			for (i = 0; i < NCONF; i++)
535 				if (a == studentpct[i])
536 					ci = i;
537 			if (ci == -1)
538 				usage("No support for confidence level");
539 			break;
540 		case 's':
541 			flag_s = 1;
542 			break;
543 		default:
544 			usage("Unknown option");
545 			break;
546 		}
547 	if (ci == -1)
548 		ci = 2;
549 	argc -= optind;
550 	argv += optind;
551 
552 	if (argc == 0) {
553 		ds1 = ReadSet(NULL);
554 		printf("x stdin\n");
555 	} else if (argc > 0) {
556 		ds1 = ReadSet(argv[0]);
557 		printf("x %s\n", argv[0]);
558 	} if (argc > 1) {
559 		ds2 = ReadSet(argv[1]);
560 		printf("+ %s\n", argv[1]);
561 	}
562 
563 	SetupPlot(74, flag_s);
564 	DimPlot(ds1);
565 	if (argc > 1)
566 		DimPlot(ds2);
567 	PlotSet(ds1, 1);
568 	if (argc > 1)
569 		PlotSet(ds2, 2);
570 	DumpPlot();
571 	VitalsHead();
572 	Vitals(ds1, 1);
573 	if (argc > 1) {
574 		Vitals(ds2, 2);
575 		Relative(ds2, ds1, ci);
576 	}
577 	exit(0);
578 }
579