xref: /freebsd/usr.bin/systat/iolat.c (revision 22054f88914b51113f77f6eccc11353a891f9f3e)
1 /*
2  * Copyright (c) 2021 Netflix, Inc
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  */
6 
7 
8 #include <sys/param.h>
9 #include <sys/sysctl.h>
10 #include <sys/resource.h>
11 
12 #include <devstat.h>
13 #include <err.h>
14 #include <errno.h>
15 #include <math.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include <sys/queue.h>
21 #include <sys/sysctl.h>
22 
23 #include "systat.h"
24 #include "extern.h"
25 #include "devs.h"
26 
/* Sysctl subtree that walk_sysctl() is pointed at by initiolat()/fetchiolat(). */
#define CAM_BASE "kern.cam"
/* Name suffix a sysctl node must end with to be parsed as a latency histogram. */
#define LATENCY ".latencies"
/* Sysctl read by initiolat() to set baselat (the _us suffix suggests microseconds). */
#define CAM_IOSCHED_BASE "kern.cam.iosched.bucket_base_us"

#define DEV_NAMSIZE	32	/* size of iosched_stat.dev_name, including the NUL */
#define OP_NAMSIZE	16
#define MAX_LATS	32	/* capacity of the per-operation bucket arrays */

/* Thresholds latout() compares a displayed value against to pick a color pair. */
static double high_thresh = 500;
static double med_thresh = 300;
static bool docolor = true;	/* toggled by the "color" command in cmdiolat() */

static int ndevs;	/* incremented for every entry alloc_dev() adds to curlist */
static SLIST_HEAD(, iosched_stat)	curlist;	/* all devices seen so far */
41 
/*
 * Latency histogram for one operation type of one device.  update_dev()
 * copies the current lats into prev_lats before storing a new sample, and
 * showiolat() displays percentiles of the difference between the two.
 */
struct iosched_op_stat {
	int		nlats;			/* number of buckets stored by update_dev() */
	uint64_t	lats[MAX_LATS];		/* most recent bucket counts */
	uint64_t	prev_lats[MAX_LATS];	/* bucket counts from the sample before */
};
47 
/* Operation types; the value indexes both ops[] and iosched_stat.op_stats[]. */
enum { OP_READ = 0, OP_WRITE, OP_TRIM, NUM_OPS };
/* Names op2num() matches against the operation component of a sysctl name. */
static const char *ops[NUM_OPS] = { "read", "write", "trim" };
#define OP_READ_MASK (1 << OP_READ)
#define OP_WRITE_MASK (1 << OP_WRITE)
#define OP_TRIM_MASK (1 << OP_TRIM)

/* Bitmask of OP_*_MASK bits selecting which operations are labelled and shown. */
static uint32_t flags = OP_READ_MASK | OP_WRITE_MASK | OP_TRIM_MASK;
55 
/*
 * One entry on curlist.  find_dev() identifies an entry by the pair
 * (dev_name, unit).
 */
struct iosched_stat {
	SLIST_ENTRY(iosched_stat)	 link;	/* curlist linkage */
	char		dev_name[DEV_NAMSIZE];	/* device name component of the sysctl name */
	int		unit;			/* unit number component of the sysctl name */
	struct iosched_op_stat op_stats[NUM_OPS];	/* indexed by OP_READ/OP_WRITE/OP_TRIM */
};
62 
/* Forward declarations for helpers defined further down in this file. */
static int	name2oid(const char *, int *);
static int	walk_sysctl(int *, size_t);
65 
/*
 * Translate a dotted sysctl name into its numeric OID.
 *
 * oidp must have room for CTL_MAXNAME ints.  Returns the number of OID
 * components stored in oidp, or the (negative) sysctl() result on failure.
 */
static int
name2oid(const char *name, int *oidp)
{
	int query[2] = { CTL_SYSCTL, CTL_SYSCTL_NAME2OID };
	size_t oidsize = CTL_MAXNAME * sizeof(int);
	int rv;

	rv = sysctl(query, 2, oidp, &oidsize, name, strlen(name));
	if (rv < 0)
		return (rv);
	/* The kernel reports the size in bytes; callers want a component count. */
	return (oidsize / sizeof(int));
}
83 
/*
 * Look up the dotted name of a numeric OID.
 *
 * oid/nlen give the OID and its component count; the name is stored in
 * name, which is namlen bytes long and is cleared first.  Exits via err()
 * if the lookup fails or yields nothing.  The returned length includes
 * the trailing NUL.
 */
static size_t
oid2name(int *oid, size_t nlen, char *name, size_t namlen)
{
	int query[CTL_MAXNAME + 2];
	size_t outlen = namlen;
	int rv;

	memset(name, 0, namlen);

	/* The query is the two-word "name" request followed by the OID itself. */
	query[0] = CTL_SYSCTL;
	query[1] = CTL_SYSCTL_NAME;
	memcpy(&query[2], oid, nlen * sizeof(int));

	rv = sysctl(query, nlen + 2, name, &outlen, NULL, 0);
	if (rv != 0 || outlen == 0)
		err(1, "sysctl name %d %zu %d", rv, outlen, errno);
	return (outlen);
}
101 
/*
 * Fetch the "kind" word (type and flag bits) of a sysctl node.
 *
 * oid/len give the node's OID and component count; the leading u_int of
 * the kernel's reply is stored in *kind.  Exits via err()/errx() if the
 * query fails or the reply is too short to hold a u_int.  Always returns
 * 0 otherwise.
 */
static int
oidfmt(int *oid, int len, u_int *kind)
{
	int qoid[CTL_MAXNAME + 2];
	u_char buf[BUFSIZ];
	int i;
	size_t j;

	qoid[0] = CTL_SYSCTL;
	qoid[1] = CTL_SYSCTL_OIDFMT;
	memcpy(qoid + 2, oid, len * sizeof(int));

	j = sizeof(buf);
	i = sysctl(qoid, len + 2, buf, &j, NULL, 0);
	if (i)
		err(1, "sysctl fmt %d %zu %d", i, j, errno);
	if (j < sizeof(*kind))
		errx(1, "sysctl fmt: short reply (%zu bytes)", j);
	/*
	 * buf is a byte array, so copy the word out instead of dereferencing
	 * a casted pointer (which would violate alignment/aliasing rules).
	 */
	memcpy(kind, buf, sizeof(*kind));
	return (0);
}
121 
/*
 * Parse a delimiter-separated list of decimal numbers.
 *
 * str is modified in place by strsep().  On entry *nbuckets is the
 * capacity of buckets, which is zeroed before parsing.  If fewer than
 * that many tokens were found, *nbuckets is updated to the token count
 * and 1 is returned.  If the array was filled (the input may have been
 * truncated), *nbuckets is left alone and 0 is returned.
 */
static int
split_u64(char *str, const char *delim, uint64_t *buckets, int *nbuckets)
{
	const int cap = *nbuckets;
	int count = 0;
	char *tok;

	memset(buckets, 0, cap * sizeof(*buckets));
	for (;;) {
		/* Fetch the token first: one more is consumed even when full. */
		tok = strsep(&str, delim);
		if (tok == NULL || count >= cap)
			break;
		buckets[count++] = strtoull(tok, NULL, 10);
	}
	if (count >= cap)
		return (0);
	*nbuckets = count;
	return (1);
}
136 
/*
 * Upper bound of the first latency bucket.  initiolat() overwrites this,
 * scaling a microsecond sysctl value by 1e-6, so it is presumably in seconds.
 */
static double baselat = 0.000020;

/*
 * Estimate a percentile from a bucketed latency histogram.
 *
 * permill is the percentile in thousandths (500 = p50, 999 = p99.9) and
 * must lie in [0, 1000).  lats holds nlat bucket counts: bucket 0 is
 * treated as covering [0, baselat) and bucket i >= 1 as covering
 * [baselat * 2^(i-1), baselat * 2^i).
 *
 * Returns the estimate in the units of baselat, or a NaN when permill is
 * out of range, the histogram is empty, or it holds too few samples for
 * the requested percentile to be meaningful.
 */
static float
pest(int permill, uint64_t *lats, int nlat)
{
	uint64_t tot, samp;
	int i;
	float b1, b2;

	/* Outside this range the sample-count check below divides by <= 0. */
	if (permill < 0 || permill >= 1000)
		return nanf("");

	for (tot = 0, i = 0; i < nlat; i++)
		tot += lats[i];
	if (tot == 0)
		return -nanf("");
	/* Require at least two samples above the requested percentile. */
	if (tot < (uint64_t)2000 / (1000 - permill))
		return nanf("");
	samp = tot * permill / 1000;
	if (samp < lats[0])
		return baselat * (float)samp / lats[0]; /* linear interpolation 0 and baselat */

	/*
	 * Find the bucket holding the samp'th sample.  samp >= lats[0] here,
	 * so the walk always gets past bucket 0 and i >= 1 after the decrement.
	 */
	for (tot = 0, i = 0; samp >= tot && i < nlat; i++)
		tot += lats[i];
	i--;
	/* 64-bit shift operand: a plain int 1 << 31 would be undefined. */
	b1 = baselat * (double)((uint64_t)1 << (i - 1));
	b2 = baselat * (double)((uint64_t)1 << i);
	/* Should exponentially interpolate between buckets -- doing linear instead */
	return b1 + (b2 - b1) * (float)(lats[i] - (tot - samp)) / lats[i];
}
163 
164 static int
op2num(const char * op)165 op2num(const char *op)
166 {
167 	for (int i = 0; i < NUM_OPS; i++)
168 		if (strcmp(op, ops[i]) == 0)
169 			return i;
170 	return -1;
171 }
172 
173 static struct iosched_op_stat *
find_dev(const char * dev,int unit,int op)174 find_dev(const char *dev, int unit, int op)
175 {
176 	struct iosched_stat *isp;
177 	struct iosched_op_stat *iosp;
178 
179 	SLIST_FOREACH(isp, &curlist, link) {
180 		if (strcmp(isp->dev_name, dev) != 0 || isp->unit != unit)
181 			continue;
182 		iosp = &isp->op_stats[op];
183 		return iosp;
184 	}
185 	return NULL;
186 }
187 
188 static struct iosched_op_stat *
alloc_dev(const char * dev,int unit,int op)189 alloc_dev(const char *dev, int unit, int op)
190 {
191 	struct iosched_stat *isp;
192 	struct iosched_op_stat *iosp;
193 
194 	isp = malloc(sizeof(*isp));
195 	if (isp == NULL)
196 		return NULL;
197 	strlcpy(isp->dev_name, dev, sizeof(isp->dev_name));
198 	isp->unit = unit;
199 	SLIST_INSERT_HEAD(&curlist, isp, link);
200 	ndevs++;
201 	iosp = &isp->op_stats[op];
202 	return iosp;
203 }
204 
205 #define E3 1000.0
206 static void
update_dev(const char * dev,int unit,int op,uint64_t * lats,int nlat)207 update_dev(const char *dev, int unit, int op, uint64_t *lats, int nlat)
208 {
209 	struct iosched_op_stat *iosp;
210 
211 	iosp = find_dev(dev, unit, op);
212 	if (iosp == NULL)
213 		iosp = alloc_dev(dev, unit, op);
214 	if (iosp == NULL)
215 		return;
216 	iosp->nlats = nlat;
217 	memcpy(iosp->prev_lats, iosp->lats, iosp->nlats * sizeof(uint64_t));
218 	memcpy(iosp->lats, lats, iosp->nlats * sizeof(uint64_t));
219 //	printf("%s%d: %-6s %.3f %.3f %.3f %.3f\r\n",
220 //	    dev, unit, operation, E3 * pest(500, lats, nlat), E3 * pest(900, lats, nlat),
221 //	    E3 * pest(990, lats, nlat), E3 * pest(999, lats, nlat));
222 }
223 
/*
 * Walk every sysctl node below base_oid (len components) and feed each
 * latency histogram found to update_dev().
 *
 * A node is used when its name ends in LATENCY, it is a string node, its
 * value parses into fewer than MAX_LATS numbers, and the name components
 * after CAM_BASE are <dev>.<unit>.<ignored>.<op> with <op> known to
 * op2num().  Nodes that do not match are skipped.  Unexpected sysctl
 * failures terminate the program via err()/errx().  Returns 0 when the
 * walk leaves the subtree or reaches the end of the sysctl tree.
 */
static int
walk_sysctl(int *base_oid, size_t len)
{
	int qoid[CTL_MAXNAME + 2], oid[CTL_MAXNAME];
	size_t l1, l2;
	char name[BUFSIZ];

	/* Not an errno condition, so report it with errx(). */
	if (len > CTL_MAXNAME)
		errx(1, "Length %zu too long", len);

	qoid[0] = CTL_SYSCTL;
	qoid[1] = CTL_SYSCTL_NEXT;
	memcpy(qoid + 2, base_oid, len * sizeof(int));
	l1 = 2 + len;
	for (;;) {
		/*
		 * Get the next one or return when we get to the end of the
		 * sysctls in the kernel.
		 */
		l2 = sizeof(oid);
		if (sysctl(qoid, l1, oid, &l2, NULL, 0) != 0) {
			if (errno == ENOENT)
				return (0);
			err(1, "sysctl(getnext) %zu", l2);
		}

		l2 /= sizeof(int);

		/*
		 * Bail if we're seeing OIDs that don't have the
		 * same prefix or can't have the same prefix.
		 */
		if (l2 < len ||
		    memcmp(oid, base_oid, len * sizeof(int)) != 0)
			return (0);

		/*
		 * Get the name, validate it's one we're looking for,
		 * parse the latency and add to list.
		 */
		do {
			int nlat;
			size_t namelen, vallen;
			char val[BUFSIZ];
			char *walker, *dev, *unitstr, *opstr;
			uint64_t latvals[MAX_LATS];
			u_int kind;
			int unit, op;

			/*
			 * namelen counts the trailing NUL, as does
			 * sizeof(LATENCY).  Skip names too short to carry
			 * the suffix rather than comparing before the buffer.
			 */
			namelen = oid2name(oid, l2, name, sizeof(name));
			if (namelen < sizeof(LATENCY) ||
			    strcmp(name + namelen - sizeof(LATENCY), LATENCY) != 0)
				break;
			if (oidfmt(oid, l2, &kind) != 0)
				err(1, "oidfmt");
			if ((kind & CTLTYPE) != CTLTYPE_STRING)
				errx(1, "string");
			/* Keep one byte free for the terminator stored below. */
			vallen = sizeof(val) - 1;
			if (sysctl(oid, l2, val, &vallen, NULL, 0) != 0)
				err(1, "sysctl");
			val[vallen] = '\0';
			nlat = nitems(latvals);
			if (split_u64(val, ",", latvals, &nlat) == 0)
				break;
			/* Split "<dev>.<unit>.<ignored>.<op>..." after CAM_BASE. */
			walker = name + strlen(CAM_BASE) + 1;
			dev = strsep(&walker, ".");
			unitstr = strsep(&walker, ".");
			strsep(&walker, ".");
			opstr = strsep(&walker, ".");
			/* strsep() yields NULL once the name runs out of components. */
			if (dev == NULL || unitstr == NULL || opstr == NULL)
				break;
			unit = (int)strtol(unitstr, NULL, 10);
			op = op2num(opstr);
			if (op < 0)
				break;
			update_dev(dev, unit, op, latvals, nlat);
		} while (false);

		/* Continue the walk from the node just examined. */
		memcpy(qoid + 2, oid, l2 * sizeof(int));
		l1 = 2 + l2;
	}
}
303 
304 void
closeiolat(WINDOW * w)305 closeiolat(WINDOW *w)
306 {
307 	if (w == NULL)
308 		return;
309 	wclear(w);
310 	wrefresh(w);
311 	delwin(w);
312 }
313 
/*
 * Parse the number following '=' in a "name=value" command and store it
 * in *v.
 *
 * *v is left untouched, and nothing is reported to the user, when cmd
 * has no '=' or the text after it does not start with a number.
 */
static void
doublecmd(const char *cmd, double *v)
{
	const char *eq = strchr(cmd, '=');
	double parsed;

	if (eq != NULL && sscanf(eq + 1, "%lf", &parsed) == 1)
		*v = parsed;
}
327 
328 int
cmdiolat(const char * cmd __unused,const char * args __unused)329 cmdiolat(const char *cmd __unused, const char *args __unused)
330 {
331 	fprintf(stderr, "CMD IS '%s'\n\n", cmd);
332 	if (prefix(cmd, "trim"))
333 		flags ^= OP_TRIM_MASK;
334 	else if (prefix(cmd, "read"))
335 		flags ^= OP_READ_MASK;
336 	else if (prefix(cmd, "write"))
337 		flags ^= OP_WRITE_MASK;
338 	else if (prefix(cmd, "color"))
339 		docolor = !docolor;
340 	else if (prefix("high", cmd))
341 		doublecmd(cmd, &high_thresh);
342 	else if (prefix("med", cmd))
343 		doublecmd(cmd, &med_thresh);
344 	else
345 		return (0);
346 	wclear(wnd);
347 	labeliolat();
348 	refresh();
349 	return (1);
350 }
351 
/*
 * One-time setup for the display: reset the device list, determine the
 * base latency bucket and take an initial sample of every latency
 * histogram below CAM_BASE.
 *
 * Returns 1 on success and 0 if CAM_BASE cannot be resolved to an OID.
 * NOTE(review): 0 is assumed to be how the caller expects an init
 * failure to be reported -- confirm against the command table code.
 */
int
initiolat(void)
{
	int cam[CTL_MAXNAME];
	int camlen;
	uint64_t sbt_base;
	size_t len = sizeof(sbt_base);

	SLIST_INIT(&curlist);

	baselat = 1e-3;		/* old default */
	if (sysctlbyname(CAM_IOSCHED_BASE, &sbt_base, &len, NULL, 0) == 0)
		baselat = sbt_base * 1e-6;	/* Convert microseconds to seconds */

	/* Without a valid OID there is nothing to walk; cam[] is unset. */
	camlen = name2oid(CAM_BASE, cam);
	if (camlen < 0)
		return (0);
	walk_sysctl(cam, camlen);
	return (1);
}
369 
/*
 * Take a new sample of every latency histogram below CAM_BASE.  Does
 * nothing when CAM_BASE cannot be resolved to an OID.
 */
void
fetchiolat(void)
{
	int cam[CTL_MAXNAME];
	int camlen;

	/* On failure cam[] is unset, so it must not be handed to the walker. */
	camlen = name2oid(CAM_BASE, cam);
	if (camlen < 0)
		return;
	walk_sysctl(cam, camlen);
}
378 
379 #define	INSET	10
380 
381 void
labeliolat(void)382 labeliolat(void)
383 {
384 	int _col, ndrives, lpr, row, j;
385 	int regions __unused;
386 	struct iosched_stat *isp;
387 	char tmpstr[32];
388 #define COLWIDTH	29
389 #define DRIVESPERLINE	((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
390 	ndrives = ndevs; // XXX FILTER XXX
391 	regions = howmany(ndrives, DRIVESPERLINE);
392 	lpr = 2; /* for headers */
393 	for (int i = 0; i < NUM_OPS; i++) {
394 		if (flags & (1 << i))
395 			lpr++;
396 	}
397 	row = 0;
398 	_col = INSET;
399 	j = 2;
400 	if (flags & OP_READ_MASK)
401 		mvwaddstr(wnd, row + j++, 1, "read");
402 	if (flags & OP_WRITE_MASK)
403 		mvwaddstr(wnd, row + j++, 1, "write");
404 	if (flags & OP_TRIM_MASK)
405 		mvwaddstr(wnd, row + j++, 1, "trim");
406 	SLIST_FOREACH(isp, &curlist, link) {
407 		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
408 			_col = INSET;
409 			row += lpr + 1;
410 			if (row > getmaxy(wnd) - 1 - (lpr + 1))
411 				break;
412 			j = 2;
413 			if (flags & OP_READ_MASK)
414 				mvwaddstr(wnd, row + j++, 1, "read");
415 			if (flags & OP_WRITE_MASK)
416 				mvwaddstr(wnd, row + j++, 1, "write");
417 			if (flags & OP_TRIM_MASK)
418 				mvwaddstr(wnd, row + j++, 1, "trim");
419 		}
420 		snprintf(tmpstr, sizeof(tmpstr), "%s%d", isp->dev_name, isp->unit);
421 		mvwaddstr(wnd, row, _col + (COLWIDTH - strlen(tmpstr)) / 2, tmpstr);
422 		mvwaddstr(wnd, row + 1, _col, "   p50    p90    p99  p99.9");
423 		_col += COLWIDTH;
424 	}
425 }
426 
427 WINDOW *
openiolat(void)428 openiolat(void)
429 {
430 	return (subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0));
431 }
432 
/*
 * Format a value into a six-character field in buf (len bytes).
 *
 * NaN is shown as a dash.  Values of 1000 and above are shown as
 * integers, and smaller values get one, two or three decimals so that
 * the field width stays constant.
 */
static void
fmt(float f, char *buf, size_t len)
{
	int decimals;

	if (isnan(f)) {
		snprintf(buf, len, "%s", "   -  ");
		return;
	}
	if (f >= 1000.0) {
		snprintf(buf, len, "%6d", (int)f);
		return;
	}

	/* The smaller the value, the more decimals fit into the field. */
	if (f >= 100.0)
		decimals = 1;
	else if (f >= 10.0)
		decimals = 2;
	else
		decimals = 3;
	snprintf(buf, len, "%6.*f", decimals, f);
}
447 
448 static void
latout(double lat,int y,int x)449 latout(double lat, int y, int x)
450 {
451 	int i;
452 	char tmpstr[32];
453 
454 	fmt(lat, tmpstr, sizeof(tmpstr));
455 	if (isnan(lat))
456 		i = 4;
457 	else if (lat > high_thresh)
458 		i = 3;
459 	else if (lat > med_thresh)
460 		i = 2;
461 	else
462 		i = 1;
463 	if (docolor)
464 		wattron(wnd, COLOR_PAIR(i));
465 	mvwaddstr(wnd, y, x, tmpstr);
466 	if (docolor)
467 		wattroff(wnd, COLOR_PAIR(i));
468 }
469 
470 void
showiolat(void)471 showiolat(void)
472 {
473 	int _col, ndrives, lpr, row, k;
474 	int regions __unused;
475 	struct iosched_stat *isp;
476 	struct iosched_op_stat *iosp;
477 #define COLWIDTH	29
478 #define DRIVESPERLINE	((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
479 	ndrives = ndevs; // XXX FILTER XXX
480 	regions = howmany(ndrives, DRIVESPERLINE);
481 	lpr = 2; /* XXX */
482 	for (int i = 0; i < NUM_OPS; i++) {
483 		if (flags & (1 << i))
484 			lpr++;
485 	}
486 	row = 0;
487 	_col = INSET;
488 	SLIST_FOREACH(isp, &curlist, link) {
489 		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
490 			_col = INSET;
491 			row += lpr + 1;
492 			if (row > getmaxy(wnd) - 1 - (lpr + 1))
493 				break;
494 		}
495 		k = 2;
496 		for (int i = 0; i < NUM_OPS; i++) {
497 			uint64_t lats[MAX_LATS];
498 			int nlats;
499 			float p50, p90, p99, p999;
500 
501 			if ((flags & (1 << i)) == 0)
502 				continue;
503 			iosp = &isp->op_stats[i];
504 			nlats = iosp->nlats;
505 			memset(lats, 0, sizeof(lats));
506 			for (int j = 0; j < iosp->nlats; j++)
507 				lats[j] = iosp->lats[j] - iosp->prev_lats[j];
508 			p50 = pest(500, lats, nlats) * E3;
509 			p90 = pest(900, lats, nlats) * E3;
510 			p99 = pest(990, lats, nlats) * E3;
511 			p999 = pest(999, lats, nlats) * E3;
512 			latout(p50, row + k, _col);
513 			latout(p90, row + k, _col + 7);
514 			latout(p99, row + k, _col + 14);
515 			latout(p999, row + k, _col + 21);
516 			k++;
517 		}
518 		_col += COLWIDTH;
519 	}
520 }
521