1 /*
2 * Copyright (c) 2021 Netflix, Inc
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7
8 #include <sys/param.h>
9 #include <sys/sysctl.h>
10 #include <sys/resource.h>
11
12 #include <devstat.h>
13 #include <err.h>
14 #include <errno.h>
15 #include <math.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include <sys/queue.h>
21 #include <sys/sysctl.h>
22
23 #include "systat.h"
24 #include "extern.h"
25 #include "devs.h"
26
/* Name of the sysctl subtree that initiolat()/fetchiolat() resolve and walk. */
#define CAM_BASE "kern.cam"
/* Name suffix walk_sysctl() looks for when selecting nodes to parse. */
#define LATENCY ".latencies"
/* Sysctl read by initiolat() to derive baselat (value is scaled by 1e-6). */
#define CAM_IOSCHED_BASE "kern.cam.iosched.bucket_base_us"

/* Size in bytes of iosched_stat.dev_name, terminating NUL included. */
#define DEV_NAMSIZE 32
/* Not referenced anywhere in the visible part of this file. */
#define OP_NAMSIZE 16
/* Capacity of every latency-counter array used in this file. */
#define MAX_LATS 32
34
/*
 * Display tunables, adjustable at run time through cmdiolat().
 * latout() compares the value it is given (showiolat() passes pest()
 * results multiplied by E3) against these thresholds to pick a color pair.
 */
static double high_thresh = 500;	/* above this: color pair 3 */
static double med_thresh = 300;		/* above this: color pair 2 */
static bool docolor = true;		/* toggled by the "color" command */

/* Number of entries alloc_dev() has added to curlist. */
static int ndevs;
/* All devices seen so far; new entries are inserted at the head. */
static SLIST_HEAD(, iosched_stat) curlist;
41
/*
 * Latency counters for one operation type of one device.
 * update_dev() moves lats into prev_lats before storing a new sample;
 * showiolat() displays percentiles of the difference between the two.
 */
struct iosched_op_stat {
	int nlats;			/* number of valid entries in the arrays below */
	uint64_t lats[MAX_LATS];	/* most recently fetched counters */
	uint64_t prev_lats[MAX_LATS];	/* counters from the fetch before that */
};
47
/* Operation types; the values index ops[] and iosched_stat.op_stats[]. */
enum { OP_READ = 0, OP_WRITE, OP_TRIM, NUM_OPS };
/* Sysctl name component for each operation, matched by op2num(). */
static const char *ops[NUM_OPS] = { "read", "write", "trim" };
/* One bit per operation, used in the flags word below. */
#define OP_READ_MASK (1 << OP_READ)
#define OP_WRITE_MASK (1 << OP_WRITE)
#define OP_TRIM_MASK (1 << OP_TRIM)

/* Operations currently displayed; bits are toggled by cmdiolat(). */
static uint32_t flags = OP_READ_MASK | OP_WRITE_MASK | OP_TRIM_MASK;
55
/* Per-device record kept on curlist, identified by (dev_name, unit). */
struct iosched_stat {
	SLIST_ENTRY(iosched_stat) link;		/* linkage on curlist */
	char dev_name[DEV_NAMSIZE];		/* device name without the unit number */
	int unit;				/* unit number parsed from the sysctl name */
	struct iosched_op_stat op_stats[NUM_OPS]; /* indexed by OP_READ/OP_WRITE/OP_TRIM */
};
62
/* Forward declarations for helpers defined later in this file. */
static int name2oid(const char *, int *);
static int walk_sysctl(int *, size_t);
65
66 static int
name2oid(const char * name,int * oidp)67 name2oid(const char *name, int *oidp)
68 {
69 int oid[2];
70 int i;
71 size_t j;
72
73 oid[0] = CTL_SYSCTL;
74 oid[1] = CTL_SYSCTL_NAME2OID;
75
76 j = CTL_MAXNAME * sizeof(int);
77 i = sysctl(oid, 2, oidp, &j, name, strlen(name));
78 if (i < 0)
79 return (i);
80 j /= sizeof(int);
81 return (j);
82 }
83
84 static size_t /* Includes the trailing NUL */
oid2name(int * oid,size_t nlen,char * name,size_t namlen)85 oid2name(int *oid, size_t nlen, char *name, size_t namlen)
86 {
87 int qoid[CTL_MAXNAME + 2];
88 int i;
89 size_t j;
90
91 bzero(name, namlen);
92 qoid[0] = CTL_SYSCTL;
93 qoid[1] = CTL_SYSCTL_NAME;
94 memcpy(qoid + 2, oid, nlen * sizeof(int));
95 j = namlen;
96 i = sysctl(qoid, nlen + 2, name, &j, 0, 0);
97 if (i || !j)
98 err(1, "sysctl name %d %zu %d", i, j, errno);
99 return (j);
100 }
101
102 static int
oidfmt(int * oid,int len,u_int * kind)103 oidfmt(int *oid, int len, u_int *kind)
104 {
105 int qoid[CTL_MAXNAME+2];
106 u_char buf[BUFSIZ];
107 int i;
108 size_t j;
109
110 qoid[0] = CTL_SYSCTL;
111 qoid[1] = CTL_SYSCTL_OIDFMT;
112 memcpy(qoid + 2, oid, len * sizeof(int));
113
114 j = sizeof(buf);
115 i = sysctl(qoid, len + 2, buf, &j, 0, 0);
116 if (i)
117 err(1, "sysctl fmt %d %zu %d", i, j, errno);
118 *kind = *(u_int *)buf;
119 return (0);
120 }
121
/*
 * Split a delimited list of decimal numbers into an array.
 *
 * str      - writable NUL-terminated input; strsep() overwrites the
 *            delimiters it finds.
 * delim    - set of delimiter characters, as for strsep().
 * buckets  - output array with room for *nbuckets entries; it is zeroed
 *            first, so entries past the parsed ones read as 0.
 * nbuckets - in: capacity of buckets; out: number of values stored.
 *
 * Returns 1 when the whole string fit into the array (an exact fit
 * included) and 0 when input was left over because the array was full.
 */
static int
split_u64(char *str, const char *delim, uint64_t *buckets, int *nbuckets)
{
	int n = *nbuckets, i;
	char *v;

	memset(buckets, 0, n * sizeof(buckets[0]));
	/*
	 * Test the capacity before pulling the next token: strsep() sets
	 * str to NULL once the last token has been handed out, which is
	 * what tells an exact fit apart from an overflow afterwards.
	 */
	for (i = 0; i < n && (v = strsep(&str, delim)) != NULL; i++)
		buckets[i] = strtoull(v, NULL, 10);
	*nbuckets = i;
	return (str == NULL);
}
136
/*
 * Upper edge of histogram bucket 0.  initiolat() replaces this default
 * with the kern.cam.iosched.bucket_base_us value scaled by 1e-6.
 */
static double baselat = 0.000020;

/*
 * Estimate a percentile from a latency histogram.
 *
 * permill - requested percentile in thousandths (500 = p50, 999 = p99.9).
 * lats    - per-bucket sample counts.  Bucket 0 is treated as covering
 *           [0, baselat) and bucket i >= 1 as covering
 *           [baselat * 2^(i-1), baselat * 2^i).
 * nlat    - number of buckets in lats (callers in this file pass at most
 *           MAX_LATS, which keeps the 64-bit shifts below in range).
 *
 * Returns the estimate in the same unit as baselat, or NaN when there
 * are no samples, too few samples to resolve the percentile, or permill
 * lies outside [0, 1000).
 */
static float
pest(int permill, uint64_t *lats, int nlat)
{
	uint64_t tot, samp;
	int i;
	float b1, b2;

	for (tot = 0, i = 0; i < nlat; i++)
		tot += lats[i];
	if (tot == 0)
		return -nanf("");
	/* Reject values that would divide by zero or go negative below. */
	if (permill < 0 || permill >= 1000)
		return nanf("");
	/* Want at least two samples in the tail beyond the percentile. */
	if (tot < (uint64_t)2000 / (1000 - permill))
		return nanf("");
	samp = tot * permill / 1000;
	if (samp < lats[0])
		return baselat * (float)samp / lats[0]; /* linear interpolation 0 and baselat */
	/* Find the bucket in which the running total first exceeds samp. */
	for (tot = 0, i = 0; samp >= tot && i < nlat; i++)
		tot += lats[i];
	i--;
	/* 64-bit shifts: "1 << 31" on a plain int would be undefined. */
	b1 = baselat * (double)((uint64_t)1 << (i - 1));
	b2 = baselat * (double)((uint64_t)1 << i);
	/* Should exponentially interpolate between buckets -- doing linear instead */
	return b1 + (b2 - b1) * (float)(lats[i] - (tot - samp)) / lats[i];
}
163
164 static int
op2num(const char * op)165 op2num(const char *op)
166 {
167 for (int i = 0; i < NUM_OPS; i++)
168 if (strcmp(op, ops[i]) == 0)
169 return i;
170 return -1;
171 }
172
173 static struct iosched_op_stat *
find_dev(const char * dev,int unit,int op)174 find_dev(const char *dev, int unit, int op)
175 {
176 struct iosched_stat *isp;
177 struct iosched_op_stat *iosp;
178
179 SLIST_FOREACH(isp, &curlist, link) {
180 if (strcmp(isp->dev_name, dev) != 0 || isp->unit != unit)
181 continue;
182 iosp = &isp->op_stats[op];
183 return iosp;
184 }
185 return NULL;
186 }
187
188 static struct iosched_op_stat *
alloc_dev(const char * dev,int unit,int op)189 alloc_dev(const char *dev, int unit, int op)
190 {
191 struct iosched_stat *isp;
192 struct iosched_op_stat *iosp;
193
194 isp = malloc(sizeof(*isp));
195 if (isp == NULL)
196 return NULL;
197 strlcpy(isp->dev_name, dev, sizeof(isp->dev_name));
198 isp->unit = unit;
199 SLIST_INSERT_HEAD(&curlist, isp, link);
200 ndevs++;
201 iosp = &isp->op_stats[op];
202 return iosp;
203 }
204
205 #define E3 1000.0
206 static void
update_dev(const char * dev,int unit,int op,uint64_t * lats,int nlat)207 update_dev(const char *dev, int unit, int op, uint64_t *lats, int nlat)
208 {
209 struct iosched_op_stat *iosp;
210
211 iosp = find_dev(dev, unit, op);
212 if (iosp == NULL)
213 iosp = alloc_dev(dev, unit, op);
214 if (iosp == NULL)
215 return;
216 iosp->nlats = nlat;
217 memcpy(iosp->prev_lats, iosp->lats, iosp->nlats * sizeof(uint64_t));
218 memcpy(iosp->lats, lats, iosp->nlats * sizeof(uint64_t));
219 // printf("%s%d: %-6s %.3f %.3f %.3f %.3f\r\n",
220 // dev, unit, operation, E3 * pest(500, lats, nlat), E3 * pest(900, lats, nlat),
221 // E3 * pest(990, lats, nlat), E3 * pest(999, lats, nlat));
222 }
223
224 static int
walk_sysctl(int * base_oid,size_t len)225 walk_sysctl(int *base_oid, size_t len)
226 {
227 int qoid[CTL_MAXNAME + 2], oid[CTL_MAXNAME];
228 size_t l1, l2;
229 char name[BUFSIZ];
230
231 if (len > CTL_MAXNAME)
232 err(1, "Length %zd too long", len);
233
234 qoid[0] = CTL_SYSCTL;
235 qoid[1] = CTL_SYSCTL_NEXT;
236 l1 = 2;
237 memcpy(qoid + 2, base_oid, len * sizeof(int));
238 l1 += len;
239 for (;;) {
240 /*
241 * Get the next one or return when we get to the end of the
242 * sysctls in the kernel.
243 */
244 l2 = sizeof(oid);
245 if (sysctl(qoid, l1, oid, &l2, 0, 0) != 0) {
246 if (errno == ENOENT)
247 return (0);
248 err(1, "sysctl(getnext) %zu", l2);
249 }
250
251 l2 /= sizeof(int);
252
253 /*
254 * Bail if we're seeing OIDs that don't have the
255 * same prefix or can't have the same prefix.
256 */
257 if (l2 < len ||
258 memcmp(oid, base_oid, len * sizeof(int)) != 0)
259 return (0);
260
261 /*
262 * Get the name, validate it's one we're looking for,
263 * parse the latency and add to list.
264 */
265 do {
266 int nlat;
267 size_t l3;
268 char val[BUFSIZ];
269 char *walker, *dev, *opstr;
270 uint64_t latvals[MAX_LATS];
271 u_int kind;
272 int unit, op;
273
274 l1 = oid2name(oid, l2, name, sizeof(name));
275 if (strcmp(name + l1 - strlen(LATENCY) - 1, LATENCY) != 0)
276 break;
277 if (oidfmt(oid, l2, &kind) != 0)
278 err(1, "oidfmt");
279 if ((kind & CTLTYPE) != CTLTYPE_STRING)
280 errx(1, "string");
281 l3 = sizeof(val);
282 if (sysctl(oid, l2, val, &l3, 0, 0) != 0)
283 err(1, "sysctl");
284 val[l3] = '\0';
285 nlat = nitems(latvals);
286 if (split_u64(val, ",", latvals, &nlat) == 0)
287 break;
288 walker = name + strlen(CAM_BASE) + 1;
289 dev = strsep(&walker, ".");
290 unit = (int)strtol(strsep(&walker, "."), NULL, 10);
291 strsep(&walker, ".");
292 opstr = strsep(&walker, ".");
293 op = op2num(opstr);
294 if (op < 0)
295 break;
296 update_dev(dev, unit, op, latvals, nlat);
297 } while (false);
298
299 memcpy(qoid + 2, oid, l2 * sizeof(int));
300 l1 = 2 + l2;
301 }
302 }
303
304 void
closeiolat(WINDOW * w)305 closeiolat(WINDOW *w)
306 {
307 if (w == NULL)
308 return;
309 wclear(w);
310 wrefresh(w);
311 delwin(w);
312 }
313
/*
 * Parse the number following '=' in a "name=value" command.
 *
 * cmd - command text.
 * v   - updated only when a '=' is present and a floating-point number
 *       can be read right after it; otherwise left untouched.
 */
static void
doublecmd(const char *cmd, double *v)
{
	const char *eq = strchr(cmd, '=');
	double parsed;

	/* XXX Tell the user something when either step fails? */
	if (eq != NULL && sscanf(eq + 1, "%lf", &parsed) == 1)
		*v = parsed;
}
327
328 int
cmdiolat(const char * cmd __unused,const char * args __unused)329 cmdiolat(const char *cmd __unused, const char *args __unused)
330 {
331 fprintf(stderr, "CMD IS '%s'\n\n", cmd);
332 if (prefix(cmd, "trim"))
333 flags ^= OP_TRIM_MASK;
334 else if (prefix(cmd, "read"))
335 flags ^= OP_READ_MASK;
336 else if (prefix(cmd, "write"))
337 flags ^= OP_WRITE_MASK;
338 else if (prefix(cmd, "color"))
339 docolor = !docolor;
340 else if (prefix("high", cmd))
341 doublecmd(cmd, &high_thresh);
342 else if (prefix("med", cmd))
343 doublecmd(cmd, &med_thresh);
344 else
345 return (0);
346 wclear(wnd);
347 labeliolat();
348 refresh();
349 return (1);
350 }
351
352 int
initiolat(void)353 initiolat(void)
354 {
355 int cam[CTL_MAXNAME];
356 uint64_t sbt_base;
357 size_t len = sizeof(sbt_base);
358
359 SLIST_INIT(&curlist);
360
361 baselat = 1e-3; /* old default */
362 if (sysctlbyname(CAM_IOSCHED_BASE, &sbt_base, &len, NULL, 0) == 0)
363 baselat = sbt_base * 1e-6; /* Convert to microseconds */
364
365 name2oid(CAM_BASE, cam);
366 walk_sysctl(cam, 2);
367 return (1);
368 }
369
370 void
fetchiolat(void)371 fetchiolat(void)
372 {
373 int cam[CTL_MAXNAME];
374
375 name2oid(CAM_BASE, cam);
376 walk_sysctl(cam, 2);
377 }
378
379 #define INSET 10
380
381 void
labeliolat(void)382 labeliolat(void)
383 {
384 int _col, ndrives, lpr, row, j;
385 int regions __unused;
386 struct iosched_stat *isp;
387 char tmpstr[32];
388 #define COLWIDTH 29
389 #define DRIVESPERLINE ((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
390 ndrives = ndevs; // XXX FILTER XXX
391 regions = howmany(ndrives, DRIVESPERLINE);
392 lpr = 2; /* for headers */
393 for (int i = 0; i < NUM_OPS; i++) {
394 if (flags & (1 << i))
395 lpr++;
396 }
397 row = 0;
398 _col = INSET;
399 j = 2;
400 if (flags & OP_READ_MASK)
401 mvwaddstr(wnd, row + j++, 1, "read");
402 if (flags & OP_WRITE_MASK)
403 mvwaddstr(wnd, row + j++, 1, "write");
404 if (flags & OP_TRIM_MASK)
405 mvwaddstr(wnd, row + j++, 1, "trim");
406 SLIST_FOREACH(isp, &curlist, link) {
407 if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
408 _col = INSET;
409 row += lpr + 1;
410 if (row > getmaxy(wnd) - 1 - (lpr + 1))
411 break;
412 j = 2;
413 if (flags & OP_READ_MASK)
414 mvwaddstr(wnd, row + j++, 1, "read");
415 if (flags & OP_WRITE_MASK)
416 mvwaddstr(wnd, row + j++, 1, "write");
417 if (flags & OP_TRIM_MASK)
418 mvwaddstr(wnd, row + j++, 1, "trim");
419 }
420 snprintf(tmpstr, sizeof(tmpstr), "%s%d", isp->dev_name, isp->unit);
421 mvwaddstr(wnd, row, _col + (COLWIDTH - strlen(tmpstr)) / 2, tmpstr);
422 mvwaddstr(wnd, row + 1, _col, " p50 p90 p99 p99.9");
423 _col += COLWIDTH;
424 }
425 }
426
427 WINDOW *
openiolat(void)428 openiolat(void)
429 {
430 return (subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0));
431 }
432
/*
 * Format a value into a 6-character field.
 *
 * f   - value to print; NaN is rendered as a dash placeholder.
 * buf - output buffer, always NUL-terminated when len > 0.
 * len - size of buf.
 *
 * The precision shrinks as the magnitude grows so that values below
 * 1000000 stay 6 characters wide.
 */
static void
fmt(float f, char *buf, size_t len)
{
	/*
	 * The placeholder is as wide as the numeric formats so that it
	 * fully overwrites a number previously drawn in the same spot.
	 */
	if (isnan(f))
		snprintf(buf, len, "%s", "   -  ");
	else if (f >= 1000.0)
		snprintf(buf, len, "%6d", (int)f);
	else if (f >= 100.0)
		snprintf(buf, len, "%6.1f", f);
	else if (f >= 10.0)
		snprintf(buf, len, "%6.2f", f);
	else
		snprintf(buf, len, "%6.3f", f);
}
447
448 static void
latout(double lat,int y,int x)449 latout(double lat, int y, int x)
450 {
451 int i;
452 char tmpstr[32];
453
454 fmt(lat, tmpstr, sizeof(tmpstr));
455 if (isnan(lat))
456 i = 4;
457 else if (lat > high_thresh)
458 i = 3;
459 else if (lat > med_thresh)
460 i = 2;
461 else
462 i = 1;
463 if (docolor)
464 wattron(wnd, COLOR_PAIR(i));
465 mvwaddstr(wnd, y, x, tmpstr);
466 if (docolor)
467 wattroff(wnd, COLOR_PAIR(i));
468 }
469
470 void
showiolat(void)471 showiolat(void)
472 {
473 int _col, ndrives, lpr, row, k;
474 int regions __unused;
475 struct iosched_stat *isp;
476 struct iosched_op_stat *iosp;
477 #define COLWIDTH 29
478 #define DRIVESPERLINE ((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
479 ndrives = ndevs; // XXX FILTER XXX
480 regions = howmany(ndrives, DRIVESPERLINE);
481 lpr = 2; /* XXX */
482 for (int i = 0; i < NUM_OPS; i++) {
483 if (flags & (1 << i))
484 lpr++;
485 }
486 row = 0;
487 _col = INSET;
488 SLIST_FOREACH(isp, &curlist, link) {
489 if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
490 _col = INSET;
491 row += lpr + 1;
492 if (row > getmaxy(wnd) - 1 - (lpr + 1))
493 break;
494 }
495 k = 2;
496 for (int i = 0; i < NUM_OPS; i++) {
497 uint64_t lats[MAX_LATS];
498 int nlats;
499 float p50, p90, p99, p999;
500
501 if ((flags & (1 << i)) == 0)
502 continue;
503 iosp = &isp->op_stats[i];
504 nlats = iosp->nlats;
505 memset(lats, 0, sizeof(lats));
506 for (int j = 0; j < iosp->nlats; j++)
507 lats[j] = iosp->lats[j] - iosp->prev_lats[j];
508 p50 = pest(500, lats, nlats) * E3;
509 p90 = pest(900, lats, nlats) * E3;
510 p99 = pest(990, lats, nlats) * E3;
511 p999 = pest(999, lats, nlats) * E3;
512 latout(p50, row + k, _col);
513 latout(p90, row + k, _col + 7);
514 latout(p99, row + k, _col + 14);
515 latout(p999, row + k, _col + 21);
516 k++;
517 }
518 _col += COLWIDTH;
519 }
520 }
521