1 /* 2 * Copyright (c) 2021 Netflix, Inc 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7 8 #include <sys/param.h> 9 #include <sys/sysctl.h> 10 #include <sys/resource.h> 11 12 #include <devstat.h> 13 #include <err.h> 14 #include <errno.h> 15 #include <math.h> 16 #include <stdbool.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include <sys/queue.h> 21 #include <sys/sysctl.h> 22 23 #include "systat.h" 24 #include "extern.h" 25 #include "devs.h" 26 27 #define CAM_BASE "kern.cam" 28 #define LATENCY ".latencies" 29 #define CAM_IOSCHED_BASE "kern.cam.iosched.bucket_base_us" 30 31 #define DEV_NAMSIZE 32 32 #define OP_NAMSIZE 16 33 #define MAX_LATS 32 34 35 static double high_thresh = 500; 36 static double med_thresh = 300; 37 static bool docolor = true; 38 39 static int ndevs; 40 static SLIST_HEAD(, iosched_stat) curlist; 41 42 struct iosched_op_stat { 43 int nlats; 44 uint64_t lats[MAX_LATS]; 45 uint64_t prev_lats[MAX_LATS]; 46 }; 47 48 enum { OP_READ = 0, OP_WRITE, OP_TRIM, NUM_OPS }; 49 static const char *ops[NUM_OPS] = { "read", "write", "trim" }; 50 #define OP_READ_MASK (1 << OP_READ) 51 #define OP_WRITE_MASK (1 << OP_WRITE) 52 #define OP_TRIM_MASK (1 << OP_TRIM) 53 54 static uint32_t flags = OP_READ_MASK | OP_WRITE_MASK | OP_TRIM_MASK; 55 56 struct iosched_stat { 57 SLIST_ENTRY(iosched_stat) link; 58 char dev_name[DEV_NAMSIZE]; 59 int unit; 60 struct iosched_op_stat op_stats[NUM_OPS]; 61 }; 62 63 static int name2oid(const char *, int *); 64 static int walk_sysctl(int *, size_t); 65 66 static int 67 name2oid(const char *name, int *oidp) 68 { 69 int oid[2]; 70 int i; 71 size_t j; 72 73 oid[0] = CTL_SYSCTL; 74 oid[1] = CTL_SYSCTL_NAME2OID; 75 76 j = CTL_MAXNAME * sizeof(int); 77 i = sysctl(oid, 2, oidp, &j, name, strlen(name)); 78 if (i < 0) 79 return (i); 80 j /= sizeof(int); 81 return (j); 82 } 83 84 static size_t /* Includes the trailing NUL */ 85 oid2name(int *oid, size_t nlen, char *name, size_t namlen) 86 { 87 int qoid[CTL_MAXNAME + 2]; 88 int i; 89 size_t j; 90 91 bzero(name, namlen); 92 qoid[0] = CTL_SYSCTL; 93 qoid[1] = CTL_SYSCTL_NAME; 94 memcpy(qoid + 2, oid, nlen * sizeof(int)); 95 j = namlen; 96 i = sysctl(qoid, nlen + 2, name, &j, 0, 0); 97 if (i || !j) 98 err(1, "sysctl name %d %zu %d", i, j, errno); 99 return (j); 100 } 101 102 static int 103 oidfmt(int *oid, int len, u_int *kind) 104 { 105 int qoid[CTL_MAXNAME+2]; 106 u_char buf[BUFSIZ]; 107 int i; 108 size_t j; 109 110 qoid[0] = CTL_SYSCTL; 111 qoid[1] = CTL_SYSCTL_OIDFMT; 112 memcpy(qoid + 2, oid, len * sizeof(int)); 113 114 j = sizeof(buf); 115 i = sysctl(qoid, len + 2, buf, &j, 0, 0); 116 if (i) 117 err(1, "sysctl fmt %d %zu %d", i, j, errno); 118 *kind = *(u_int *)buf; 119 return (0); 120 } 121 122 static int 123 split_u64(char *str, const char *delim, uint64_t *buckets, int *nbuckets) 124 { 125 int n = *nbuckets, i; 126 char *v; 127 128 memset(buckets, 0, n * sizeof(buckets[0])); 129 for (i = 0; (v = strsep(&str, delim)) != NULL && i < n; i++) { 130 buckets[i] = strtoull(v, NULL, 10); 131 } 132 if (i < n) 133 *nbuckets = i; 134 return (i < n); 135 } 136 137 static double baselat = 0.000020; 138 139 static float 140 pest(int permill, uint64_t *lats, int nlat) 141 { 142 uint64_t tot, samp; 143 int i; 144 float b1, b2; 145 146 for (tot = 0, i = 0; i < nlat; i++) 147 tot += lats[i]; 148 if (tot == 0) 149 return -nanf(""); 150 if (tot < (uint64_t)2000 / (1000 - permill)) 151 return nanf(""); 152 samp = tot * permill / 1000; 153 if (samp < lats[0]) 154 return baselat * (float)samp / lats[0]; /* linear interpolation 0 and baselat */ 155 for (tot = 0, i = 0; samp >= tot && i < nlat; i++) 156 tot += lats[i]; 157 i--; 158 b1 = baselat * (1 << (i - 1)); 159 b2 = baselat * (1 << i); 160 /* Should expoentially interpolate between buckets -- doing linear instead */ 161 return b1 + (b2 - b1) * (float)(lats[i] - (tot - samp)) / lats[i]; 162 } 163 164 static int 165 op2num(const char *op) 166 { 167 for (int i = 0; i < NUM_OPS; i++) 168 if (strcmp(op, ops[i]) == 0) 169 return i; 170 return -1; 171 } 172 173 static struct iosched_op_stat * 174 find_dev(const char *dev, int unit, int op) 175 { 176 struct iosched_stat *isp; 177 struct iosched_op_stat *iosp; 178 179 SLIST_FOREACH(isp, &curlist, link) { 180 if (strcmp(isp->dev_name, dev) != 0 || isp->unit != unit) 181 continue; 182 iosp = &isp->op_stats[op]; 183 return iosp; 184 } 185 return NULL; 186 } 187 188 static struct iosched_op_stat * 189 alloc_dev(const char *dev, int unit, int op) 190 { 191 struct iosched_stat *isp; 192 struct iosched_op_stat *iosp; 193 194 isp = malloc(sizeof(*isp)); 195 if (isp == NULL) 196 return NULL; 197 strlcpy(isp->dev_name, dev, sizeof(isp->dev_name)); 198 isp->unit = unit; 199 SLIST_INSERT_HEAD(&curlist, isp, link); 200 ndevs++; 201 iosp = &isp->op_stats[op]; 202 return iosp; 203 } 204 205 #define E3 1000.0 206 static void 207 update_dev(const char *dev, int unit, int op, uint64_t *lats, int nlat) 208 { 209 struct iosched_op_stat *iosp; 210 211 iosp = find_dev(dev, unit, op); 212 if (iosp == NULL) 213 iosp = alloc_dev(dev, unit, op); 214 if (iosp == NULL) 215 return; 216 iosp->nlats = nlat; 217 memcpy(iosp->prev_lats, iosp->lats, iosp->nlats * sizeof(uint64_t)); 218 memcpy(iosp->lats, lats, iosp->nlats * sizeof(uint64_t)); 219 // printf("%s%d: %-6s %.3f %.3f %.3f %.3f\r\n", 220 // dev, unit, operation, E3 * pest(500, lats, nlat), E3 * pest(900, lats, nlat), 221 // E3 * pest(990, lats, nlat), E3 * pest(999, lats, nlat)); 222 } 223 224 static int 225 walk_sysctl(int *base_oid, size_t len) 226 { 227 int qoid[CTL_MAXNAME + 2], oid[CTL_MAXNAME]; 228 size_t l1, l2; 229 char name[BUFSIZ]; 230 231 if (len > CTL_MAXNAME) 232 err(1, "Length %zd too long", len); 233 234 qoid[0] = CTL_SYSCTL; 235 qoid[1] = CTL_SYSCTL_NEXT; 236 l1 = 2; 237 memcpy(qoid + 2, base_oid, len * sizeof(int)); 238 l1 += len; 239 for (;;) { 240 /* 241 * Get the next one or return when we get to the end of the 242 * sysctls in the kernel. 243 */ 244 l2 = sizeof(oid); 245 if (sysctl(qoid, l1, oid, &l2, 0, 0) != 0) { 246 if (errno == ENOENT) 247 return (0); 248 err(1, "sysctl(getnext) %zu", l2); 249 } 250 251 l2 /= sizeof(int); 252 253 /* 254 * Bail if we're seeing OIDs that don't have the 255 * same prefix or can't have the same prefix. 256 */ 257 if (l2 < len || 258 memcmp(oid, base_oid, len * sizeof(int)) != 0) 259 return (0); 260 261 /* 262 * Get the name, validate it's one we're looking for, 263 * parse the latency and add to list. 264 */ 265 do { 266 int nlat; 267 size_t l3; 268 char val[BUFSIZ]; 269 char *walker, *dev, *opstr; 270 uint64_t latvals[MAX_LATS]; 271 u_int kind; 272 int unit, op; 273 274 l1 = oid2name(oid, l2, name, sizeof(name)); 275 if (strcmp(name + l1 - strlen(LATENCY) - 1, LATENCY) != 0) 276 break; 277 if (oidfmt(oid, l2, &kind) != 0) 278 err(1, "oidfmt"); 279 if ((kind & CTLTYPE) != CTLTYPE_STRING) 280 errx(1, "string"); 281 l3 = sizeof(val); 282 if (sysctl(oid, l2, val, &l3, 0, 0) != 0) 283 err(1, "sysctl"); 284 val[l3] = '\0'; 285 nlat = nitems(latvals); 286 if (split_u64(val, ",", latvals, &nlat) == 0) 287 break; 288 walker = name + strlen(CAM_BASE) + 1; 289 dev = strsep(&walker, "."); 290 unit = (int)strtol(strsep(&walker, "."), NULL, 10); 291 strsep(&walker, "."); 292 opstr = strsep(&walker, "."); 293 op = op2num(opstr); 294 if (op < 0) 295 break; 296 update_dev(dev, unit, op, latvals, nlat); 297 } while (false); 298 299 memcpy(qoid + 2, oid, l2 * sizeof(int)); 300 l1 = 2 + l2; 301 } 302 } 303 304 void 305 closeiolat(WINDOW *w) 306 { 307 if (w == NULL) 308 return; 309 wclear(w); 310 wrefresh(w); 311 delwin(w); 312 } 313 314 static void 315 doublecmd(const char *cmd, double *v) 316 { 317 const char *p; 318 double tv; 319 320 p = strchr(cmd, '='); 321 if (p == NULL) 322 return; /* XXX Tell the user something? */ 323 if (sscanf(p + 1, "%lf", &tv) != 1) 324 return; /* XXX Tell the user something? */ 325 *v = tv; 326 } 327 328 int 329 cmdiolat(const char *cmd __unused, const char *args __unused) 330 { 331 fprintf(stderr, "CMD IS '%s'\n\n", cmd); 332 if (prefix(cmd, "trim")) 333 flags ^= OP_TRIM_MASK; 334 else if (prefix(cmd, "read")) 335 flags ^= OP_READ_MASK; 336 else if (prefix(cmd, "write")) 337 flags ^= OP_WRITE_MASK; 338 else if (prefix(cmd, "color")) 339 docolor = !docolor; 340 else if (prefix("high", cmd)) 341 doublecmd(cmd, &high_thresh); 342 else if (prefix("med", cmd)) 343 doublecmd(cmd, &med_thresh); 344 else 345 return (0); 346 wclear(wnd); 347 labeliolat(); 348 refresh(); 349 return (1); 350 } 351 352 int 353 initiolat(void) 354 { 355 int cam[CTL_MAXNAME]; 356 uint64_t sbt_base; 357 size_t len = sizeof(sbt_base); 358 359 SLIST_INIT(&curlist); 360 361 baselat = 1e-3; /* old default */ 362 if (sysctlbyname(CAM_IOSCHED_BASE, &sbt_base, &len, NULL, 0) == 0) 363 baselat = sbt_base * 1e-6; /* Convert to microseconds */ 364 365 name2oid(CAM_BASE, cam); 366 walk_sysctl(cam, 2); 367 return (1); 368 } 369 370 void 371 fetchiolat(void) 372 { 373 int cam[CTL_MAXNAME]; 374 375 name2oid(CAM_BASE, cam); 376 walk_sysctl(cam, 2); 377 } 378 379 #define INSET 10 380 381 void 382 labeliolat(void) 383 { 384 int _col, ndrives, lpr, row, j; 385 int regions __unused; 386 struct iosched_stat *isp; 387 char tmpstr[32]; 388 #define COLWIDTH 29 389 #define DRIVESPERLINE ((getmaxx(wnd) - 1 - INSET) / COLWIDTH) 390 ndrives = ndevs; // XXX FILTER XXX 391 regions = howmany(ndrives, DRIVESPERLINE); 392 lpr = 2; /* for headers */ 393 for (int i = 0; i < NUM_OPS; i++) { 394 if (flags & (1 << i)) 395 lpr++; 396 } 397 row = 0; 398 _col = INSET; 399 j = 2; 400 if (flags & OP_READ_MASK) 401 mvwaddstr(wnd, row + j++, 1, "read"); 402 if (flags & OP_WRITE_MASK) 403 mvwaddstr(wnd, row + j++, 1, "write"); 404 if (flags & OP_TRIM_MASK) 405 mvwaddstr(wnd, row + j++, 1, "trim"); 406 SLIST_FOREACH(isp, &curlist, link) { 407 if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) { 408 _col = INSET; 409 row += lpr + 1; 410 if (row > getmaxy(wnd) - 1 - (lpr + 1)) 411 break; 412 j = 2; 413 if (flags & OP_READ_MASK) 414 mvwaddstr(wnd, row + j++, 1, "read"); 415 if (flags & OP_WRITE_MASK) 416 mvwaddstr(wnd, row + j++, 1, "write"); 417 if (flags & OP_TRIM_MASK) 418 mvwaddstr(wnd, row + j++, 1, "trim"); 419 } 420 snprintf(tmpstr, sizeof(tmpstr), "%s%d", isp->dev_name, isp->unit); 421 mvwaddstr(wnd, row, _col + (COLWIDTH - strlen(tmpstr)) / 2, tmpstr); 422 mvwaddstr(wnd, row + 1, _col, " p50 p90 p99 p99.9"); 423 _col += COLWIDTH; 424 } 425 } 426 427 WINDOW * 428 openiolat(void) 429 { 430 return (subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0)); 431 } 432 433 static void 434 fmt(float f, char *buf, size_t len) 435 { 436 if (isnan(f)) 437 strlcpy(buf, " - ", len); 438 else if (f >= 1000.0) 439 snprintf(buf, len, "%6d", (int)f); 440 else if (f >= 100.0) 441 snprintf(buf, len, "%6.1f", f); 442 else if (f >= 10.0) 443 snprintf(buf, len, "%6.2f", f); 444 else 445 snprintf(buf, len, "%6.3f", f); 446 } 447 448 static void 449 latout(double lat, int y, int x) 450 { 451 int i; 452 char tmpstr[32]; 453 454 fmt(lat, tmpstr, sizeof(tmpstr)); 455 if (isnan(lat)) 456 i = 4; 457 else if (lat > high_thresh) 458 i = 3; 459 else if (lat > med_thresh) 460 i = 2; 461 else 462 i = 1; 463 if (docolor) 464 wattron(wnd, COLOR_PAIR(i)); 465 mvwaddstr(wnd, y, x, tmpstr); 466 if (docolor) 467 wattroff(wnd, COLOR_PAIR(i)); 468 } 469 470 void 471 showiolat(void) 472 { 473 int _col, ndrives, lpr, row, k; 474 int regions __unused; 475 struct iosched_stat *isp; 476 struct iosched_op_stat *iosp; 477 #define COLWIDTH 29 478 #define DRIVESPERLINE ((getmaxx(wnd) - 1 - INSET) / COLWIDTH) 479 ndrives = ndevs; // XXX FILTER XXX 480 regions = howmany(ndrives, DRIVESPERLINE); 481 lpr = 2; /* XXX */ 482 for (int i = 0; i < NUM_OPS; i++) { 483 if (flags & (1 << i)) 484 lpr++; 485 } 486 row = 0; 487 _col = INSET; 488 SLIST_FOREACH(isp, &curlist, link) { 489 if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) { 490 _col = INSET; 491 row += lpr + 1; 492 if (row > getmaxy(wnd) - 1 - (lpr + 1)) 493 break; 494 } 495 k = 2; 496 for (int i = 0; i < NUM_OPS; i++) { 497 uint64_t lats[MAX_LATS]; 498 int nlats; 499 float p50, p90, p99, p999; 500 501 if ((flags & (1 << i)) == 0) 502 continue; 503 iosp = &isp->op_stats[i]; 504 nlats = iosp->nlats; 505 memset(lats, 0, sizeof(lats)); 506 for (int j = 0; j < iosp->nlats; j++) 507 lats[j] = iosp->lats[j] - iosp->prev_lats[j]; 508 p50 = pest(500, lats, nlats) * E3; 509 p90 = pest(900, lats, nlats) * E3; 510 p99 = pest(990, lats, nlats) * E3; 511 p999 = pest(999, lats, nlats) * E3; 512 latout(p50, row + k, _col); 513 latout(p90, row + k, _col + 7); 514 latout(p99, row + k, _col + 14); 515 latout(p999, row + k, _col + 21); 516 k++; 517 } 518 _col += COLWIDTH; 519 } 520 } 521