1 /* 2 * Gather top-level ZFS pool and resilver/scan statistics and print using 3 * influxdb line protocol 4 * usage: [options] [pool_name] 5 * where options are: 6 * --execd, -e run in telegraf execd input plugin mode, [CR] on 7 * stdin causes a sample to be printed and wait for 8 * the next [CR] 9 * --no-histograms, -n don't print histogram data (reduces cardinality 10 * if you don't care about histograms) 11 * --sum-histogram-buckets, -s sum histogram bucket values 12 * 13 * To integrate into telegraf use one of: 14 * 1. the `inputs.execd` plugin with the `--execd` option 15 * 2. the `inputs.exec` plugin to simply run with no options 16 * 17 * NOTE: libzfs is an unstable interface. YMMV. 18 * 19 * The design goals of this software include: 20 * + be as lightweight as possible 21 * + reduce the number of external dependencies as far as possible, hence 22 * there is no dependency on a client library for managing the metric 23 * collection -- info is printed, KISS 24 * + broken pools or kernel bugs can cause this process to hang in an 25 * unkillable state. For this reason, it is best to keep the damage limited 26 * to a small process like zpool_influxdb rather than a larger collector. 27 * 28 * Copyright 2018-2020 Richard Elling 29 * 30 * This software is dual-licensed MIT and CDDL. 31 * 32 * The MIT License (MIT) 33 * 34 * Permission is hereby granted, free of charge, to any person obtaining a copy 35 * of this software and associated documentation files (the "Software"), to deal 36 * in the Software without restriction, including without limitation the rights 37 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 38 * copies of the Software, and to permit persons to whom the Software is 39 * furnished to do so, subject to the following conditions: 40 * 41 * The above copyright notice and this permission notice shall be included in 42 * all copies or substantial portions of the Software. 43 * 44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 47 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 50 * SOFTWARE. 51 * 52 * CDDL HEADER START 53 * 54 * The contents of this file are subject to the terms of the 55 * Common Development and Distribution License (the "License"). 56 * You may not use this file except in compliance with the License. 57 * 58 * The contents of this file are subject to the terms of the 59 * Common Development and Distribution License Version 1.0 (CDDL-1.0). 60 * You can obtain a copy of the license from the top-level file 61 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. 62 * You may not use this file except in compliance with the license. 63 * 64 * See the License for the specific language governing permissions 65 * and limitations under the License. 66 * 67 * CDDL HEADER END 68 */ 69 #include <string.h> 70 #include <getopt.h> 71 #include <stdio.h> 72 #include <stdint.h> 73 #include <inttypes.h> 74 #include <libzfs.h> 75 76 #define POOL_MEASUREMENT "zpool_stats" 77 #define SCAN_MEASUREMENT "zpool_scan_stats" 78 #define VDEV_MEASUREMENT "zpool_vdev_stats" 79 #define POOL_LATENCY_MEASUREMENT "zpool_latency" 80 #define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue" 81 #define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */ 82 #define POOL_IO_SIZE_MEASUREMENT "zpool_io_size" 83 #define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */ 84 85 /* global options */ 86 int execd_mode = 0; 87 int no_histograms = 0; 88 int sum_histogram_buckets = 0; 89 char metric_data_type = 'u'; 90 uint64_t metric_value_mask = UINT64_MAX; 91 uint64_t timestamp = 0; 92 int complained_about_sync = 0; 93 char *tags = ""; 94 95 typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *); 96 97 /* 98 * influxdb line protocol rules for escaping are important because the 99 * zpool name can include characters that need to be escaped 100 * 101 * caller is responsible for freeing result 102 */ 103 static char * 104 escape_string(const char *s) 105 { 106 const char *c; 107 char *d; 108 char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2); 109 if (t == NULL) { 110 fprintf(stderr, "error: cannot allocate memory\n"); 111 exit(1); 112 } 113 114 for (c = s, d = t; *c != '\0'; c++, d++) { 115 switch (*c) { 116 case ' ': 117 case ',': 118 case '=': 119 case '\\': 120 *d++ = '\\'; 121 /* FALLTHROUGH */ 122 default: 123 *d = *c; 124 } 125 } 126 *d = '\0'; 127 return (t); 128 } 129 130 /* 131 * print key=value where value is a uint64_t 132 */ 133 static void 134 print_kv(char *key, uint64_t value) 135 { 136 printf("%s=%llu%c", key, 137 (u_longlong_t)value & metric_value_mask, metric_data_type); 138 } 139 140 /* 141 * print_scan_status() prints the details as often seen in the "zpool status" 142 * output. However, unlike the zpool command, which is intended for humans, 143 * this output is suitable for long-term tracking in influxdb. 144 * TODO: update to include issued scan data 145 */ 146 static int 147 print_scan_status(nvlist_t *nvroot, const char *pool_name) 148 { 149 uint_t c; 150 int64_t elapsed; 151 uint64_t examined, pass_exam, paused_time, paused_ts, rate; 152 uint64_t remaining_time; 153 pool_scan_stat_t *ps = NULL; 154 double pct_done; 155 char *state[DSS_NUM_STATES] = { 156 "none", "scanning", "finished", "canceled"}; 157 char *func; 158 159 (void) nvlist_lookup_uint64_array(nvroot, 160 ZPOOL_CONFIG_SCAN_STATS, 161 (uint64_t **)&ps, &c); 162 163 /* 164 * ignore if there are no stats 165 */ 166 if (ps == NULL) 167 return (0); 168 169 /* 170 * return error if state is bogus 171 */ 172 if (ps->pss_state >= DSS_NUM_STATES || 173 ps->pss_func >= POOL_SCAN_FUNCS) { 174 if (complained_about_sync % 1000 == 0) { 175 fprintf(stderr, "error: cannot decode scan stats: " 176 "ZFS is out of sync with compiled zpool_influxdb"); 177 complained_about_sync++; 178 } 179 return (1); 180 } 181 182 switch (ps->pss_func) { 183 case POOL_SCAN_NONE: 184 func = "none_requested"; 185 break; 186 case POOL_SCAN_SCRUB: 187 func = "scrub"; 188 break; 189 case POOL_SCAN_RESILVER: 190 func = "resilver"; 191 break; 192 #ifdef POOL_SCAN_REBUILD 193 case POOL_SCAN_REBUILD: 194 func = "rebuild"; 195 break; 196 #endif 197 default: 198 func = "scan"; 199 } 200 201 /* overall progress */ 202 examined = ps->pss_examined ? ps->pss_examined : 1; 203 pct_done = 0.0; 204 if (ps->pss_to_examine > 0) 205 pct_done = 100.0 * examined / ps->pss_to_examine; 206 207 #ifdef EZFS_SCRUB_PAUSED 208 paused_ts = ps->pss_pass_scrub_pause; 209 paused_time = ps->pss_pass_scrub_spent_paused; 210 #else 211 paused_ts = 0; 212 paused_time = 0; 213 #endif 214 215 /* calculations for this pass */ 216 if (ps->pss_state == DSS_SCANNING) { 217 elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start - 218 (int64_t)paused_time; 219 elapsed = (elapsed > 0) ? elapsed : 1; 220 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 221 rate = pass_exam / elapsed; 222 rate = (rate > 0) ? rate : 1; 223 remaining_time = ps->pss_to_examine - examined / rate; 224 } else { 225 elapsed = 226 (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start - 227 (int64_t)paused_time; 228 elapsed = (elapsed > 0) ? elapsed : 1; 229 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 230 rate = pass_exam / elapsed; 231 remaining_time = 0; 232 } 233 rate = rate ? rate : 1; 234 235 /* influxdb line protocol format: "tags metrics timestamp" */ 236 printf("%s%s,function=%s,name=%s,state=%s ", 237 SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]); 238 print_kv("end_ts", ps->pss_end_time); 239 print_kv(",errors", ps->pss_errors); 240 print_kv(",examined", examined); 241 print_kv(",issued", ps->pss_issued); 242 print_kv(",pass_examined", pass_exam); 243 print_kv(",pass_issued", ps->pss_pass_issued); 244 print_kv(",paused_ts", paused_ts); 245 print_kv(",paused_t", paused_time); 246 printf(",pct_done=%.2f", pct_done); 247 print_kv(",processed", ps->pss_processed); 248 print_kv(",rate", rate); 249 print_kv(",remaining_t", remaining_time); 250 print_kv(",start_ts", ps->pss_start_time); 251 print_kv(",to_examine", ps->pss_to_examine); 252 print_kv(",to_process", ps->pss_to_process); 253 printf(" %llu\n", (u_longlong_t)timestamp); 254 return (0); 255 } 256 257 /* 258 * get a vdev name that corresponds to the top-level vdev names 259 * printed by `zpool status` 260 */ 261 static char * 262 get_vdev_name(nvlist_t *nvroot, const char *parent_name) 263 { 264 static char vdev_name[256]; 265 char *vdev_type = NULL; 266 uint64_t vdev_id = 0; 267 268 if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, 269 &vdev_type) != 0) { 270 vdev_type = "unknown"; 271 } 272 if (nvlist_lookup_uint64( 273 nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 274 vdev_id = UINT64_MAX; 275 } 276 if (parent_name == NULL) { 277 (void) snprintf(vdev_name, sizeof (vdev_name), "%s", 278 vdev_type); 279 } else { 280 (void) snprintf(vdev_name, sizeof (vdev_name), 281 "%s/%s-%llu", 282 parent_name, vdev_type, (u_longlong_t)vdev_id); 283 } 284 return (vdev_name); 285 } 286 287 /* 288 * get a string suitable for an influxdb tag that describes this vdev 289 * 290 * By default only the vdev hierarchical name is shown, separated by '/' 291 * If the vdev has an associated path, which is typical of leaf vdevs, 292 * then the path is added. 293 * It would be nice to have the devid instead of the path, but under 294 * Linux we cannot be sure a devid will exist and we'd rather have 295 * something than nothing, so we'll use path instead. 296 */ 297 static char * 298 get_vdev_desc(nvlist_t *nvroot, const char *parent_name) 299 { 300 static char vdev_desc[2 * MAXPATHLEN]; 301 char *vdev_type = NULL; 302 uint64_t vdev_id = 0; 303 char vdev_value[MAXPATHLEN]; 304 char *vdev_path = NULL; 305 char *s, *t; 306 307 if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) { 308 vdev_type = "unknown"; 309 } 310 if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 311 vdev_id = UINT64_MAX; 312 } 313 if (nvlist_lookup_string( 314 nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) { 315 vdev_path = NULL; 316 } 317 318 if (parent_name == NULL) { 319 s = escape_string(vdev_type); 320 (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s); 321 free(s); 322 } else { 323 s = escape_string((char *)parent_name); 324 t = escape_string(vdev_type); 325 (void) snprintf(vdev_value, sizeof (vdev_value), 326 "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id); 327 free(s); 328 free(t); 329 } 330 if (vdev_path == NULL) { 331 (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s", 332 vdev_value); 333 } else { 334 s = escape_string(vdev_path); 335 (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s", 336 s, vdev_value); 337 free(s); 338 } 339 return (vdev_desc); 340 } 341 342 /* 343 * vdev summary stats are a combination of the data shown by 344 * `zpool status` and `zpool list -v` 345 */ 346 static int 347 print_summary_stats(nvlist_t *nvroot, const char *pool_name, 348 const char *parent_name) 349 { 350 uint_t c; 351 vdev_stat_t *vs; 352 char *vdev_desc = NULL; 353 vdev_desc = get_vdev_desc(nvroot, parent_name); 354 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 355 (uint64_t **)&vs, &c) != 0) { 356 return (1); 357 } 358 printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags, 359 pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state, 360 (vdev_aux_t)vs->vs_aux), vdev_desc); 361 print_kv("alloc", vs->vs_alloc); 362 print_kv(",free", vs->vs_space - vs->vs_alloc); 363 print_kv(",size", vs->vs_space); 364 print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]); 365 print_kv(",read_errors", vs->vs_read_errors); 366 print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]); 367 print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]); 368 print_kv(",write_errors", vs->vs_write_errors); 369 print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]); 370 print_kv(",checksum_errors", vs->vs_checksum_errors); 371 print_kv(",fragmentation", vs->vs_fragmentation); 372 printf(" %llu\n", (u_longlong_t)timestamp); 373 return (0); 374 } 375 376 /* 377 * vdev latency stats are histograms stored as nvlist arrays of uint64. 378 * Latency stats include the ZIO scheduler classes plus lower-level 379 * vdev latencies. 380 * 381 * In many cases, the top-level "root" view obscures the underlying 382 * top-level vdev operations. For example, if a pool has a log, special, 383 * or cache device, then each can behave very differently. It is useful 384 * to see how each is responding. 385 */ 386 static int 387 print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name, 388 const char *parent_name) 389 { 390 uint_t c, end = 0; 391 nvlist_t *nv_ex; 392 char *vdev_desc = NULL; 393 394 /* short_names become part of the metric name and are influxdb-ready */ 395 struct lat_lookup { 396 char *name; 397 char *short_name; 398 uint64_t sum; 399 uint64_t *array; 400 }; 401 struct lat_lookup lat_type[] = { 402 {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0}, 403 {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0}, 404 {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0}, 405 {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0}, 406 {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0}, 407 {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0}, 408 {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0}, 409 {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0}, 410 {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0}, 411 #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO 412 {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0}, 413 #endif 414 {NULL, NULL} 415 }; 416 417 if (nvlist_lookup_nvlist(nvroot, 418 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 419 return (6); 420 } 421 422 vdev_desc = get_vdev_desc(nvroot, parent_name); 423 424 for (int i = 0; lat_type[i].name; i++) { 425 if (nvlist_lookup_uint64_array(nv_ex, 426 lat_type[i].name, &lat_type[i].array, &c) != 0) { 427 fprintf(stderr, "error: can't get %s\n", 428 lat_type[i].name); 429 return (3); 430 } 431 /* end count count, all of the arrays are the same size */ 432 end = c - 1; 433 } 434 435 for (int bucket = 0; bucket <= end; bucket++) { 436 if (bucket < MIN_LAT_INDEX) { 437 /* don't print, but collect the sum */ 438 for (int i = 0; lat_type[i].name; i++) { 439 lat_type[i].sum += lat_type[i].array[bucket]; 440 } 441 continue; 442 } 443 if (bucket < end) { 444 printf("%s%s,le=%0.6f,name=%s,%s ", 445 POOL_LATENCY_MEASUREMENT, tags, 446 (float)(1ULL << bucket) * 1e-9, 447 pool_name, vdev_desc); 448 } else { 449 printf("%s%s,le=+Inf,name=%s,%s ", 450 POOL_LATENCY_MEASUREMENT, tags, pool_name, 451 vdev_desc); 452 } 453 for (int i = 0; lat_type[i].name; i++) { 454 if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { 455 lat_type[i].sum += lat_type[i].array[bucket]; 456 } else { 457 lat_type[i].sum = lat_type[i].array[bucket]; 458 } 459 print_kv(lat_type[i].short_name, lat_type[i].sum); 460 if (lat_type[i + 1].name != NULL) { 461 printf(","); 462 } 463 } 464 printf(" %llu\n", (u_longlong_t)timestamp); 465 } 466 return (0); 467 } 468 469 /* 470 * vdev request size stats are histograms stored as nvlist arrays of uint64. 471 * Request size stats include the ZIO scheduler classes plus lower-level 472 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported. 473 * 474 * In many cases, the top-level "root" view obscures the underlying 475 * top-level vdev operations. For example, if a pool has a log, special, 476 * or cache device, then each can behave very differently. It is useful 477 * to see how each is responding. 478 */ 479 static int 480 print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name, 481 const char *parent_name) 482 { 483 uint_t c, end = 0; 484 nvlist_t *nv_ex; 485 char *vdev_desc = NULL; 486 487 /* short_names become the field name */ 488 struct size_lookup { 489 char *name; 490 char *short_name; 491 uint64_t sum; 492 uint64_t *array; 493 }; 494 struct size_lookup size_type[] = { 495 {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"}, 496 {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"}, 497 {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"}, 498 {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"}, 499 {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"}, 500 {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"}, 501 {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"}, 502 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"}, 503 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"}, 504 {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"}, 505 #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO 506 {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"}, 507 {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"}, 508 #endif 509 {NULL, NULL} 510 }; 511 512 if (nvlist_lookup_nvlist(nvroot, 513 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 514 return (6); 515 } 516 517 vdev_desc = get_vdev_desc(nvroot, parent_name); 518 519 for (int i = 0; size_type[i].name; i++) { 520 if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name, 521 &size_type[i].array, &c) != 0) { 522 fprintf(stderr, "error: can't get %s\n", 523 size_type[i].name); 524 return (3); 525 } 526 /* end count count, all of the arrays are the same size */ 527 end = c - 1; 528 } 529 530 for (int bucket = 0; bucket <= end; bucket++) { 531 if (bucket < MIN_SIZE_INDEX) { 532 /* don't print, but collect the sum */ 533 for (int i = 0; size_type[i].name; i++) { 534 size_type[i].sum += size_type[i].array[bucket]; 535 } 536 continue; 537 } 538 539 if (bucket < end) { 540 printf("%s%s,le=%llu,name=%s,%s ", 541 POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket, 542 pool_name, vdev_desc); 543 } else { 544 printf("%s%s,le=+Inf,name=%s,%s ", 545 POOL_IO_SIZE_MEASUREMENT, tags, pool_name, 546 vdev_desc); 547 } 548 for (int i = 0; size_type[i].name; i++) { 549 if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { 550 size_type[i].sum += size_type[i].array[bucket]; 551 } else { 552 size_type[i].sum = size_type[i].array[bucket]; 553 } 554 print_kv(size_type[i].short_name, size_type[i].sum); 555 if (size_type[i + 1].name != NULL) { 556 printf(","); 557 } 558 } 559 printf(" %llu\n", (u_longlong_t)timestamp); 560 } 561 return (0); 562 } 563 564 /* 565 * ZIO scheduler queue stats are stored as gauges. This is unfortunate 566 * because the values can change very rapidly and any point-in-time 567 * value will quickly be obsoleted. It is also not easy to downsample. 568 * Thus only the top-level queue stats might be beneficial... maybe. 569 */ 570 static int 571 print_queue_stats(nvlist_t *nvroot, const char *pool_name, 572 const char *parent_name) 573 { 574 nvlist_t *nv_ex; 575 uint64_t value; 576 577 /* short_names are used for the field name */ 578 struct queue_lookup { 579 char *name; 580 char *short_name; 581 }; 582 struct queue_lookup queue_type[] = { 583 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"}, 584 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"}, 585 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"}, 586 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"}, 587 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"}, 588 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"}, 589 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"}, 590 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"}, 591 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"}, 592 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"}, 593 {NULL, NULL} 594 }; 595 596 if (nvlist_lookup_nvlist(nvroot, 597 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 598 return (6); 599 } 600 601 printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name, 602 get_vdev_desc(nvroot, parent_name)); 603 for (int i = 0; queue_type[i].name; i++) { 604 if (nvlist_lookup_uint64(nv_ex, 605 queue_type[i].name, &value) != 0) { 606 fprintf(stderr, "error: can't get %s\n", 607 queue_type[i].name); 608 return (3); 609 } 610 print_kv(queue_type[i].short_name, value); 611 if (queue_type[i + 1].name != NULL) { 612 printf(","); 613 } 614 } 615 printf(" %llu\n", (u_longlong_t)timestamp); 616 return (0); 617 } 618 619 /* 620 * top-level vdev stats are at the pool level 621 */ 622 static int 623 print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) 624 { 625 nvlist_t *nv_ex; 626 uint64_t value; 627 628 /* short_names become part of the metric name */ 629 struct queue_lookup { 630 char *name; 631 char *short_name; 632 }; 633 struct queue_lookup queue_type[] = { 634 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"}, 635 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"}, 636 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"}, 637 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"}, 638 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"}, 639 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"}, 640 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"}, 641 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"}, 642 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"}, 643 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"}, 644 {NULL, NULL} 645 }; 646 647 if (nvlist_lookup_nvlist(nvroot, 648 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 649 return (6); 650 } 651 652 printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags, 653 pool_name); 654 for (int i = 0; queue_type[i].name; i++) { 655 if (nvlist_lookup_uint64(nv_ex, 656 queue_type[i].name, &value) != 0) { 657 fprintf(stderr, "error: can't get %s\n", 658 queue_type[i].name); 659 return (3); 660 } 661 if (i > 0) 662 printf(","); 663 print_kv(queue_type[i].short_name, value); 664 } 665 666 printf(" %llu\n", (u_longlong_t)timestamp); 667 return (0); 668 } 669 670 /* 671 * recursive stats printer 672 */ 673 static int 674 print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, 675 const char *pool_name, const char *parent_name, int descend) 676 { 677 uint_t c, children; 678 nvlist_t **child; 679 char vdev_name[256]; 680 int err; 681 682 err = func(nvroot, pool_name, parent_name); 683 if (err) 684 return (err); 685 686 if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 687 &child, &children) == 0) { 688 (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name), 689 sizeof (vdev_name)); 690 691 for (c = 0; c < children; c++) { 692 print_recursive_stats(func, child[c], pool_name, 693 vdev_name, descend); 694 } 695 } 696 return (0); 697 } 698 699 /* 700 * call-back to print the stats from the pool config 701 * 702 * Note: if the pool is broken, this can hang indefinitely and perhaps in an 703 * unkillable state. 704 */ 705 static int 706 print_stats(zpool_handle_t *zhp, void *data) 707 { 708 uint_t c; 709 int err; 710 boolean_t missing; 711 nvlist_t *config, *nvroot; 712 vdev_stat_t *vs; 713 struct timespec tv; 714 char *pool_name; 715 716 /* if not this pool return quickly */ 717 if (data && 718 strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) { 719 zpool_close(zhp); 720 return (0); 721 } 722 723 if (zpool_refresh_stats(zhp, &missing) != 0) { 724 zpool_close(zhp); 725 return (1); 726 } 727 728 config = zpool_get_config(zhp, NULL); 729 if (clock_gettime(CLOCK_REALTIME, &tv) != 0) 730 timestamp = (uint64_t)time(NULL) * 1000000000; 731 else 732 timestamp = 733 ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec; 734 735 if (nvlist_lookup_nvlist( 736 config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 737 zpool_close(zhp); 738 return (2); 739 } 740 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 741 (uint64_t **)&vs, &c) != 0) { 742 zpool_close(zhp); 743 return (3); 744 } 745 746 pool_name = escape_string(zpool_get_name(zhp)); 747 err = print_recursive_stats(print_summary_stats, nvroot, 748 pool_name, NULL, 1); 749 /* if any of these return an error, skip the rest */ 750 if (err == 0) 751 err = print_top_level_vdev_stats(nvroot, pool_name); 752 753 if (no_histograms == 0) { 754 if (err == 0) 755 err = print_recursive_stats(print_vdev_latency_stats, nvroot, 756 pool_name, NULL, 1); 757 if (err == 0) 758 err = print_recursive_stats(print_vdev_size_stats, nvroot, 759 pool_name, NULL, 1); 760 if (err == 0) 761 err = print_recursive_stats(print_queue_stats, nvroot, 762 pool_name, NULL, 0); 763 } 764 if (err == 0) 765 err = print_scan_status(nvroot, pool_name); 766 767 free(pool_name); 768 zpool_close(zhp); 769 return (err); 770 } 771 772 static void 773 usage(char *name) 774 { 775 fprintf(stderr, "usage: %s [--execd][--no-histograms]" 776 "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name); 777 exit(EXIT_FAILURE); 778 } 779 780 int 781 main(int argc, char *argv[]) 782 { 783 int opt; 784 int ret = 8; 785 char *line = NULL; 786 size_t len, tagslen = 0; 787 struct option long_options[] = { 788 {"execd", no_argument, NULL, 'e'}, 789 {"help", no_argument, NULL, 'h'}, 790 {"no-histograms", no_argument, NULL, 'n'}, 791 {"signed-int", no_argument, NULL, 'i'}, 792 {"sum-histogram-buckets", no_argument, NULL, 's'}, 793 {"tags", required_argument, NULL, 't'}, 794 {0, 0, 0, 0} 795 }; 796 while ((opt = getopt_long( 797 argc, argv, "ehinst:", long_options, NULL)) != -1) { 798 switch (opt) { 799 case 'e': 800 execd_mode = 1; 801 break; 802 case 'i': 803 metric_data_type = 'i'; 804 metric_value_mask = INT64_MAX; 805 break; 806 case 'n': 807 no_histograms = 1; 808 break; 809 case 's': 810 sum_histogram_buckets = 1; 811 break; 812 case 't': 813 tagslen = strlen(optarg) + 2; 814 tags = calloc(tagslen, 1); 815 if (tags == NULL) { 816 fprintf(stderr, 817 "error: cannot allocate memory " 818 "for tags\n"); 819 exit(1); 820 } 821 (void) snprintf(tags, tagslen, ",%s", optarg); 822 break; 823 default: 824 usage(argv[0]); 825 } 826 } 827 828 libzfs_handle_t *g_zfs; 829 if ((g_zfs = libzfs_init()) == NULL) { 830 fprintf(stderr, 831 "error: cannot initialize libzfs. " 832 "Is the zfs module loaded or zrepl running?\n"); 833 exit(EXIT_FAILURE); 834 } 835 if (execd_mode == 0) { 836 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 837 return (ret); 838 } 839 while (getline(&line, &len, stdin) != -1) { 840 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 841 fflush(stdout); 842 } 843 return (ret); 844 } 845