/*
 * Gather top-level ZFS pool and resilver/scan statistics and print using
 * influxdb line protocol
 * usage: [options] [pool_name]
 * where options are:
 *   --execd, -e           run in telegraf execd input plugin mode, [CR] on
 *                         stdin causes a sample to be printed and wait for
 *                         the next [CR]
 *   --no-histograms, -n   don't print histogram data (reduces cardinality
 *                         if you don't care about histograms)
 *   --sum-histogram-buckets, -s   sum histogram bucket values
 *
 * To integrate into telegraf use one of:
 * 1. the `inputs.execd` plugin with the `--execd` option
 * 2. the `inputs.exec` plugin to simply run with no options
 *
 * NOTE: libzfs is an unstable interface. YMMV.
 *
 * The design goals of this software include:
 * + be as lightweight as possible
 * + reduce the number of external dependencies as far as possible, hence
 *   there is no dependency on a client library for managing the metric
 *   collection -- info is printed, KISS
 * + broken pools or kernel bugs can cause this process to hang in an
 *   unkillable state. For this reason, it is best to keep the damage limited
 *   to a small process like zpool_influxdb rather than a larger collector.
 *
 * Copyright 2018-2020 Richard Elling
 *
 * This software is dual-licensed MIT and CDDL.
 *
 * The MIT License (MIT)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * CDDL HEADER END
 */
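/*
 * Example telegraf configuration for the two integration options described
 * above. This is an illustrative sketch only; the install path of
 * zpool_influxdb is an assumption and will differ between systems.
 *
 * execd mode (option 1):
 *   [[inputs.execd]]
 *     command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
 *     signal = "STDIN"
 *     data_format = "influx"
 *
 * exec mode (option 2):
 *   [[inputs.exec]]
 *     commands = ["/usr/libexec/zfs/zpool_influxdb"]
 *     data_format = "influx"
 */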
#include <string.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <libzfs.h>

#define	POOL_MEASUREMENT	"zpool_stats"
#define	SCAN_MEASUREMENT	"zpool_scan_stats"
#define	VDEV_MEASUREMENT	"zpool_vdev_stats"
#define	POOL_LATENCY_MEASUREMENT	"zpool_latency"
#define	POOL_QUEUE_MEASUREMENT	"zpool_vdev_queue"
#define	MIN_LAT_INDEX	10	/* minimum latency index 10 = 1024ns */
#define	POOL_IO_SIZE_MEASUREMENT	"zpool_io_size"
#define	MIN_SIZE_INDEX	9	/* minimum size index 9 = 512 bytes */

/* global options */
int execd_mode = 0;
int no_histograms = 0;
int sum_histogram_buckets = 0;
char metric_data_type = 'u';
uint64_t metric_value_mask = UINT64_MAX;
uint64_t timestamp = 0;
int complained_about_sync = 0;
char *tags = "";

typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);

/*
 * influxdb line protocol rules for escaping are important because the
 * zpool name can include characters that need to be escaped
 *
 * caller is responsible for freeing result
 */
static char *
escape_string(const char *s)
{
	const char *c;
	char *d;
	char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2);
	if (t == NULL) {
		fprintf(stderr, "error: cannot allocate memory\n");
		exit(1);
	}

	for (c = s, d = t; *c != '\0'; c++, d++) {
		switch (*c) {
		case ' ':
		case ',':
		case '=':
		case '\\':
			*d++ = '\\';
			/* FALLTHROUGH */
		default:
			*d = *c;
		}
	}
	*d = '\0';
	return (t);
}

/*
 * print key=value where value is a uint64_t
 */
static void
print_kv(char *key, uint64_t value)
{
	printf("%s=%llu%c", key,
	    (u_longlong_t)value & metric_value_mask, metric_data_type);
}

/*
 * print_scan_status() prints the details as often seen in the "zpool status"
 * output. However, unlike the zpool command, which is intended for humans,
 * this output is suitable for long-term tracking in influxdb.
 * TODO: update to include issued scan data
 */
static int
print_scan_status(nvlist_t *nvroot, const char *pool_name)
{
	uint_t c;
	int64_t elapsed;
	uint64_t examined, pass_exam, paused_time, paused_ts, rate;
	uint64_t remaining_time;
	pool_scan_stat_t *ps = NULL;
	double pct_done;
	char *state[DSS_NUM_STATES] = {
	    "none", "scanning", "finished", "canceled"};
	char *func;

	(void) nvlist_lookup_uint64_array(nvroot,
	    ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &c);

	/*
	 * ignore if there are no stats
	 */
	if (ps == NULL)
		return (0);

	/*
	 * return error if state is bogus
	 */
	if (ps->pss_state >= DSS_NUM_STATES ||
	    ps->pss_func >= POOL_SCAN_FUNCS) {
		if (complained_about_sync % 1000 == 0) {
			fprintf(stderr, "error: cannot decode scan stats: "
			    "ZFS is out of sync with compiled "
			    "zpool_influxdb\n");
			complained_about_sync++;
		}
		return (1);
	}

	switch (ps->pss_func) {
	case POOL_SCAN_NONE:
		func = "none_requested";
		break;
	case POOL_SCAN_SCRUB:
		func = "scrub";
		break;
	case POOL_SCAN_RESILVER:
		func = "resilver";
		break;
#ifdef POOL_SCAN_REBUILD
	case POOL_SCAN_REBUILD:
		func = "rebuild";
		break;
#endif
	default:
		func = "scan";
	}

	/* overall progress */
	examined = ps->pss_examined ? ps->pss_examined : 1;
	pct_done = 0.0;
	if (ps->pss_to_examine > 0)
		pct_done = 100.0 * examined / ps->pss_to_examine;

#ifdef EZFS_SCRUB_PAUSED
	paused_ts = ps->pss_pass_scrub_pause;
	paused_time = ps->pss_pass_scrub_spent_paused;
#else
	paused_ts = 0;
	paused_time = 0;
#endif

	/* calculations for this pass */
	if (ps->pss_state == DSS_SCANNING) {
		elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
		    (int64_t)paused_time;
		elapsed = (elapsed > 0) ? elapsed : 1;
		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
		rate = pass_exam / elapsed;
		rate = (rate > 0) ? rate : 1;
		remaining_time = (ps->pss_to_examine - examined) / rate;
	} else {
		elapsed =
		    (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
		    (int64_t)paused_time;
		elapsed = (elapsed > 0) ? elapsed : 1;
		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
		rate = pass_exam / elapsed;
		remaining_time = 0;
	}
	rate = rate ? rate : 1;

	/* influxdb line protocol format: "tags metrics timestamp" */
	printf("%s%s,function=%s,name=%s,state=%s ",
	    SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
	print_kv("end_ts", ps->pss_end_time);
	print_kv(",errors", ps->pss_errors);
	print_kv(",examined", examined);
	print_kv(",issued", ps->pss_issued);
	print_kv(",pass_examined", pass_exam);
	print_kv(",pass_issued", ps->pss_pass_issued);
	print_kv(",paused_ts", paused_ts);
	print_kv(",paused_t", paused_time);
	printf(",pct_done=%.2f", pct_done);
	print_kv(",processed", ps->pss_processed);
	print_kv(",rate", rate);
	print_kv(",remaining_t", remaining_time);
	print_kv(",start_ts", ps->pss_start_time);
	print_kv(",to_examine", ps->pss_to_examine);
	print_kv(",to_process", ps->pss_to_process);
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * get a vdev name that corresponds to the top-level vdev names
 * printed by `zpool status`
 */
static char *
get_vdev_name(nvlist_t *nvroot, const char *parent_name)
{
	static char vdev_name[256];
	char *vdev_type = NULL;
	uint64_t vdev_id = 0;

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE,
	    &vdev_type) != 0) {
		vdev_type = "unknown";
	}
	if (nvlist_lookup_uint64(
	    nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) {
		vdev_id = UINT64_MAX;
	}
	if (parent_name == NULL) {
		(void) snprintf(vdev_name, sizeof (vdev_name), "%s",
		    vdev_type);
	} else {
		(void) snprintf(vdev_name, sizeof (vdev_name),
		    "%s/%s-%llu",
		    parent_name, vdev_type, (u_longlong_t)vdev_id);
	}
	return (vdev_name);
}

/*
 * get a string suitable for an influxdb tag that describes this vdev
 *
 * By default only the vdev hierarchical name is shown, separated by '/'
 * If the vdev has an associated path, which is typical of leaf vdevs,
 * then the path is added.
 * It would be nice to have the devid instead of the path, but under
 * Linux we cannot be sure a devid will exist and we'd rather have
 * something than nothing, so we'll use path instead.
 */
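/*
 * Illustrative descriptors as built by get_vdev_desc() below; the pool
 * layout and device names are assumptions, not taken from a real system:
 *   pool root vdev:                vdev=root
 *   top-level raidz vdev:          vdev=root/raidz-0
 *   leaf disk with a device path:  path=/dev/sda1,vdev=root/raidz-0/disk-2
 */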
static char *
get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
{
	static char vdev_desc[2 * MAXPATHLEN];
	char *vdev_type = NULL;
	uint64_t vdev_id = 0;
	char vdev_value[MAXPATHLEN];
	char *vdev_path = NULL;
	char *s, *t;

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) {
		vdev_type = "unknown";
	}
	if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) {
		vdev_id = UINT64_MAX;
	}
	if (nvlist_lookup_string(
	    nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) {
		vdev_path = NULL;
	}

	if (parent_name == NULL) {
		s = escape_string(vdev_type);
		(void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
		free(s);
	} else {
		s = escape_string((char *)parent_name);
		t = escape_string(vdev_type);
		(void) snprintf(vdev_value, sizeof (vdev_value),
		    "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
		free(s);
		free(t);
	}
	if (vdev_path == NULL) {
		(void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
		    vdev_value);
	} else {
		s = escape_string(vdev_path);
		(void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
		    s, vdev_value);
		free(s);
	}
	return (vdev_desc);
}

/*
 * vdev summary stats are a combination of the data shown by
 * `zpool status` and `zpool list -v`
 */
static int
print_summary_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c;
	vdev_stat_t *vs;
	char *vdev_desc = NULL;
	vdev_desc = get_vdev_desc(nvroot, parent_name);
	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) != 0) {
		return (1);
	}
	printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
	    pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
	    (vdev_aux_t)vs->vs_aux), vdev_desc);
	print_kv("alloc", vs->vs_alloc);
	print_kv(",free", vs->vs_space - vs->vs_alloc);
	print_kv(",size", vs->vs_space);
	print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
	print_kv(",read_errors", vs->vs_read_errors);
	print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
	print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
	print_kv(",write_errors", vs->vs_write_errors);
	print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
	print_kv(",checksum_errors", vs->vs_checksum_errors);
	print_kv(",fragmentation", vs->vs_fragmentation);
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * vdev latency stats are histograms stored as nvlist arrays of uint64.
 * Latency stats include the ZIO scheduler classes plus lower-level
 * vdev latencies.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
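/*
 * Illustrative output line from print_vdev_latency_stats() below; the pool
 * name, field values, and timestamp are assumptions for the example only:
 *
 *   zpool_latency,le=0.000001,name=tank,vdev=root total_read=12u,
 *       total_write=7u,disk_read=3u,... 1595033445000000000
 *
 * (shown wrapped here; the program emits each point on a single line)
 */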
static int
print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c, end = 0;
	nvlist_t *nv_ex;
	char *vdev_desc = NULL;

	/* short_names become part of the metric name and are influxdb-ready */
	struct lat_lookup {
		char *name;
		char *short_name;
		uint64_t sum;
		uint64_t *array;
	};
	struct lat_lookup lat_type[] = {
	    {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
	    {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
	    {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
	    {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
	    {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
	    {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
#endif
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	vdev_desc = get_vdev_desc(nvroot, parent_name);

	for (int i = 0; lat_type[i].name; i++) {
		if (nvlist_lookup_uint64_array(nv_ex,
		    lat_type[i].name, &lat_type[i].array, &c) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    lat_type[i].name);
			return (3);
		}
		/* end count, all of the arrays are the same size */
		end = c - 1;
	}

	for (int bucket = 0; bucket <= end; bucket++) {
		if (bucket < MIN_LAT_INDEX) {
			/* don't print, but collect the sum */
			for (int i = 0; lat_type[i].name; i++) {
				lat_type[i].sum += lat_type[i].array[bucket];
			}
			continue;
		}
		if (bucket < end) {
			printf("%s%s,le=%0.6f,name=%s,%s ",
			    POOL_LATENCY_MEASUREMENT, tags,
			    (float)(1ULL << bucket) * 1e-9,
			    pool_name, vdev_desc);
		} else {
			printf("%s%s,le=+Inf,name=%s,%s ",
			    POOL_LATENCY_MEASUREMENT, tags, pool_name,
			    vdev_desc);
		}
		for (int i = 0; lat_type[i].name; i++) {
			if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
				lat_type[i].sum += lat_type[i].array[bucket];
			} else {
				lat_type[i].sum = lat_type[i].array[bucket];
			}
			print_kv(lat_type[i].short_name, lat_type[i].sum);
			if (lat_type[i + 1].name != NULL) {
				printf(",");
			}
		}
		printf(" %llu\n", (u_longlong_t)timestamp);
	}
	return (0);
}

/*
 * vdev request size stats are histograms stored as nvlist arrays of uint64.
 * Request size stats include the ZIO scheduler classes plus lower-level
 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c, end = 0;
	nvlist_t *nv_ex;
	char *vdev_desc = NULL;

	/* short_names become the field name */
	struct size_lookup {
		char *name;
		char *short_name;
		uint64_t sum;
		uint64_t *array;
	};
	struct size_lookup size_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
	    {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
	    {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
	    {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
	    {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
	    {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
#endif
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	vdev_desc = get_vdev_desc(nvroot, parent_name);

	for (int i = 0; size_type[i].name; i++) {
		if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
		    &size_type[i].array, &c) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    size_type[i].name);
			return (3);
		}
		/* end count, all of the arrays are the same size */
		end = c - 1;
	}

	for (int bucket = 0; bucket <= end; bucket++) {
		if (bucket < MIN_SIZE_INDEX) {
			/* don't print, but collect the sum */
			for (int i = 0; size_type[i].name; i++) {
				size_type[i].sum += size_type[i].array[bucket];
			}
			continue;
		}

		if (bucket < end) {
			printf("%s%s,le=%llu,name=%s,%s ",
			    POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
			    pool_name, vdev_desc);
		} else {
			printf("%s%s,le=+Inf,name=%s,%s ",
			    POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
			    vdev_desc);
		}
		for (int i = 0; size_type[i].name; i++) {
			if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
				size_type[i].sum += size_type[i].array[bucket];
			} else {
				size_type[i].sum = size_type[i].array[bucket];
			}
			print_kv(size_type[i].short_name, size_type[i].sum);
			if (size_type[i + 1].name != NULL) {
				printf(",");
			}
		}
		printf(" %llu\n", (u_longlong_t)timestamp);
	}
	return (0);
}

/*
 * ZIO scheduler queue stats are stored as gauges. This is unfortunate
 * because the values can change very rapidly and any point-in-time
 * value will quickly be obsoleted. It is also not easy to downsample.
 * Thus only the top-level queue stats might be beneficial... maybe.
 */
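/*
 * Illustrative output line from print_queue_stats() below; the pool name,
 * gauge values, and timestamp are assumptions for the example only:
 *
 *   zpool_vdev_queue,name=tank,vdev=root sync_r_active=0u,sync_w_active=2u,
 *       async_r_active=1u,... 1595033445000000000
 *
 * (shown wrapped here; the program emits each point on a single line)
 */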
static int
print_queue_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	nvlist_t *nv_ex;
	uint64_t value;

	/* short_names are used for the field name */
	struct queue_lookup {
		char *name;
		char *short_name;
	};
	struct queue_lookup queue_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
	    get_vdev_desc(nvroot, parent_name));
	for (int i = 0; queue_type[i].name; i++) {
		if (nvlist_lookup_uint64(nv_ex,
		    queue_type[i].name, &value) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    queue_type[i].name);
			return (3);
		}
		print_kv(queue_type[i].short_name, value);
		if (queue_type[i + 1].name != NULL) {
			printf(",");
		}
	}
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * top-level vdev stats are at the pool level
 */
static int
print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
{
	nvlist_t *nv_ex;
	uint64_t value;

	/* short_names become part of the metric name */
	struct queue_lookup {
		char *name;
		char *short_name;
	};
	struct queue_lookup queue_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
	    pool_name);
	for (int i = 0; queue_type[i].name; i++) {
		if (nvlist_lookup_uint64(nv_ex,
		    queue_type[i].name, &value) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    queue_type[i].name);
			return (3);
		}
		if (i > 0)
			printf(",");
		print_kv(queue_type[i].short_name, value);
	}

	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * recursive stats printer
 */
static int
print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
    const char *pool_name, const char *parent_name, int descend)
{
	uint_t c, children;
	nvlist_t **child;
	char vdev_name[256];
	int err;

	err = func(nvroot, pool_name, parent_name);
	if (err)
		return (err);

	if (descend && nvlist_lookup_nvlist_array(nvroot,
	    ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) {
		(void) strncpy(vdev_name, get_vdev_name(nvroot, parent_name),
		    sizeof (vdev_name));
		vdev_name[sizeof (vdev_name) - 1] = '\0';

		for (c = 0; c < children; c++) {
			print_recursive_stats(func, child[c], pool_name,
			    vdev_name, descend);
		}
	}
	return (0);
}

/*
 * call-back to print the stats from the pool config
 *
 * Note: if the pool is broken, this can hang indefinitely and perhaps in an
 * unkillable state.
 */
static int
print_stats(zpool_handle_t *zhp, void *data)
{
	uint_t c;
	int err;
	boolean_t missing;
	nvlist_t *config, *nvroot;
	vdev_stat_t *vs;
	struct timespec tv;
	char *pool_name;

	/* if not this pool return quickly */
	if (data &&
	    strncmp(data, zpool_get_name(zhp),
	    ZFS_MAX_DATASET_NAME_LEN) != 0) {
		zpool_close(zhp);
		return (0);
	}

	if (zpool_refresh_stats(zhp, &missing) != 0) {
		zpool_close(zhp);
		return (1);
	}

	config = zpool_get_config(zhp, NULL);
	if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
		timestamp = (uint64_t)time(NULL) * 1000000000;
	else
		timestamp =
		    ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;

	if (nvlist_lookup_nvlist(
	    config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
		zpool_close(zhp);
		return (2);
	}
	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) != 0) {
		zpool_close(zhp);
		return (3);
	}

	pool_name = escape_string(zpool_get_name(zhp));
	err = print_recursive_stats(print_summary_stats, nvroot,
	    pool_name, NULL, 1);
	/* if any of these return an error, skip the rest */
	if (err == 0)
		err = print_top_level_vdev_stats(nvroot, pool_name);

	if (no_histograms == 0) {
		if (err == 0)
			err = print_recursive_stats(print_vdev_latency_stats,
			    nvroot, pool_name, NULL, 1);
		if (err == 0)
			err = print_recursive_stats(print_vdev_size_stats,
			    nvroot, pool_name, NULL, 1);
		if (err == 0)
			err = print_recursive_stats(print_queue_stats, nvroot,
			    pool_name, NULL, 0);
	}
	if (err == 0)
		err = print_scan_status(nvroot, pool_name);

	free(pool_name);
	zpool_close(zhp);
	return (err);
}

static void
usage(char *name)
{
	fprintf(stderr, "usage: %s [--execd] [--no-histograms] "
	    "[--sum-histogram-buckets] [--signed-int] [--tags key=value] "
	    "[poolname]\n", name);
	exit(EXIT_FAILURE);
}

int
main(int argc, char *argv[])
{
	int opt;
	int ret = 8;
	char *line = NULL;
	size_t len = 0, tagslen = 0;
	struct option long_options[] = {
	    {"execd", no_argument, NULL, 'e'},
	    {"help", no_argument, NULL, 'h'},
	    {"no-histograms", no_argument, NULL, 'n'},
	    {"signed-int", no_argument, NULL, 'i'},
	    {"sum-histogram-buckets", no_argument, NULL, 's'},
	    {"tags", required_argument, NULL, 't'},
	    {0, 0, 0, 0}
	};
	while ((opt = getopt_long(
	    argc, argv, "ehinst:", long_options, NULL)) != -1) {
		switch (opt) {
		case 'e':
			execd_mode = 1;
			break;
		case 'i':
			metric_data_type = 'i';
			metric_value_mask = INT64_MAX;
			break;
		case 'n':
			no_histograms = 1;
			break;
		case 's':
			sum_histogram_buckets = 1;
			break;
		case 't':
			tagslen = strlen(optarg) + 2;
			tags = calloc(tagslen, 1);
			if (tags == NULL) {
				fprintf(stderr,
				    "error: cannot allocate memory "
				    "for tags\n");
"error: cannot allocate memory " 818 "for tags\n"); 819 exit(1); 820 } 821 (void) snprintf(tags, tagslen, ",%s", optarg); 822 break; 823 default: 824 usage(argv[0]); 825 } 826 } 827 828 libzfs_handle_t *g_zfs; 829 if ((g_zfs = libzfs_init()) == NULL) { 830 fprintf(stderr, 831 "error: cannot initialize libzfs. " 832 "Is the zfs module loaded or zrepl running?\n"); 833 exit(EXIT_FAILURE); 834 } 835 if (execd_mode == 0) { 836 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 837 return (ret); 838 } 839 while (getline(&line, &len, stdin) != -1) { 840 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 841 fflush(stdout); 842 } 843 return (ret); 844 } 845