/*
 * Gather top-level ZFS pool and resilver/scan statistics and print using
 * influxdb line protocol
 * usage: [options] [pool_name]
 * where options are:
 *   --execd, -e            run in telegraf execd input plugin mode, [CR] on
 *                          stdin causes a sample to be printed and wait for
 *                          the next [CR]
 *   --no-histograms, -n    don't print histogram data (reduces cardinality
 *                          if you don't care about histograms)
 *   --sum-histogram-buckets, -s  sum histogram bucket values
 *   --signed-int, -i       print metric values as signed integers
 *                          (values are masked to INT64_MAX)
 *   --tags, -t key=value   add the given influxdb tag(s) to every measurement
 *
 * To integrate into telegraf use one of:
 * 1. the `inputs.execd` plugin with the `--execd` option
 * 2. the `inputs.exec` plugin to simply run with no options
 *
 * NOTE: libzfs is an unstable interface. YMMV.
 *
 * The design goals of this software include:
 * + be as lightweight as possible
 * + reduce the number of external dependencies as far as possible, hence
 *   there is no dependency on a client library for managing the metric
 *   collection -- info is printed, KISS
 * + broken pools or kernel bugs can cause this process to hang in an
 *   unkillable state. For this reason, it is best to keep the damage limited
 *   to a small process like zpool_influxdb rather than a larger collector.
 *
 * Copyright 2018-2020 Richard Elling
 *
 * This software is dual-licensed MIT and CDDL.
 *
 * The MIT License (MIT)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * CDDL HEADER END
 */
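/*
 * A minimal telegraf configuration sketch for the `inputs.execd` integration
 * mentioned above. The install path is an assumption; adjust it and the
 * plugin options to match your telegraf version:
 *
 *   [[inputs.execd]]
 *     command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
 *     signal = "STDIN"
 *     data_format = "influx"
 */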
#include <string.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <inttypes.h>
#include <libzfs_impl.h>

#define POOL_MEASUREMENT	"zpool_stats"
#define SCAN_MEASUREMENT	"zpool_scan_stats"
#define VDEV_MEASUREMENT	"zpool_vdev_stats"
#define POOL_LATENCY_MEASUREMENT	"zpool_latency"
#define POOL_QUEUE_MEASUREMENT	"zpool_vdev_queue"
#define MIN_LAT_INDEX	10	/* minimum latency index 10 = 1024ns */
#define POOL_IO_SIZE_MEASUREMENT	"zpool_io_size"
#define MIN_SIZE_INDEX	9	/* minimum size index 9 = 512 bytes */

/* global options */
int execd_mode = 0;
int no_histograms = 0;
int sum_histogram_buckets = 0;
char metric_data_type = 'u';
uint64_t metric_value_mask = UINT64_MAX;
uint64_t timestamp = 0;
int complained_about_sync = 0;
char *tags = "";

typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);

/*
 * influxdb line protocol rules for escaping are important because the
 * zpool name can include characters that need to be escaped
 *
 * caller is responsible for freeing result
 */
static char *
escape_string(char *s)
{
	char *c, *d;
	/* worst case: every character needs escaping, plus the NUL */
	char *t = (char *)malloc(strlen(s) * 2 + 1);

	if (t == NULL) {
		fprintf(stderr, "error: cannot allocate memory\n");
		exit(1);
	}

	for (c = s, d = t; *c != '\0'; c++, d++) {
		switch (*c) {
		case ' ':
		case ',':
		case '=':
		case '\\':
			*d++ = '\\';
			/* FALLTHROUGH */
		default:
			*d = *c;
		}
	}
	*d = '\0';
	return (t);
}

/*
 * print key=value where value is a uint64_t
 */
static void
print_kv(char *key, uint64_t value)
{
	printf("%s=%llu%c", key,
	    (u_longlong_t)value & metric_value_mask, metric_data_type);
}
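/*
 * Shape of the emitted influxdb line protocol, for reference:
 * "measurement[,global tags],tag=value[,...] field=value[,...] timestamp".
 * A hypothetical summary line, values invented for illustration and most
 * fields omitted:
 *
 *   zpool_stats,name=tank,state=ONLINE,vdev=root alloc=1234u,free=5678u,size=6912u 1590000000000000000
 */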
/*
 * print_scan_status() prints the details as often seen in the "zpool status"
 * output. However, unlike the zpool command, which is intended for humans,
 * this output is suitable for long-term tracking in influxdb.
 * TODO: update to include issued scan data
 */
static int
print_scan_status(nvlist_t *nvroot, const char *pool_name)
{
	uint_t c;
	int64_t elapsed;
	uint64_t examined, pass_exam, paused_time, paused_ts, rate;
	uint64_t remaining_time;
	pool_scan_stat_t *ps = NULL;
	double pct_done;
	char *state[DSS_NUM_STATES] = {
	    "none", "scanning", "finished", "canceled"};
	char *func;

	(void) nvlist_lookup_uint64_array(nvroot,
	    ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &c);

	/*
	 * ignore if there are no stats
	 */
	if (ps == NULL)
		return (0);

	/*
	 * return error if state is bogus
	 */
	if (ps->pss_state >= DSS_NUM_STATES ||
	    ps->pss_func >= POOL_SCAN_FUNCS) {
		if (complained_about_sync % 1000 == 0) {
			fprintf(stderr, "error: cannot decode scan stats: "
			    "ZFS is out of sync with compiled zpool_influxdb\n");
			complained_about_sync++;
		}
		return (1);
	}

	switch (ps->pss_func) {
	case POOL_SCAN_NONE:
		func = "none_requested";
		break;
	case POOL_SCAN_SCRUB:
		func = "scrub";
		break;
	case POOL_SCAN_RESILVER:
		func = "resilver";
		break;
#ifdef POOL_SCAN_REBUILD
	case POOL_SCAN_REBUILD:
		func = "rebuild";
		break;
#endif
	default:
		func = "scan";
	}

	/* overall progress */
	examined = ps->pss_examined ? ps->pss_examined : 1;
	pct_done = 0.0;
	if (ps->pss_to_examine > 0)
		pct_done = 100.0 * examined / ps->pss_to_examine;

#ifdef EZFS_SCRUB_PAUSED
	paused_ts = ps->pss_pass_scrub_pause;
	paused_time = ps->pss_pass_scrub_spent_paused;
#else
	paused_ts = 0;
	paused_time = 0;
#endif

	/* calculations for this pass */
	if (ps->pss_state == DSS_SCANNING) {
		elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
		    (int64_t)paused_time;
		elapsed = (elapsed > 0) ? elapsed : 1;
		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
		rate = pass_exam / elapsed;
		rate = (rate > 0) ? rate : 1;
		/* estimated seconds remaining in this pass */
		remaining_time = (ps->pss_to_examine - examined) / rate;
	} else {
		elapsed =
		    (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
		    (int64_t)paused_time;
		elapsed = (elapsed > 0) ? elapsed : 1;
		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
		rate = pass_exam / elapsed;
		remaining_time = 0;
	}
	rate = rate ? rate : 1;

	/* influxdb line protocol format: "tags metrics timestamp" */
	printf("%s%s,function=%s,name=%s,state=%s ",
	    SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
	print_kv("end_ts", ps->pss_end_time);
	print_kv(",errors", ps->pss_errors);
	print_kv(",examined", examined);
	print_kv(",issued", ps->pss_issued);
	print_kv(",pass_examined", pass_exam);
	print_kv(",pass_issued", ps->pss_pass_issued);
	print_kv(",paused_ts", paused_ts);
	print_kv(",paused_t", paused_time);
	printf(",pct_done=%.2f", pct_done);
	print_kv(",processed", ps->pss_processed);
	print_kv(",rate", rate);
	print_kv(",remaining_t", remaining_time);
	print_kv(",start_ts", ps->pss_start_time);
	print_kv(",to_examine", ps->pss_to_examine);
	print_kv(",to_process", ps->pss_to_process);
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * get a vdev name that corresponds to the top-level vdev names
 * printed by `zpool status`
 */
static char *
get_vdev_name(nvlist_t *nvroot, const char *parent_name)
{
	static char vdev_name[256];
	char *vdev_type = NULL;
	uint64_t vdev_id = 0;

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE,
	    &vdev_type) != 0) {
		vdev_type = "unknown";
	}
	if (nvlist_lookup_uint64(
	    nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) {
		vdev_id = UINT64_MAX;
	}
	if (parent_name == NULL) {
		(void) snprintf(vdev_name, sizeof (vdev_name), "%s",
		    vdev_type);
	} else {
		(void) snprintf(vdev_name, sizeof (vdev_name),
		    "%s/%s-%llu",
		    parent_name, vdev_type, (u_longlong_t)vdev_id);
	}
	return (vdev_name);
}
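/*
 * Illustration of the hierarchical names built by get_vdev_name() (the pool
 * layout and ids here are hypothetical): the root vdev reports as "root",
 * a top-level mirror with id 0 as "root/mirror-0", and the second disk in
 * that mirror as "root/mirror-0/disk-1".
 */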
/*
 * get a string suitable for an influxdb tag that describes this vdev
 *
 * By default only the vdev hierarchical name is shown, separated by '/'.
 * If the vdev has an associated path, which is typical of leaf vdevs,
 * then the path is added.
 * It would be nice to have the devid instead of the path, but under
 * Linux we cannot be sure a devid will exist and we'd rather have
 * something than nothing, so we'll use path instead.
 */
static char *
get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
{
	static char vdev_desc[2 * MAXPATHLEN];
	char *vdev_type = NULL;
	uint64_t vdev_id = 0;
	char vdev_value[MAXPATHLEN];
	char *vdev_path = NULL;
	char *s, *t;

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) {
		vdev_type = "unknown";
	}
	if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) {
		vdev_id = UINT64_MAX;
	}
	if (nvlist_lookup_string(
	    nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) {
		vdev_path = NULL;
	}

	if (parent_name == NULL) {
		s = escape_string(vdev_type);
		(void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
		free(s);
	} else {
		s = escape_string((char *)parent_name);
		t = escape_string(vdev_type);
		(void) snprintf(vdev_value, sizeof (vdev_value),
		    "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
		free(s);
		free(t);
	}
	if (vdev_path == NULL) {
		(void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
		    vdev_value);
	} else {
		s = escape_string(vdev_path);
		(void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
		    s, vdev_value);
		free(s);
	}
	return (vdev_desc);
}

/*
 * vdev summary stats are a combination of the data shown by
 * `zpool status` and `zpool list -v`
 */
static int
print_summary_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c;
	vdev_stat_t *vs;
	char *vdev_desc = NULL;

	vdev_desc = get_vdev_desc(nvroot, parent_name);
	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) != 0) {
		return (1);
	}
	printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
	    pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
	    (vdev_aux_t)vs->vs_aux), vdev_desc);
	print_kv("alloc", vs->vs_alloc);
	print_kv(",free", vs->vs_space - vs->vs_alloc);
	print_kv(",size", vs->vs_space);
	print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
	print_kv(",read_errors", vs->vs_read_errors);
	print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
	print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
	print_kv(",write_errors", vs->vs_write_errors);
	print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
	print_kv(",checksum_errors", vs->vs_checksum_errors);
	print_kv(",fragmentation", vs->vs_fragmentation);
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}
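/*
 * A note on the latency histograms printed below: bucket index b is tagged
 * le=2^b nanoseconds (expressed in seconds), so MIN_LAT_INDEX of 10 maps to
 * 2^10 ns = 1024 ns, and counts from faster buckets are folded into that
 * first printed bucket. With --sum-histogram-buckets the printed values are
 * running totals across buckets rather than per-bucket counts.
 */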
/*
 * vdev latency stats are histograms stored as nvlist arrays of uint64.
 * Latency stats include the ZIO scheduler classes plus lower-level
 * vdev latencies.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c, end = 0;
	nvlist_t *nv_ex;
	char *vdev_desc = NULL;

	/* short_names become part of the metric name and are influxdb-ready */
	struct lat_lookup {
		char *name;
		char *short_name;
		uint64_t sum;
		uint64_t *array;
	};
	struct lat_lookup lat_type[] = {
	    {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
	    {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
	    {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
	    {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
	    {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
	    {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
#endif
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	vdev_desc = get_vdev_desc(nvroot, parent_name);

	for (int i = 0; lat_type[i].name; i++) {
		if (nvlist_lookup_uint64_array(nv_ex,
		    lat_type[i].name, &lat_type[i].array, &c) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    lat_type[i].name);
			return (3);
		}
		/* end count; all of the arrays are the same size */
		end = c - 1;
	}

	for (int bucket = 0; bucket <= end; bucket++) {
		if (bucket < MIN_LAT_INDEX) {
			/* don't print, but collect the sum */
			for (int i = 0; lat_type[i].name; i++) {
				lat_type[i].sum += lat_type[i].array[bucket];
			}
			continue;
		}
		if (bucket < end) {
			printf("%s%s,le=%0.6f,name=%s,%s ",
			    POOL_LATENCY_MEASUREMENT, tags,
			    (float)(1ULL << bucket) * 1e-9,
			    pool_name, vdev_desc);
		} else {
			printf("%s%s,le=+Inf,name=%s,%s ",
			    POOL_LATENCY_MEASUREMENT, tags, pool_name,
			    vdev_desc);
		}
		for (int i = 0; lat_type[i].name; i++) {
			if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
				lat_type[i].sum += lat_type[i].array[bucket];
			} else {
				lat_type[i].sum = lat_type[i].array[bucket];
			}
			print_kv(lat_type[i].short_name, lat_type[i].sum);
			if (lat_type[i + 1].name != NULL) {
				printf(",");
			}
		}
		printf(" %llu\n", (u_longlong_t)timestamp);
	}
	return (0);
}
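/*
 * The request size histograms that follow use the same folding scheme as the
 * latency histograms above: bucket index b is tagged le=2^b bytes, so
 * MIN_SIZE_INDEX of 9 maps to 512 bytes and smaller I/Os are folded into
 * that first printed bucket.
 */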
/*
 * vdev request size stats are histograms stored as nvlist arrays of uint64.
 * Request size stats include the ZIO scheduler classes plus lower-level
 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	uint_t c, end = 0;
	nvlist_t *nv_ex;
	char *vdev_desc = NULL;

	/* short_names become the field name */
	struct size_lookup {
		char *name;
		char *short_name;
		uint64_t sum;
		uint64_t *array;
	};
	struct size_lookup size_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
	    {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
	    {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
	    {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
	    {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
	    {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
#endif
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	vdev_desc = get_vdev_desc(nvroot, parent_name);

	for (int i = 0; size_type[i].name; i++) {
		if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
		    &size_type[i].array, &c) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    size_type[i].name);
			return (3);
		}
		/* end count; all of the arrays are the same size */
		end = c - 1;
	}

	for (int bucket = 0; bucket <= end; bucket++) {
		if (bucket < MIN_SIZE_INDEX) {
			/* don't print, but collect the sum */
			for (int i = 0; size_type[i].name; i++) {
				size_type[i].sum += size_type[i].array[bucket];
			}
			continue;
		}

		if (bucket < end) {
			printf("%s%s,le=%llu,name=%s,%s ",
			    POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
			    pool_name, vdev_desc);
		} else {
			printf("%s%s,le=+Inf,name=%s,%s ",
			    POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
			    vdev_desc);
		}
		for (int i = 0; size_type[i].name; i++) {
			if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
				size_type[i].sum += size_type[i].array[bucket];
			} else {
				size_type[i].sum = size_type[i].array[bucket];
			}
			print_kv(size_type[i].short_name, size_type[i].sum);
			if (size_type[i + 1].name != NULL) {
				printf(",");
			}
		}
		printf(" %llu\n", (u_longlong_t)timestamp);
	}
	return (0);
}
/*
 * ZIO scheduler queue stats are stored as gauges. This is unfortunate
 * because the values can change very rapidly and any point-in-time
 * value will quickly be obsoleted. It is also not easy to downsample.
 * Thus only the top-level queue stats might be beneficial... maybe.
 */
static int
print_queue_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
	nvlist_t *nv_ex;
	uint64_t value;

	/* short_names are used for the field name */
	struct queue_lookup {
		char *name;
		char *short_name;
	};
	struct queue_lookup queue_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
	    get_vdev_desc(nvroot, parent_name));
	for (int i = 0; queue_type[i].name; i++) {
		if (nvlist_lookup_uint64(nv_ex,
		    queue_type[i].name, &value) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    queue_type[i].name);
			return (3);
		}
		print_kv(queue_type[i].short_name, value);
		if (queue_type[i + 1].name != NULL) {
			printf(",");
		}
	}
	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}

/*
 * top-level vdev stats are at the pool level
 */
static int
print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
{
	nvlist_t *nv_ex;
	uint64_t value;

	/* short_names become part of the metric name */
	struct queue_lookup {
		char *name;
		char *short_name;
	};
	struct queue_lookup queue_type[] = {
	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
	    {NULL, NULL}
	};

	if (nvlist_lookup_nvlist(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
		return (6);
	}

	printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
	    pool_name);
	for (int i = 0; queue_type[i].name; i++) {
		if (nvlist_lookup_uint64(nv_ex,
		    queue_type[i].name, &value) != 0) {
			fprintf(stderr, "error: can't get %s\n",
			    queue_type[i].name);
			return (3);
		}
		if (i > 0)
			printf(",");
		print_kv(queue_type[i].short_name, value);
	}

	printf(" %llu\n", (u_longlong_t)timestamp);
	return (0);
}
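/*
 * Traversal overview for the printers above (hypothetical layout, for
 * illustration): for a pool with a single top-level mirror, the recursive
 * printer that follows emits one line per vdev per measurement, tagged
 * vdev=root for the root, vdev=root/mirror-0 for the top-level vdev, and
 * path=/dev/sda1,vdev=root/mirror-0/disk-0 for a leaf. print_queue_stats
 * is invoked with descend set to 0, so it reports only the root vdev.
 */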
/*
 * recursive stats printer
 *
 * Prints the stats for this vdev via the callback, then, if requested,
 * descends into the children, building the hierarchical vdev name as it goes.
 */
static int
print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
    const char *pool_name, const char *parent_name, int descend)
{
	uint_t c, children;
	nvlist_t **child;
	char vdev_name[256];
	int err;

	err = func(nvroot, pool_name, parent_name);
	if (err)
		return (err);

	if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		(void) strncpy(vdev_name, get_vdev_name(nvroot, parent_name),
		    sizeof (vdev_name));
		vdev_name[sizeof (vdev_name) - 1] = '\0';

		for (c = 0; c < children; c++) {
			print_recursive_stats(func, child[c], pool_name,
			    vdev_name, descend);
		}
	}
	return (0);
}

/*
 * call-back to print the stats from the pool config
 *
 * Note: if the pool is broken, this can hang indefinitely and perhaps in an
 * unkillable state.
 */
static int
print_stats(zpool_handle_t *zhp, void *data)
{
	uint_t c;
	int err;
	boolean_t missing;
	nvlist_t *config, *nvroot;
	vdev_stat_t *vs;
	struct timespec tv;
	char *pool_name;

	/* if not this pool return quickly */
	if (data &&
	    strncmp(data, zhp->zpool_name, ZFS_MAX_DATASET_NAME_LEN) != 0) {
		zpool_close(zhp);
		return (0);
	}

	if (zpool_refresh_stats(zhp, &missing) != 0) {
		zpool_close(zhp);
		return (1);
	}

	config = zpool_get_config(zhp, NULL);
	if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
		timestamp = (uint64_t)time(NULL) * 1000000000;
	else
		timestamp =
		    ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;

	if (nvlist_lookup_nvlist(
	    config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
		zpool_close(zhp);
		return (2);
	}
	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) != 0) {
		zpool_close(zhp);
		return (3);
	}

	pool_name = escape_string(zhp->zpool_name);
	err = print_recursive_stats(print_summary_stats, nvroot,
	    pool_name, NULL, 1);
	/* if any of these return an error, skip the rest */
	if (err == 0)
		err = print_top_level_vdev_stats(nvroot, pool_name);

	if (no_histograms == 0) {
		if (err == 0)
			err = print_recursive_stats(print_vdev_latency_stats,
			    nvroot, pool_name, NULL, 1);
		if (err == 0)
			err = print_recursive_stats(print_vdev_size_stats,
			    nvroot, pool_name, NULL, 1);
		if (err == 0)
			err = print_recursive_stats(print_queue_stats, nvroot,
			    pool_name, NULL, 0);
	}
	if (err == 0)
		err = print_scan_status(nvroot, pool_name);

	free(pool_name);
	zpool_close(zhp);
	return (err);
}

static void
usage(char *name)
{
	fprintf(stderr, "usage: %s [--execd] [--no-histograms] "
	    "[--sum-histogram-buckets] [--signed-int] [--tags key=value] "
	    "[poolname]\n", name);
	exit(EXIT_FAILURE);
}
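/*
 * Example invocations (illustrative; the pool name is hypothetical):
 *
 *   zpool_influxdb                       one sample for all pools
 *   zpool_influxdb --no-histograms tank  summary, top-level, and scan stats
 *                                        for pool "tank" only
 *   zpool_influxdb --execd               print a sample for each line read
 *                                        on stdin
 */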
int
main(int argc, char *argv[])
{
	int opt;
	int ret = 8;
	char *line = NULL;
	size_t len = 0, tagslen = 0;
	struct option long_options[] = {
	    {"execd", no_argument, NULL, 'e'},
	    {"help", no_argument, NULL, 'h'},
	    {"no-histograms", no_argument, NULL, 'n'},
	    {"signed-int", no_argument, NULL, 'i'},
	    {"sum-histogram-buckets", no_argument, NULL, 's'},
	    {"tags", required_argument, NULL, 't'},
	    {0, 0, 0, 0}
	};

	while ((opt = getopt_long(
	    argc, argv, "ehinst:", long_options, NULL)) != -1) {
		switch (opt) {
		case 'e':
			execd_mode = 1;
			break;
		case 'i':
			metric_data_type = 'i';
			metric_value_mask = INT64_MAX;
			break;
		case 'n':
			no_histograms = 1;
			break;
		case 's':
			sum_histogram_buckets = 1;
			break;
		case 't':
			tagslen = strlen(optarg) + 2;
			tags = calloc(tagslen, 1);
			if (tags == NULL) {
				fprintf(stderr,
				    "error: cannot allocate memory "
				    "for tags\n");
				exit(1);
			}
			(void) snprintf(tags, tagslen, ",%s", optarg);
			break;
		default:
			usage(argv[0]);
		}
	}

	libzfs_handle_t *g_zfs;
	if ((g_zfs = libzfs_init()) == NULL) {
		fprintf(stderr,
		    "error: cannot initialize libzfs. "
		    "Is the zfs module loaded or zrepl running?\n");
		exit(EXIT_FAILURE);
	}
	if (execd_mode == 0) {
		ret = zpool_iter(g_zfs, print_stats, argv[optind]);
		return (ret);
	}
	while (getline(&line, &len, stdin) != -1) {
		ret = zpool_iter(g_zfs, print_stats, argv[optind]);
		fflush(stdout);
	}
	return (ret);
}