1 /* 2 * Gather top-level ZFS pool and resilver/scan statistics and print using 3 * influxdb line protocol 4 * usage: [options] [pool_name] 5 * where options are: 6 * --execd, -e run in telegraf execd input plugin mode, [CR] on 7 * stdin causes a sample to be printed and wait for 8 * the next [CR] 9 * --no-histograms, -n don't print histogram data (reduces cardinality 10 * if you don't care about histograms) 11 * --sum-histogram-buckets, -s sum histogram bucket values 12 * 13 * To integrate into telegraf use one of: 14 * 1. the `inputs.execd` plugin with the `--execd` option 15 * 2. the `inputs.exec` plugin to simply run with no options 16 * 17 * NOTE: libzfs is an unstable interface. YMMV. 18 * 19 * The design goals of this software include: 20 * + be as lightweight as possible 21 * + reduce the number of external dependencies as far as possible, hence 22 * there is no dependency on a client library for managing the metric 23 * collection -- info is printed, KISS 24 * + broken pools or kernel bugs can cause this process to hang in an 25 * unkillable state. For this reason, it is best to keep the damage limited 26 * to a small process like zpool_influxdb rather than a larger collector. 27 * 28 * Copyright 2018-2020 Richard Elling 29 * 30 * This software is dual-licensed MIT and CDDL. 31 * 32 * The MIT License (MIT) 33 * 34 * Permission is hereby granted, free of charge, to any person obtaining a copy 35 * of this software and associated documentation files (the "Software"), to deal 36 * in the Software without restriction, including without limitation the rights 37 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 38 * copies of the Software, and to permit persons to whom the Software is 39 * furnished to do so, subject to the following conditions: 40 * 41 * The above copyright notice and this permission notice shall be included in 42 * all copies or substantial portions of the Software. 43 * 44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 47 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 50 * SOFTWARE. 51 * 52 * CDDL HEADER START 53 * 54 * The contents of this file are subject to the terms of the 55 * Common Development and Distribution License (the "License"). 56 * You may not use this file except in compliance with the License. 57 * 58 * The contents of this file are subject to the terms of the 59 * Common Development and Distribution License Version 1.0 (CDDL-1.0). 60 * You can obtain a copy of the license from the top-level file 61 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. 62 * You may not use this file except in compliance with the license. 63 * 64 * See the License for the specific language governing permissions 65 * and limitations under the License. 66 * 67 * CDDL HEADER END 68 */ 69 #include <string.h> 70 #include <getopt.h> 71 #include <stdio.h> 72 #include <stdint.h> 73 #include <inttypes.h> 74 #include <libzfs.h> 75 76 #define POOL_MEASUREMENT "zpool_stats" 77 #define SCAN_MEASUREMENT "zpool_scan_stats" 78 #define VDEV_MEASUREMENT "zpool_vdev_stats" 79 #define POOL_LATENCY_MEASUREMENT "zpool_latency" 80 #define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue" 81 #define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */ 82 #define POOL_IO_SIZE_MEASUREMENT "zpool_io_size" 83 #define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */ 84 85 /* global options */ 86 int execd_mode = 0; 87 int no_histograms = 0; 88 int sum_histogram_buckets = 0; 89 char metric_data_type = 'u'; 90 uint64_t metric_value_mask = UINT64_MAX; 91 uint64_t timestamp = 0; 92 int complained_about_sync = 0; 93 char *tags = ""; 94 95 typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *); 96 97 /* 98 * influxdb line protocol rules for escaping are important because the 99 * zpool name can include characters that need to be escaped 100 * 101 * caller is responsible for freeing result 102 */ 103 static char * 104 escape_string(const char *s) 105 { 106 const char *c; 107 char *d; 108 char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2); 109 if (t == NULL) { 110 fprintf(stderr, "error: cannot allocate memory\n"); 111 exit(1); 112 } 113 114 for (c = s, d = t; *c != '\0'; c++, d++) { 115 switch (*c) { 116 case ' ': 117 case ',': 118 case '=': 119 case '\\': 120 *d++ = '\\'; 121 fallthrough; 122 default: 123 *d = *c; 124 } 125 } 126 *d = '\0'; 127 return (t); 128 } 129 130 /* 131 * print key=value where value is a uint64_t 132 */ 133 static void 134 print_kv(char *key, uint64_t value) 135 { 136 printf("%s=%llu%c", key, 137 (u_longlong_t)value & metric_value_mask, metric_data_type); 138 } 139 140 /* 141 * print_scan_status() prints the details as often seen in the "zpool status" 142 * output. However, unlike the zpool command, which is intended for humans, 143 * this output is suitable for long-term tracking in influxdb. 144 * TODO: update to include issued scan data 145 */ 146 static int 147 print_scan_status(nvlist_t *nvroot, const char *pool_name) 148 { 149 uint_t c; 150 int64_t elapsed; 151 uint64_t examined, pass_exam, paused_time, paused_ts, rate; 152 uint64_t remaining_time; 153 pool_scan_stat_t *ps = NULL; 154 double pct_done; 155 char *state[DSS_NUM_STATES] = { 156 "none", "scanning", "finished", "canceled"}; 157 char *func; 158 159 (void) nvlist_lookup_uint64_array(nvroot, 160 ZPOOL_CONFIG_SCAN_STATS, 161 (uint64_t **)&ps, &c); 162 163 /* 164 * ignore if there are no stats 165 */ 166 if (ps == NULL) 167 return (0); 168 169 /* 170 * return error if state is bogus 171 */ 172 if (ps->pss_state >= DSS_NUM_STATES || 173 ps->pss_func >= POOL_SCAN_FUNCS) { 174 if (complained_about_sync % 1000 == 0) { 175 fprintf(stderr, "error: cannot decode scan stats: " 176 "ZFS is out of sync with compiled zpool_influxdb"); 177 complained_about_sync++; 178 } 179 return (1); 180 } 181 182 switch (ps->pss_func) { 183 case POOL_SCAN_NONE: 184 func = "none_requested"; 185 break; 186 case POOL_SCAN_SCRUB: 187 func = "scrub"; 188 break; 189 case POOL_SCAN_RESILVER: 190 func = "resilver"; 191 break; 192 #ifdef POOL_SCAN_REBUILD 193 case POOL_SCAN_REBUILD: 194 func = "rebuild"; 195 break; 196 #endif 197 default: 198 func = "scan"; 199 } 200 201 /* overall progress */ 202 examined = ps->pss_examined ? ps->pss_examined : 1; 203 pct_done = 0.0; 204 if (ps->pss_to_examine > 0) 205 pct_done = 100.0 * examined / ps->pss_to_examine; 206 207 #ifdef EZFS_SCRUB_PAUSED 208 paused_ts = ps->pss_pass_scrub_pause; 209 paused_time = ps->pss_pass_scrub_spent_paused; 210 #else 211 paused_ts = 0; 212 paused_time = 0; 213 #endif 214 215 /* calculations for this pass */ 216 if (ps->pss_state == DSS_SCANNING) { 217 elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start - 218 (int64_t)paused_time; 219 elapsed = (elapsed > 0) ? elapsed : 1; 220 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 221 rate = pass_exam / elapsed; 222 rate = (rate > 0) ? rate : 1; 223 remaining_time = ps->pss_to_examine - examined / rate; 224 } else { 225 elapsed = 226 (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start - 227 (int64_t)paused_time; 228 elapsed = (elapsed > 0) ? elapsed : 1; 229 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 230 rate = pass_exam / elapsed; 231 remaining_time = 0; 232 } 233 rate = rate ? rate : 1; 234 235 /* influxdb line protocol format: "tags metrics timestamp" */ 236 printf("%s%s,function=%s,name=%s,state=%s ", 237 SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]); 238 print_kv("end_ts", ps->pss_end_time); 239 print_kv(",errors", ps->pss_errors); 240 print_kv(",examined", examined); 241 print_kv(",issued", ps->pss_issued); 242 print_kv(",pass_examined", pass_exam); 243 print_kv(",pass_issued", ps->pss_pass_issued); 244 print_kv(",paused_ts", paused_ts); 245 print_kv(",paused_t", paused_time); 246 printf(",pct_done=%.2f", pct_done); 247 print_kv(",processed", ps->pss_processed); 248 print_kv(",rate", rate); 249 print_kv(",remaining_t", remaining_time); 250 print_kv(",start_ts", ps->pss_start_time); 251 print_kv(",to_examine", ps->pss_to_examine); 252 print_kv(",to_process", ps->pss_to_process); 253 printf(" %llu\n", (u_longlong_t)timestamp); 254 return (0); 255 } 256 257 /* 258 * get a vdev name that corresponds to the top-level vdev names 259 * printed by `zpool status` 260 */ 261 static char * 262 get_vdev_name(nvlist_t *nvroot, const char *parent_name) 263 { 264 static char vdev_name[256]; 265 char *vdev_type = NULL; 266 uint64_t vdev_id = 0; 267 268 if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, 269 &vdev_type) != 0) { 270 vdev_type = "unknown"; 271 } 272 if (nvlist_lookup_uint64( 273 nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 274 vdev_id = UINT64_MAX; 275 } 276 if (parent_name == NULL) { 277 (void) snprintf(vdev_name, sizeof (vdev_name), "%s", 278 vdev_type); 279 } else { 280 (void) snprintf(vdev_name, sizeof (vdev_name), 281 "%s/%s-%llu", 282 parent_name, vdev_type, (u_longlong_t)vdev_id); 283 } 284 return (vdev_name); 285 } 286 287 /* 288 * get a string suitable for an influxdb tag that describes this vdev 289 * 290 * By default only the vdev hierarchical name is shown, separated by '/' 291 * If the vdev has an associated path, which is typical of leaf vdevs, 292 * then the path is added. 293 * It would be nice to have the devid instead of the path, but under 294 * Linux we cannot be sure a devid will exist and we'd rather have 295 * something than nothing, so we'll use path instead. 296 */ 297 static char * 298 get_vdev_desc(nvlist_t *nvroot, const char *parent_name) 299 { 300 static char vdev_desc[2 * MAXPATHLEN]; 301 char *vdev_type = NULL; 302 uint64_t vdev_id = 0; 303 char vdev_value[MAXPATHLEN]; 304 char *vdev_path = NULL; 305 char *s, *t; 306 307 if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) { 308 vdev_type = "unknown"; 309 } 310 if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 311 vdev_id = UINT64_MAX; 312 } 313 if (nvlist_lookup_string( 314 nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) { 315 vdev_path = NULL; 316 } 317 318 if (parent_name == NULL) { 319 s = escape_string(vdev_type); 320 (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s); 321 free(s); 322 } else { 323 s = escape_string((char *)parent_name); 324 t = escape_string(vdev_type); 325 (void) snprintf(vdev_value, sizeof (vdev_value), 326 "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id); 327 free(s); 328 free(t); 329 } 330 if (vdev_path == NULL) { 331 (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s", 332 vdev_value); 333 } else { 334 s = escape_string(vdev_path); 335 (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s", 336 s, vdev_value); 337 free(s); 338 } 339 return (vdev_desc); 340 } 341 342 /* 343 * vdev summary stats are a combination of the data shown by 344 * `zpool status` and `zpool list -v` 345 */ 346 static int 347 print_summary_stats(nvlist_t *nvroot, const char *pool_name, 348 const char *parent_name) 349 { 350 uint_t c; 351 vdev_stat_t *vs; 352 char *vdev_desc = NULL; 353 vdev_desc = get_vdev_desc(nvroot, parent_name); 354 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 355 (uint64_t **)&vs, &c) != 0) { 356 return (1); 357 } 358 printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags, 359 pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state, 360 (vdev_aux_t)vs->vs_aux), vdev_desc); 361 print_kv("alloc", vs->vs_alloc); 362 print_kv(",free", vs->vs_space - vs->vs_alloc); 363 print_kv(",size", vs->vs_space); 364 print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]); 365 print_kv(",read_errors", vs->vs_read_errors); 366 print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]); 367 print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]); 368 print_kv(",write_errors", vs->vs_write_errors); 369 print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]); 370 print_kv(",checksum_errors", vs->vs_checksum_errors); 371 print_kv(",fragmentation", vs->vs_fragmentation); 372 printf(" %llu\n", (u_longlong_t)timestamp); 373 return (0); 374 } 375 376 /* 377 * vdev latency stats are histograms stored as nvlist arrays of uint64. 378 * Latency stats include the ZIO scheduler classes plus lower-level 379 * vdev latencies. 380 * 381 * In many cases, the top-level "root" view obscures the underlying 382 * top-level vdev operations. For example, if a pool has a log, special, 383 * or cache device, then each can behave very differently. It is useful 384 * to see how each is responding. 385 */ 386 static int 387 print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name, 388 const char *parent_name) 389 { 390 uint_t c, end = 0; 391 nvlist_t *nv_ex; 392 char *vdev_desc = NULL; 393 394 /* short_names become part of the metric name and are influxdb-ready */ 395 struct lat_lookup { 396 char *name; 397 char *short_name; 398 uint64_t sum; 399 uint64_t *array; 400 }; 401 struct lat_lookup lat_type[] = { 402 {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0}, 403 {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0}, 404 {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0}, 405 {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0}, 406 {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0}, 407 {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0}, 408 {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0}, 409 {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0}, 410 {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0}, 411 #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO 412 {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0}, 413 #endif 414 {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0}, 415 {NULL, NULL} 416 }; 417 418 if (nvlist_lookup_nvlist(nvroot, 419 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 420 return (6); 421 } 422 423 vdev_desc = get_vdev_desc(nvroot, parent_name); 424 425 for (int i = 0; lat_type[i].name; i++) { 426 if (nvlist_lookup_uint64_array(nv_ex, 427 lat_type[i].name, &lat_type[i].array, &c) != 0) { 428 fprintf(stderr, "error: can't get %s\n", 429 lat_type[i].name); 430 return (3); 431 } 432 /* end count count, all of the arrays are the same size */ 433 end = c - 1; 434 } 435 436 for (int bucket = 0; bucket <= end; bucket++) { 437 if (bucket < MIN_LAT_INDEX) { 438 /* don't print, but collect the sum */ 439 for (int i = 0; lat_type[i].name; i++) { 440 lat_type[i].sum += lat_type[i].array[bucket]; 441 } 442 continue; 443 } 444 if (bucket < end) { 445 printf("%s%s,le=%0.6f,name=%s,%s ", 446 POOL_LATENCY_MEASUREMENT, tags, 447 (float)(1ULL << bucket) * 1e-9, 448 pool_name, vdev_desc); 449 } else { 450 printf("%s%s,le=+Inf,name=%s,%s ", 451 POOL_LATENCY_MEASUREMENT, tags, pool_name, 452 vdev_desc); 453 } 454 for (int i = 0; lat_type[i].name; i++) { 455 if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { 456 lat_type[i].sum += lat_type[i].array[bucket]; 457 } else { 458 lat_type[i].sum = lat_type[i].array[bucket]; 459 } 460 print_kv(lat_type[i].short_name, lat_type[i].sum); 461 if (lat_type[i + 1].name != NULL) { 462 printf(","); 463 } 464 } 465 printf(" %llu\n", (u_longlong_t)timestamp); 466 } 467 return (0); 468 } 469 470 /* 471 * vdev request size stats are histograms stored as nvlist arrays of uint64. 472 * Request size stats include the ZIO scheduler classes plus lower-level 473 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported. 474 * 475 * In many cases, the top-level "root" view obscures the underlying 476 * top-level vdev operations. For example, if a pool has a log, special, 477 * or cache device, then each can behave very differently. It is useful 478 * to see how each is responding. 479 */ 480 static int 481 print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name, 482 const char *parent_name) 483 { 484 uint_t c, end = 0; 485 nvlist_t *nv_ex; 486 char *vdev_desc = NULL; 487 488 /* short_names become the field name */ 489 struct size_lookup { 490 char *name; 491 char *short_name; 492 uint64_t sum; 493 uint64_t *array; 494 }; 495 struct size_lookup size_type[] = { 496 {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"}, 497 {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"}, 498 {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"}, 499 {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"}, 500 {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"}, 501 {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"}, 502 {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"}, 503 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"}, 504 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"}, 505 {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"}, 506 #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO 507 {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"}, 508 {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"}, 509 #endif 510 {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"}, 511 {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"}, 512 {NULL, NULL} 513 }; 514 515 if (nvlist_lookup_nvlist(nvroot, 516 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 517 return (6); 518 } 519 520 vdev_desc = get_vdev_desc(nvroot, parent_name); 521 522 for (int i = 0; size_type[i].name; i++) { 523 if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name, 524 &size_type[i].array, &c) != 0) { 525 fprintf(stderr, "error: can't get %s\n", 526 size_type[i].name); 527 return (3); 528 } 529 /* end count count, all of the arrays are the same size */ 530 end = c - 1; 531 } 532 533 for (int bucket = 0; bucket <= end; bucket++) { 534 if (bucket < MIN_SIZE_INDEX) { 535 /* don't print, but collect the sum */ 536 for (int i = 0; size_type[i].name; i++) { 537 size_type[i].sum += size_type[i].array[bucket]; 538 } 539 continue; 540 } 541 542 if (bucket < end) { 543 printf("%s%s,le=%llu,name=%s,%s ", 544 POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket, 545 pool_name, vdev_desc); 546 } else { 547 printf("%s%s,le=+Inf,name=%s,%s ", 548 POOL_IO_SIZE_MEASUREMENT, tags, pool_name, 549 vdev_desc); 550 } 551 for (int i = 0; size_type[i].name; i++) { 552 if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { 553 size_type[i].sum += size_type[i].array[bucket]; 554 } else { 555 size_type[i].sum = size_type[i].array[bucket]; 556 } 557 print_kv(size_type[i].short_name, size_type[i].sum); 558 if (size_type[i + 1].name != NULL) { 559 printf(","); 560 } 561 } 562 printf(" %llu\n", (u_longlong_t)timestamp); 563 } 564 return (0); 565 } 566 567 /* 568 * ZIO scheduler queue stats are stored as gauges. This is unfortunate 569 * because the values can change very rapidly and any point-in-time 570 * value will quickly be obsoleted. It is also not easy to downsample. 571 * Thus only the top-level queue stats might be beneficial... maybe. 572 */ 573 static int 574 print_queue_stats(nvlist_t *nvroot, const char *pool_name, 575 const char *parent_name) 576 { 577 nvlist_t *nv_ex; 578 uint64_t value; 579 580 /* short_names are used for the field name */ 581 struct queue_lookup { 582 char *name; 583 char *short_name; 584 }; 585 struct queue_lookup queue_type[] = { 586 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"}, 587 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"}, 588 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"}, 589 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"}, 590 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"}, 591 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"}, 592 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"}, 593 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"}, 594 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"}, 595 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"}, 596 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"}, 597 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"}, 598 {NULL, NULL} 599 }; 600 601 if (nvlist_lookup_nvlist(nvroot, 602 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 603 return (6); 604 } 605 606 printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name, 607 get_vdev_desc(nvroot, parent_name)); 608 for (int i = 0; queue_type[i].name; i++) { 609 if (nvlist_lookup_uint64(nv_ex, 610 queue_type[i].name, &value) != 0) { 611 fprintf(stderr, "error: can't get %s\n", 612 queue_type[i].name); 613 return (3); 614 } 615 print_kv(queue_type[i].short_name, value); 616 if (queue_type[i + 1].name != NULL) { 617 printf(","); 618 } 619 } 620 printf(" %llu\n", (u_longlong_t)timestamp); 621 return (0); 622 } 623 624 /* 625 * top-level vdev stats are at the pool level 626 */ 627 static int 628 print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) 629 { 630 nvlist_t *nv_ex; 631 uint64_t value; 632 633 /* short_names become part of the metric name */ 634 struct queue_lookup { 635 char *name; 636 char *short_name; 637 }; 638 struct queue_lookup queue_type[] = { 639 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"}, 640 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"}, 641 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"}, 642 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"}, 643 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"}, 644 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"}, 645 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"}, 646 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"}, 647 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"}, 648 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"}, 649 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"}, 650 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"}, 651 {NULL, NULL} 652 }; 653 654 if (nvlist_lookup_nvlist(nvroot, 655 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 656 return (6); 657 } 658 659 printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags, 660 pool_name); 661 for (int i = 0; queue_type[i].name; i++) { 662 if (nvlist_lookup_uint64(nv_ex, 663 queue_type[i].name, &value) != 0) { 664 fprintf(stderr, "error: can't get %s\n", 665 queue_type[i].name); 666 return (3); 667 } 668 if (i > 0) 669 printf(","); 670 print_kv(queue_type[i].short_name, value); 671 } 672 673 printf(" %llu\n", (u_longlong_t)timestamp); 674 return (0); 675 } 676 677 /* 678 * recursive stats printer 679 */ 680 static int 681 print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, 682 const char *pool_name, const char *parent_name, int descend) 683 { 684 uint_t c, children; 685 nvlist_t **child; 686 char vdev_name[256]; 687 int err; 688 689 err = func(nvroot, pool_name, parent_name); 690 if (err) 691 return (err); 692 693 if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 694 &child, &children) == 0) { 695 (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name), 696 sizeof (vdev_name)); 697 698 for (c = 0; c < children; c++) { 699 print_recursive_stats(func, child[c], pool_name, 700 vdev_name, descend); 701 } 702 } 703 return (0); 704 } 705 706 /* 707 * call-back to print the stats from the pool config 708 * 709 * Note: if the pool is broken, this can hang indefinitely and perhaps in an 710 * unkillable state. 711 */ 712 static int 713 print_stats(zpool_handle_t *zhp, void *data) 714 { 715 uint_t c; 716 int err; 717 boolean_t missing; 718 nvlist_t *config, *nvroot; 719 vdev_stat_t *vs; 720 struct timespec tv; 721 char *pool_name; 722 723 /* if not this pool return quickly */ 724 if (data && 725 strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) { 726 zpool_close(zhp); 727 return (0); 728 } 729 730 if (zpool_refresh_stats(zhp, &missing) != 0) { 731 zpool_close(zhp); 732 return (1); 733 } 734 735 config = zpool_get_config(zhp, NULL); 736 if (clock_gettime(CLOCK_REALTIME, &tv) != 0) 737 timestamp = (uint64_t)time(NULL) * 1000000000; 738 else 739 timestamp = 740 ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec; 741 742 if (nvlist_lookup_nvlist( 743 config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 744 zpool_close(zhp); 745 return (2); 746 } 747 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 748 (uint64_t **)&vs, &c) != 0) { 749 zpool_close(zhp); 750 return (3); 751 } 752 753 pool_name = escape_string(zpool_get_name(zhp)); 754 err = print_recursive_stats(print_summary_stats, nvroot, 755 pool_name, NULL, 1); 756 /* if any of these return an error, skip the rest */ 757 if (err == 0) 758 err = print_top_level_vdev_stats(nvroot, pool_name); 759 760 if (no_histograms == 0) { 761 if (err == 0) 762 err = print_recursive_stats(print_vdev_latency_stats, nvroot, 763 pool_name, NULL, 1); 764 if (err == 0) 765 err = print_recursive_stats(print_vdev_size_stats, nvroot, 766 pool_name, NULL, 1); 767 if (err == 0) 768 err = print_recursive_stats(print_queue_stats, nvroot, 769 pool_name, NULL, 0); 770 } 771 if (err == 0) 772 err = print_scan_status(nvroot, pool_name); 773 774 free(pool_name); 775 zpool_close(zhp); 776 return (err); 777 } 778 779 static void 780 usage(char *name) 781 { 782 fprintf(stderr, "usage: %s [--execd][--no-histograms]" 783 "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name); 784 exit(EXIT_FAILURE); 785 } 786 787 int 788 main(int argc, char *argv[]) 789 { 790 int opt; 791 int ret = 8; 792 char *line = NULL; 793 size_t len, tagslen = 0; 794 struct option long_options[] = { 795 {"execd", no_argument, NULL, 'e'}, 796 {"help", no_argument, NULL, 'h'}, 797 {"no-histograms", no_argument, NULL, 'n'}, 798 {"signed-int", no_argument, NULL, 'i'}, 799 {"sum-histogram-buckets", no_argument, NULL, 's'}, 800 {"tags", required_argument, NULL, 't'}, 801 {0, 0, 0, 0} 802 }; 803 while ((opt = getopt_long( 804 argc, argv, "ehinst:", long_options, NULL)) != -1) { 805 switch (opt) { 806 case 'e': 807 execd_mode = 1; 808 break; 809 case 'i': 810 metric_data_type = 'i'; 811 metric_value_mask = INT64_MAX; 812 break; 813 case 'n': 814 no_histograms = 1; 815 break; 816 case 's': 817 sum_histogram_buckets = 1; 818 break; 819 case 't': 820 tagslen = strlen(optarg) + 2; 821 tags = calloc(tagslen, 1); 822 if (tags == NULL) { 823 fprintf(stderr, 824 "error: cannot allocate memory " 825 "for tags\n"); 826 exit(1); 827 } 828 (void) snprintf(tags, tagslen, ",%s", optarg); 829 break; 830 default: 831 usage(argv[0]); 832 } 833 } 834 835 libzfs_handle_t *g_zfs; 836 if ((g_zfs = libzfs_init()) == NULL) { 837 fprintf(stderr, 838 "error: cannot initialize libzfs. " 839 "Is the zfs module loaded or zrepl running?\n"); 840 exit(EXIT_FAILURE); 841 } 842 if (execd_mode == 0) { 843 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 844 return (ret); 845 } 846 while (getline(&line, &len, stdin) != -1) { 847 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 848 fflush(stdout); 849 } 850 return (ret); 851 } 852