1 /* 2 * Gather top-level ZFS pool and resilver/scan statistics and print using 3 * influxdb line protocol 4 * usage: [options] [pool_name] 5 * where options are: 6 * --execd, -e run in telegraf execd input plugin mode, [CR] on 7 * stdin causes a sample to be printed and wait for 8 * the next [CR] 9 * --no-histograms, -n don't print histogram data (reduces cardinality 10 * if you don't care about histograms) 11 * --sum-histogram-buckets, -s sum histogram bucket values 12 * 13 * To integrate into telegraf use one of: 14 * 1. the `inputs.execd` plugin with the `--execd` option 15 * 2. the `inputs.exec` plugin to simply run with no options 16 * 17 * NOTE: libzfs is an unstable interface. YMMV. 18 * 19 * The design goals of this software include: 20 * + be as lightweight as possible 21 * + reduce the number of external dependencies as far as possible, hence 22 * there is no dependency on a client library for managing the metric 23 * collection -- info is printed, KISS 24 * + broken pools or kernel bugs can cause this process to hang in an 25 * unkillable state. For this reason, it is best to keep the damage limited 26 * to a small process like zpool_influxdb rather than a larger collector. 27 * 28 * Copyright 2018-2020 Richard Elling 29 * 30 * This software is dual-licensed MIT and CDDL. 31 * 32 * The MIT License (MIT) 33 * 34 * Permission is hereby granted, free of charge, to any person obtaining a copy 35 * of this software and associated documentation files (the "Software"), to deal 36 * in the Software without restriction, including without limitation the rights 37 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 38 * copies of the Software, and to permit persons to whom the Software is 39 * furnished to do so, subject to the following conditions: 40 * 41 * The above copyright notice and this permission notice shall be included in 42 * all copies or substantial portions of the Software. 43 * 44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 47 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 50 * SOFTWARE. 51 * 52 * CDDL HEADER START 53 * 54 * The contents of this file are subject to the terms of the 55 * Common Development and Distribution License (the "License"). 56 * You may not use this file except in compliance with the License. 57 * 58 * The contents of this file are subject to the terms of the 59 * Common Development and Distribution License Version 1.0 (CDDL-1.0). 60 * You can obtain a copy of the license from the top-level file 61 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. 62 * You may not use this file except in compliance with the license. 63 * 64 * See the License for the specific language governing permissions 65 * and limitations under the License. 66 * 67 * CDDL HEADER END 68 */ 69 #include <string.h> 70 #include <getopt.h> 71 #include <stdio.h> 72 #include <stdint.h> 73 #include <inttypes.h> 74 #include <libzfs.h> 75 76 #define POOL_MEASUREMENT "zpool_stats" 77 #define SCAN_MEASUREMENT "zpool_scan_stats" 78 #define VDEV_MEASUREMENT "zpool_vdev_stats" 79 #define POOL_LATENCY_MEASUREMENT "zpool_latency" 80 #define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue" 81 #define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */ 82 #define POOL_IO_SIZE_MEASUREMENT "zpool_io_size" 83 #define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */ 84 85 /* global options */ 86 int execd_mode = 0; 87 int no_histograms = 0; 88 int sum_histogram_buckets = 0; 89 char metric_data_type = 'u'; 90 uint64_t metric_value_mask = UINT64_MAX; 91 uint64_t timestamp = 0; 92 int complained_about_sync = 0; 93 const char *tags = ""; 94 95 typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *); 96 97 /* 98 * influxdb line protocol rules for escaping are important because the 99 * zpool name can include characters that need to be escaped 100 * 101 * caller is responsible for freeing result 102 */ 103 static char * 104 escape_string(const char *s) 105 { 106 const char *c; 107 char *d; 108 char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2); 109 if (t == NULL) { 110 fprintf(stderr, "error: cannot allocate memory\n"); 111 exit(1); 112 } 113 114 for (c = s, d = t; *c != '\0'; c++, d++) { 115 switch (*c) { 116 case ' ': 117 case ',': 118 case '=': 119 case '\\': 120 *d++ = '\\'; 121 zfs_fallthrough; 122 default: 123 *d = *c; 124 } 125 } 126 *d = '\0'; 127 return (t); 128 } 129 130 /* 131 * print key=value where value is a uint64_t 132 */ 133 static void 134 print_kv(const char *key, uint64_t value) 135 { 136 printf("%s=%llu%c", key, 137 (u_longlong_t)value & metric_value_mask, metric_data_type); 138 } 139 140 /* 141 * print_scan_status() prints the details as often seen in the "zpool status" 142 * output. However, unlike the zpool command, which is intended for humans, 143 * this output is suitable for long-term tracking in influxdb. 144 * TODO: update to include issued scan data 145 */ 146 static int 147 print_scan_status(nvlist_t *nvroot, const char *pool_name) 148 { 149 uint_t c; 150 int64_t elapsed; 151 uint64_t examined, pass_exam, paused_time, paused_ts, rate; 152 uint64_t remaining_time; 153 pool_scan_stat_t *ps = NULL; 154 double pct_done; 155 const char *const state[DSS_NUM_STATES] = { 156 "none", "scanning", "finished", "canceled"}; 157 const char *func; 158 159 (void) nvlist_lookup_uint64_array(nvroot, 160 ZPOOL_CONFIG_SCAN_STATS, 161 (uint64_t **)&ps, &c); 162 163 /* 164 * ignore if there are no stats 165 */ 166 if (ps == NULL) 167 return (0); 168 169 /* 170 * return error if state is bogus 171 */ 172 if (ps->pss_state >= DSS_NUM_STATES || 173 ps->pss_func >= POOL_SCAN_FUNCS) { 174 if (complained_about_sync % 1000 == 0) { 175 fprintf(stderr, "error: cannot decode scan stats: " 176 "ZFS is out of sync with compiled zpool_influxdb"); 177 complained_about_sync++; 178 } 179 return (1); 180 } 181 182 switch (ps->pss_func) { 183 case POOL_SCAN_NONE: 184 func = "none_requested"; 185 break; 186 case POOL_SCAN_SCRUB: 187 func = "scrub"; 188 break; 189 case POOL_SCAN_RESILVER: 190 func = "resilver"; 191 break; 192 #ifdef POOL_SCAN_REBUILD 193 case POOL_SCAN_REBUILD: 194 func = "rebuild"; 195 break; 196 #endif 197 default: 198 func = "scan"; 199 } 200 201 /* overall progress */ 202 examined = ps->pss_examined ? ps->pss_examined : 1; 203 pct_done = 0.0; 204 if (ps->pss_to_examine > 0) 205 pct_done = 100.0 * examined / ps->pss_to_examine; 206 207 #ifdef EZFS_SCRUB_PAUSED 208 paused_ts = ps->pss_pass_scrub_pause; 209 paused_time = ps->pss_pass_scrub_spent_paused; 210 #else 211 paused_ts = 0; 212 paused_time = 0; 213 #endif 214 215 /* calculations for this pass */ 216 if (ps->pss_state == DSS_SCANNING) { 217 elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start - 218 (int64_t)paused_time; 219 elapsed = (elapsed > 0) ? elapsed : 1; 220 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 221 rate = pass_exam / elapsed; 222 rate = (rate > 0) ? rate : 1; 223 remaining_time = ps->pss_to_examine - examined / rate; 224 } else { 225 elapsed = 226 (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start - 227 (int64_t)paused_time; 228 elapsed = (elapsed > 0) ? elapsed : 1; 229 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; 230 rate = pass_exam / elapsed; 231 remaining_time = 0; 232 } 233 rate = rate ? rate : 1; 234 235 /* influxdb line protocol format: "tags metrics timestamp" */ 236 printf("%s%s,function=%s,name=%s,state=%s ", 237 SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]); 238 print_kv("end_ts", ps->pss_end_time); 239 print_kv(",errors", ps->pss_errors); 240 print_kv(",examined", examined); 241 print_kv(",skipped", ps->pss_skipped); 242 print_kv(",issued", ps->pss_issued); 243 print_kv(",pass_examined", pass_exam); 244 print_kv(",pass_issued", ps->pss_pass_issued); 245 print_kv(",paused_ts", paused_ts); 246 print_kv(",paused_t", paused_time); 247 printf(",pct_done=%.2f", pct_done); 248 print_kv(",processed", ps->pss_processed); 249 print_kv(",rate", rate); 250 print_kv(",remaining_t", remaining_time); 251 print_kv(",start_ts", ps->pss_start_time); 252 print_kv(",to_examine", ps->pss_to_examine); 253 printf(" %llu\n", (u_longlong_t)timestamp); 254 return (0); 255 } 256 257 /* 258 * get a vdev name that corresponds to the top-level vdev names 259 * printed by `zpool status` 260 */ 261 static char * 262 get_vdev_name(nvlist_t *nvroot, const char *parent_name) 263 { 264 static char vdev_name[256]; 265 uint64_t vdev_id = 0; 266 267 const char *vdev_type = "unknown"; 268 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type); 269 270 if (nvlist_lookup_uint64( 271 nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) 272 vdev_id = UINT64_MAX; 273 274 if (parent_name == NULL) { 275 (void) snprintf(vdev_name, sizeof (vdev_name), "%s", 276 vdev_type); 277 } else { 278 (void) snprintf(vdev_name, sizeof (vdev_name), 279 "%.220s/%s-%llu", 280 parent_name, vdev_type, (u_longlong_t)vdev_id); 281 } 282 return (vdev_name); 283 } 284 285 /* 286 * get a string suitable for an influxdb tag that describes this vdev 287 * 288 * By default only the vdev hierarchical name is shown, separated by '/' 289 * If the vdev has an associated path, which is typical of leaf vdevs, 290 * then the path is added. 291 * It would be nice to have the devid instead of the path, but under 292 * Linux we cannot be sure a devid will exist and we'd rather have 293 * something than nothing, so we'll use path instead. 294 */ 295 static char * 296 get_vdev_desc(nvlist_t *nvroot, const char *parent_name) 297 { 298 static char vdev_desc[2 * MAXPATHLEN]; 299 char vdev_value[MAXPATHLEN]; 300 char *s, *t; 301 302 const char *vdev_type = "unknown"; 303 uint64_t vdev_id = UINT64_MAX; 304 const char *vdev_path = NULL; 305 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type); 306 (void) nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id); 307 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &vdev_path); 308 309 if (parent_name == NULL) { 310 s = escape_string(vdev_type); 311 (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s); 312 free(s); 313 } else { 314 s = escape_string((char *)parent_name); 315 t = escape_string(vdev_type); 316 (void) snprintf(vdev_value, sizeof (vdev_value), 317 "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id); 318 free(s); 319 free(t); 320 } 321 if (vdev_path == NULL) { 322 (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s", 323 vdev_value); 324 } else { 325 s = escape_string(vdev_path); 326 (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s", 327 s, vdev_value); 328 free(s); 329 } 330 return (vdev_desc); 331 } 332 333 /* 334 * vdev summary stats are a combination of the data shown by 335 * `zpool status` and `zpool list -v` 336 */ 337 static int 338 print_summary_stats(nvlist_t *nvroot, const char *pool_name, 339 const char *parent_name) 340 { 341 uint_t c; 342 vdev_stat_t *vs; 343 char *vdev_desc = NULL; 344 vdev_desc = get_vdev_desc(nvroot, parent_name); 345 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 346 (uint64_t **)&vs, &c) != 0) { 347 return (1); 348 } 349 printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags, 350 pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state, 351 (vdev_aux_t)vs->vs_aux), vdev_desc); 352 print_kv("alloc", vs->vs_alloc); 353 print_kv(",free", vs->vs_space - vs->vs_alloc); 354 print_kv(",size", vs->vs_space); 355 print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]); 356 print_kv(",read_errors", vs->vs_read_errors); 357 print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]); 358 print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]); 359 print_kv(",write_errors", vs->vs_write_errors); 360 print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]); 361 print_kv(",checksum_errors", vs->vs_checksum_errors); 362 print_kv(",fragmentation", vs->vs_fragmentation); 363 printf(" %llu\n", (u_longlong_t)timestamp); 364 return (0); 365 } 366 367 /* 368 * vdev latency stats are histograms stored as nvlist arrays of uint64. 369 * Latency stats include the ZIO scheduler classes plus lower-level 370 * vdev latencies. 371 * 372 * In many cases, the top-level "root" view obscures the underlying 373 * top-level vdev operations. For example, if a pool has a log, special, 374 * or cache device, then each can behave very differently. It is useful 375 * to see how each is responding. 376 */ 377 static int 378 print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name, 379 const char *parent_name) 380 { 381 uint_t c, end = 0; 382 nvlist_t *nv_ex; 383 char *vdev_desc = NULL; 384 385 /* short_names become part of the metric name and are influxdb-ready */ 386 struct lat_lookup { 387 const char *name; 388 const char *short_name; 389 uint64_t sum; 390 uint64_t *array; 391 }; 392 struct lat_lookup lat_type[] = { 393 {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0}, 394 {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0}, 395 {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0}, 396 {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0}, 397 {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0}, 398 {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0}, 399 {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0}, 400 {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0}, 401 {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0}, 402 #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO 403 {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0}, 404 #endif 405 {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0}, 406 {NULL, NULL} 407 }; 408 409 if (nvlist_lookup_nvlist(nvroot, 410 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 411 return (6); 412 } 413 414 vdev_desc = get_vdev_desc(nvroot, parent_name); 415 416 for (int i = 0; lat_type[i].name; i++) { 417 if (nvlist_lookup_uint64_array(nv_ex, 418 lat_type[i].name, &lat_type[i].array, &c) != 0) { 419 fprintf(stderr, "error: can't get %s\n", 420 lat_type[i].name); 421 return (3); 422 } 423 /* end count count, all of the arrays are the same size */ 424 end = c - 1; 425 } 426 427 for (int bucket = 0; bucket <= end; bucket++) { 428 if (bucket < MIN_LAT_INDEX) { 429 /* don't print, but collect the sum */ 430 for (int i = 0; lat_type[i].name; i++) { 431 lat_type[i].sum += lat_type[i].array[bucket]; 432 } 433 continue; 434 } 435 if (bucket < end) { 436 printf("%s%s,le=%0.6f,name=%s,%s ", 437 POOL_LATENCY_MEASUREMENT, tags, 438 (float)(1ULL << bucket) * 1e-9, 439 pool_name, vdev_desc); 440 } else { 441 printf("%s%s,le=+Inf,name=%s,%s ", 442 POOL_LATENCY_MEASUREMENT, tags, pool_name, 443 vdev_desc); 444 } 445 for (int i = 0; lat_type[i].name; i++) { 446 if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { 447 lat_type[i].sum += lat_type[i].array[bucket]; 448 } else { 449 lat_type[i].sum = lat_type[i].array[bucket]; 450 } 451 print_kv(lat_type[i].short_name, lat_type[i].sum); 452 if (lat_type[i + 1].name != NULL) { 453 printf(","); 454 } 455 } 456 printf(" %llu\n", (u_longlong_t)timestamp); 457 } 458 return (0); 459 } 460 461 /* 462 * vdev request size stats are histograms stored as nvlist arrays of uint64. 463 * Request size stats include the ZIO scheduler classes plus lower-level 464 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported. 465 * 466 * In many cases, the top-level "root" view obscures the underlying 467 * top-level vdev operations. For example, if a pool has a log, special, 468 * or cache device, then each can behave very differently. It is useful 469 * to see how each is responding. 470 */ 471 static int 472 print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name, 473 const char *parent_name) 474 { 475 uint_t c, end = 0; 476 nvlist_t *nv_ex; 477 char *vdev_desc = NULL; 478 479 /* short_names become the field name */ 480 struct size_lookup { 481 const char *name; 482 const char *short_name; 483 uint64_t sum; 484 uint64_t *array; 485 }; 486 struct size_lookup size_type[] = { 487 {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"}, 488 {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"}, 489 {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"}, 490 {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"}, 491 {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"}, 492 {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"}, 493 {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"}, 494 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"}, 495 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"}, 496 {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"}, 497 #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO 498 {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"}, 499 {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"}, 500 #endif 501 {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"}, 502 {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"}, 503 {NULL, NULL} 504 }; 505 506 if (nvlist_lookup_nvlist(nvroot, 507 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 508 return (6); 509 } 510 511 vdev_desc = get_vdev_desc(nvroot, parent_name); 512 513 for (int i = 0; size_type[i].name; i++) { 514 if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name, 515 &size_type[i].array, &c) != 0) { 516 fprintf(stderr, "error: can't get %s\n", 517 size_type[i].name); 518 return (3); 519 } 520 /* end count count, all of the arrays are the same size */ 521 end = c - 1; 522 } 523 524 for (int bucket = 0; bucket <= end; bucket++) { 525 if (bucket < MIN_SIZE_INDEX) { 526 /* don't print, but collect the sum */ 527 for (int i = 0; size_type[i].name; i++) { 528 size_type[i].sum += size_type[i].array[bucket]; 529 } 530 continue; 531 } 532 533 if (bucket < end) { 534 printf("%s%s,le=%llu,name=%s,%s ", 535 POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket, 536 pool_name, vdev_desc); 537 } else { 538 printf("%s%s,le=+Inf,name=%s,%s ", 539 POOL_IO_SIZE_MEASUREMENT, tags, pool_name, 540 vdev_desc); 541 } 542 for (int i = 0; size_type[i].name; i++) { 543 if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { 544 size_type[i].sum += size_type[i].array[bucket]; 545 } else { 546 size_type[i].sum = size_type[i].array[bucket]; 547 } 548 print_kv(size_type[i].short_name, size_type[i].sum); 549 if (size_type[i + 1].name != NULL) { 550 printf(","); 551 } 552 } 553 printf(" %llu\n", (u_longlong_t)timestamp); 554 } 555 return (0); 556 } 557 558 /* 559 * ZIO scheduler queue stats are stored as gauges. This is unfortunate 560 * because the values can change very rapidly and any point-in-time 561 * value will quickly be obsoleted. It is also not easy to downsample. 562 * Thus only the top-level queue stats might be beneficial... maybe. 563 */ 564 static int 565 print_queue_stats(nvlist_t *nvroot, const char *pool_name, 566 const char *parent_name) 567 { 568 nvlist_t *nv_ex; 569 uint64_t value; 570 571 /* short_names are used for the field name */ 572 struct queue_lookup { 573 const char *name; 574 const char *short_name; 575 }; 576 struct queue_lookup queue_type[] = { 577 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"}, 578 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"}, 579 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"}, 580 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"}, 581 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"}, 582 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"}, 583 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"}, 584 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"}, 585 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"}, 586 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"}, 587 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"}, 588 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"}, 589 {NULL, NULL} 590 }; 591 592 if (nvlist_lookup_nvlist(nvroot, 593 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 594 return (6); 595 } 596 597 printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name, 598 get_vdev_desc(nvroot, parent_name)); 599 for (int i = 0; queue_type[i].name; i++) { 600 if (nvlist_lookup_uint64(nv_ex, 601 queue_type[i].name, &value) != 0) { 602 fprintf(stderr, "error: can't get %s\n", 603 queue_type[i].name); 604 return (3); 605 } 606 print_kv(queue_type[i].short_name, value); 607 if (queue_type[i + 1].name != NULL) { 608 printf(","); 609 } 610 } 611 printf(" %llu\n", (u_longlong_t)timestamp); 612 return (0); 613 } 614 615 /* 616 * top-level vdev stats are at the pool level 617 */ 618 static int 619 print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) 620 { 621 nvlist_t *nv_ex; 622 uint64_t value; 623 624 /* short_names become part of the metric name */ 625 struct queue_lookup { 626 const char *name; 627 const char *short_name; 628 }; 629 struct queue_lookup queue_type[] = { 630 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"}, 631 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"}, 632 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"}, 633 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"}, 634 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"}, 635 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"}, 636 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"}, 637 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"}, 638 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"}, 639 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"}, 640 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"}, 641 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"}, 642 {NULL, NULL} 643 }; 644 645 if (nvlist_lookup_nvlist(nvroot, 646 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { 647 return (6); 648 } 649 650 printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags, 651 pool_name); 652 for (int i = 0; queue_type[i].name; i++) { 653 if (nvlist_lookup_uint64(nv_ex, 654 queue_type[i].name, &value) != 0) { 655 fprintf(stderr, "error: can't get %s\n", 656 queue_type[i].name); 657 return (3); 658 } 659 if (i > 0) 660 printf(","); 661 print_kv(queue_type[i].short_name, value); 662 } 663 664 printf(" %llu\n", (u_longlong_t)timestamp); 665 return (0); 666 } 667 668 /* 669 * recursive stats printer 670 */ 671 static int 672 print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, 673 const char *pool_name, const char *parent_name, int descend) 674 { 675 uint_t c, children; 676 nvlist_t **child; 677 char vdev_name[256]; 678 int err; 679 680 err = func(nvroot, pool_name, parent_name); 681 if (err) 682 return (err); 683 684 if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 685 &child, &children) == 0) { 686 (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name), 687 sizeof (vdev_name)); 688 689 for (c = 0; c < children; c++) { 690 err = print_recursive_stats(func, child[c], pool_name, 691 vdev_name, descend); 692 if (err) 693 return (err); 694 } 695 } 696 return (0); 697 } 698 699 /* 700 * call-back to print the stats from the pool config 701 * 702 * Note: if the pool is broken, this can hang indefinitely and perhaps in an 703 * unkillable state. 704 */ 705 static int 706 print_stats(zpool_handle_t *zhp, void *data) 707 { 708 uint_t c; 709 int err; 710 boolean_t missing; 711 nvlist_t *config, *nvroot; 712 vdev_stat_t *vs; 713 struct timespec tv; 714 char *pool_name; 715 716 /* if not this pool return quickly */ 717 if (data && 718 strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) { 719 zpool_close(zhp); 720 return (0); 721 } 722 723 if (zpool_refresh_stats(zhp, &missing) != 0) { 724 zpool_close(zhp); 725 return (1); 726 } 727 728 config = zpool_get_config(zhp, NULL); 729 if (clock_gettime(CLOCK_REALTIME, &tv) != 0) 730 timestamp = (uint64_t)time(NULL) * 1000000000; 731 else 732 timestamp = 733 ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec; 734 735 if (nvlist_lookup_nvlist( 736 config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 737 zpool_close(zhp); 738 return (2); 739 } 740 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 741 (uint64_t **)&vs, &c) != 0) { 742 zpool_close(zhp); 743 return (3); 744 } 745 746 pool_name = escape_string(zpool_get_name(zhp)); 747 err = print_recursive_stats(print_summary_stats, nvroot, 748 pool_name, NULL, 1); 749 /* if any of these return an error, skip the rest */ 750 if (err == 0) 751 err = print_top_level_vdev_stats(nvroot, pool_name); 752 753 if (no_histograms == 0) { 754 if (err == 0) 755 err = print_recursive_stats(print_vdev_latency_stats, nvroot, 756 pool_name, NULL, 1); 757 if (err == 0) 758 err = print_recursive_stats(print_vdev_size_stats, nvroot, 759 pool_name, NULL, 1); 760 if (err == 0) 761 err = print_recursive_stats(print_queue_stats, nvroot, 762 pool_name, NULL, 0); 763 } 764 if (err == 0) 765 err = print_scan_status(nvroot, pool_name); 766 767 free(pool_name); 768 zpool_close(zhp); 769 return (err); 770 } 771 772 static void 773 usage(char *name) 774 { 775 fprintf(stderr, "usage: %s [--execd][--no-histograms]" 776 "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name); 777 exit(EXIT_FAILURE); 778 } 779 780 int 781 main(int argc, char *argv[]) 782 { 783 int opt; 784 int ret = 8; 785 char *line = NULL, *ttags = NULL; 786 size_t len, tagslen = 0; 787 struct option long_options[] = { 788 {"execd", no_argument, NULL, 'e'}, 789 {"help", no_argument, NULL, 'h'}, 790 {"no-histograms", no_argument, NULL, 'n'}, 791 {"signed-int", no_argument, NULL, 'i'}, 792 {"sum-histogram-buckets", no_argument, NULL, 's'}, 793 {"tags", required_argument, NULL, 't'}, 794 {0, 0, 0, 0} 795 }; 796 while ((opt = getopt_long( 797 argc, argv, "ehinst:", long_options, NULL)) != -1) { 798 switch (opt) { 799 case 'e': 800 execd_mode = 1; 801 break; 802 case 'i': 803 metric_data_type = 'i'; 804 metric_value_mask = INT64_MAX; 805 break; 806 case 'n': 807 no_histograms = 1; 808 break; 809 case 's': 810 sum_histogram_buckets = 1; 811 break; 812 case 't': 813 free(ttags); 814 tagslen = strlen(optarg) + 2; 815 ttags = calloc(1, tagslen); 816 if (ttags == NULL) { 817 fprintf(stderr, 818 "error: cannot allocate memory " 819 "for tags\n"); 820 exit(1); 821 } 822 (void) snprintf(ttags, tagslen, ",%s", optarg); 823 tags = ttags; 824 break; 825 default: 826 usage(argv[0]); 827 } 828 } 829 830 libzfs_handle_t *g_zfs; 831 if ((g_zfs = libzfs_init()) == NULL) { 832 fprintf(stderr, 833 "error: cannot initialize libzfs. " 834 "Is the zfs module loaded or zrepl running?\n"); 835 exit(EXIT_FAILURE); 836 } 837 if (execd_mode == 0) { 838 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 839 return (ret); 840 } 841 while (getline(&line, &len, stdin) != -1) { 842 ret = zpool_iter(g_zfs, print_stats, argv[optind]); 843 fflush(stdout); 844 } 845 return (ret); 846 } 847