// SPDX-License-Identifier: CDDL-1.0
/*
 * Gather top-level ZFS pool and resilver/scan statistics and print using
 * influxdb line protocol
 * usage: [options] [pool_name]
 * where options are:
 *   --execd, -e                   run in telegraf execd input plugin mode;
 *                                 a [CR] on stdin causes a sample to be
 *                                 printed, then waits for the next [CR]
 *   --no-histograms, -n           don't print histogram data (reduces
 *                                 cardinality if you don't care about
 *                                 histograms)
 *   --sum-histogram-buckets, -s   sum histogram bucket values
 *
 * To integrate into telegraf use one of:
 * 1. the `inputs.execd` plugin with the `--execd` option
 * 2. the `inputs.exec` plugin to simply run with no options
 *
 * NOTE: libzfs is an unstable interface. YMMV.
 *
 * The design goals of this software include:
 * + be as lightweight as possible
 * + reduce the number of external dependencies as far as possible, hence
 *   there is no dependency on a client library for managing the metric
 *   collection -- info is printed, KISS
 * + broken pools or kernel bugs can cause this process to hang in an
 *   unkillable state. For this reason, it is best to keep the damage limited
 *   to a small process like zpool_influxdb rather than a larger collector.
 *
 * Copyright 2018-2020 Richard Elling
 *
 * This software is dual-licensed MIT and CDDL.
 *
 * The MIT License (MIT)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * CDDL HEADER END
 */
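
/*
 * Illustrative telegraf wiring (a sketch, not a shipped default): the
 * `inputs.execd` plugin can drive `--execd` mode by writing a newline to
 * stdin at each collection interval. The binary path below is an assumption
 * and depends on where your distribution installs zpool_influxdb.
 *
 *   [[inputs.execd]]
 *     command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
 *     signal = "STDIN"
 *
 * For `inputs.exec`, point `commands` at the binary with no options instead.
 */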

#include <string.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <inttypes.h>
#include <libzfs.h>

#define POOL_MEASUREMENT "zpool_stats"
#define SCAN_MEASUREMENT "zpool_scan_stats"
#define VDEV_MEASUREMENT "zpool_vdev_stats"
#define POOL_LATENCY_MEASUREMENT "zpool_latency"
#define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue"
#define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */
#define POOL_IO_SIZE_MEASUREMENT "zpool_io_size"
#define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */

/* global options */
int execd_mode = 0;
int no_histograms = 0;
int sum_histogram_buckets = 0;
char metric_data_type = 'u';
uint64_t metric_value_mask = UINT64_MAX;
uint64_t timestamp = 0;
int complained_about_sync = 0;
const char *tags = "";

typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);

/*
 * influxdb line protocol rules for escaping are important because the
 * zpool name can include characters that need to be escaped
 *
 * caller is responsible for freeing result
 */
static char *
escape_string(const char *s)
{
    const char *c;
    char *d;
    char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2);
    if (t == NULL) {
        fprintf(stderr, "error: cannot allocate memory\n");
        exit(1);
    }

    for (c = s, d = t; *c != '\0'; c++, d++) {
        switch (*c) {
        case ' ':
        case ',':
        case '=':
        case '\\':
            *d++ = '\\';
            zfs_fallthrough;
        default:
            *d = *c;
        }
    }
    *d = '\0';
    return (t);
}
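
/*
 * For example (illustrative value), a pool named "my pool,1=a" would be
 * emitted as "my\ pool\,1\=a", so a line protocol parser sees one tag value
 * instead of stray tag or field separators.
 */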

/*
 * print key=value where value is a uint64_t
 */
static void
print_kv(const char *key, uint64_t value)
{
    printf("%s=%llu%c", key,
        (u_longlong_t)value & metric_value_mask, metric_data_type);
}

/*
 * print_scan_status() prints the details as often seen in the "zpool status"
 * output. However, unlike the zpool command, which is intended for humans,
 * this output is suitable for long-term tracking in influxdb.
 * TODO: update to include issued scan data
 */
static int
print_scan_status(nvlist_t *nvroot, const char *pool_name)
{
    uint_t c;
    int64_t elapsed;
    uint64_t examined, pass_exam, paused_time, paused_ts, rate;
    uint64_t remaining_time;
    pool_scan_stat_t *ps = NULL;
    double pct_done;
    const char *const state[DSS_NUM_STATES] = {
        "none", "scanning", "finished", "canceled"};
    const char *func;

    (void) nvlist_lookup_uint64_array(nvroot,
        ZPOOL_CONFIG_SCAN_STATS,
        (uint64_t **)&ps, &c);

    /*
     * ignore if there are no stats
     */
    if (ps == NULL)
        return (0);

    /*
     * return error if state is bogus
     */
    if (ps->pss_state >= DSS_NUM_STATES ||
        ps->pss_func >= POOL_SCAN_FUNCS) {
        if (complained_about_sync % 1000 == 0) {
            fprintf(stderr, "error: cannot decode scan stats: "
                "ZFS is out of sync with compiled zpool_influxdb");
            complained_about_sync++;
        }
        return (1);
    }

    switch (ps->pss_func) {
    case POOL_SCAN_NONE:
        func = "none_requested";
        break;
    case POOL_SCAN_SCRUB:
        func = "scrub";
        break;
    case POOL_SCAN_RESILVER:
        func = "resilver";
        break;
#ifdef POOL_SCAN_REBUILD
    case POOL_SCAN_REBUILD:
        func = "rebuild";
        break;
#endif
    default:
        func = "scan";
    }

    /* overall progress */
    examined = ps->pss_examined ? ps->pss_examined : 1;
    pct_done = 0.0;
    if (ps->pss_to_examine > 0)
        pct_done = 100.0 * examined / ps->pss_to_examine;

#ifdef EZFS_SCRUB_PAUSED
    paused_ts = ps->pss_pass_scrub_pause;
    paused_time = ps->pss_pass_scrub_spent_paused;
#else
    paused_ts = 0;
    paused_time = 0;
#endif

    /* calculations for this pass */
    if (ps->pss_state == DSS_SCANNING) {
        elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
            (int64_t)paused_time;
        elapsed = (elapsed > 0) ? elapsed : 1;
        pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
        rate = pass_exam / elapsed;
        rate = (rate > 0) ? rate : 1;
        remaining_time = (ps->pss_to_examine - examined) / rate;
    } else {
        elapsed =
            (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
            (int64_t)paused_time;
        elapsed = (elapsed > 0) ? elapsed : 1;
        pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
        rate = pass_exam / elapsed;
        remaining_time = 0;
    }
    rate = rate ? rate : 1;

    /* influxdb line protocol format: "tags metrics timestamp" */
    printf("%s%s,function=%s,name=%s,state=%s ",
        SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
    print_kv("end_ts", ps->pss_end_time);
    print_kv(",errors", ps->pss_errors);
    print_kv(",examined", examined);
    print_kv(",skipped", ps->pss_skipped);
    print_kv(",issued", ps->pss_issued);
    print_kv(",pass_examined", pass_exam);
    print_kv(",pass_issued", ps->pss_pass_issued);
    print_kv(",paused_ts", paused_ts);
    print_kv(",paused_t", paused_time);
    printf(",pct_done=%.2f", pct_done);
    print_kv(",processed", ps->pss_processed);
    print_kv(",rate", rate);
    print_kv(",remaining_t", remaining_time);
    print_kv(",start_ts", ps->pss_start_time);
    print_kv(",to_examine", ps->pss_to_examine);
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}

/*
 * get a vdev name that corresponds to the top-level vdev names
 * printed by `zpool status`
 */
static char *
get_vdev_name(nvlist_t *nvroot, const char *parent_name)
{
    static char vdev_name[256];
    uint64_t vdev_id = 0;

    const char *vdev_type = "unknown";
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);

    if (nvlist_lookup_uint64(
        nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0)
        vdev_id = UINT64_MAX;

    if (parent_name == NULL) {
        (void) snprintf(vdev_name, sizeof (vdev_name), "%s",
            vdev_type);
    } else {
        (void) snprintf(vdev_name, sizeof (vdev_name),
            "%.220s/%s-%llu",
            parent_name, vdev_type, (u_longlong_t)vdev_id);
    }
    return (vdev_name);
}
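
/*
 * Illustrative examples of the hierarchical naming used here and in
 * get_vdev_desc() below: the pool's root vdev is simply "root", a mirror
 * child with id 0 becomes "root/mirror-0", and a disk with id 1 under that
 * mirror shows up as "root/mirror-0/disk-1".
 */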

/*
 * get a string suitable for an influxdb tag that describes this vdev
 *
 * By default only the vdev hierarchical name is shown, separated by '/'.
 * If the vdev has an associated path, which is typical of leaf vdevs,
 * then the path is added.
 * It would be nice to have the devid instead of the path, but under
 * Linux we cannot be sure a devid will exist and we'd rather have
 * something than nothing, so we'll use path instead.
 */
static char *
get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
{
    static char vdev_desc[2 * MAXPATHLEN];
    char vdev_value[MAXPATHLEN];
    char *s, *t;

    const char *vdev_type = "unknown";
    uint64_t vdev_id = UINT64_MAX;
    const char *vdev_path = NULL;
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
    (void) nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id);
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &vdev_path);

    if (parent_name == NULL) {
        s = escape_string(vdev_type);
        (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
        free(s);
    } else {
        s = escape_string(parent_name);
        t = escape_string(vdev_type);
        (void) snprintf(vdev_value, sizeof (vdev_value),
            "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
        free(s);
        free(t);
    }
    if (vdev_path == NULL) {
        (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
            vdev_value);
    } else {
        s = escape_string(vdev_path);
        (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
            s, vdev_value);
        free(s);
    }
    return (vdev_desc);
}

/*
 * vdev summary stats are a combination of the data shown by
 * `zpool status` and `zpool list -v`
 */
static int
print_summary_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c;
    vdev_stat_t *vs;
    char *vdev_desc = NULL;
    vdev_desc = get_vdev_desc(nvroot, parent_name);
    if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
        (uint64_t **)&vs, &c) != 0) {
        return (1);
    }
    printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
        pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
        (vdev_aux_t)vs->vs_aux), vdev_desc);
    print_kv("alloc", vs->vs_alloc);
    print_kv(",free", vs->vs_space - vs->vs_alloc);
    print_kv(",size", vs->vs_space);
    print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
    print_kv(",read_errors", vs->vs_read_errors);
    print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
    print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
    print_kv(",write_errors", vs->vs_write_errors);
    print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
    print_kv(",checksum_errors", vs->vs_checksum_errors);
    print_kv(",fragmentation", vs->vs_fragmentation);
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}
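
/*
 * A resulting line looks roughly like this (values illustrative, no --tags
 * given):
 *
 *   zpool_stats,name=tank,state=ONLINE,vdev=root alloc=4096u,free=1024u,... 1590263723000000000
 *
 * i.e. influxdb line protocol: measurement and tags, a space, the fields,
 * a space, and a nanosecond timestamp.
 */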

/*
 * vdev latency stats are histograms stored as nvlist arrays of uint64.
 * Latency stats include the ZIO scheduler classes plus lower-level
 * vdev latencies.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c, end = 0;
    nvlist_t *nv_ex;
    char *vdev_desc = NULL;

    /* short_names become part of the metric name and are influxdb-ready */
    struct lat_lookup {
        const char *name;
        const char *short_name;
        uint64_t sum;
        uint64_t *array;
    };
    struct lat_lookup lat_type[] = {
        {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
        {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
        {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
        {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
        {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
        {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
        {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
        {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
#endif
        {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    vdev_desc = get_vdev_desc(nvroot, parent_name);

    for (int i = 0; lat_type[i].name; i++) {
        if (nvlist_lookup_uint64_array(nv_ex,
            lat_type[i].name, &lat_type[i].array, &c) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                lat_type[i].name);
            return (3);
        }
        /* end count; all of the arrays are the same size */
        end = c - 1;
    }

    for (int bucket = 0; bucket <= end; bucket++) {
        if (bucket < MIN_LAT_INDEX) {
            /* don't print, but collect the sum */
            for (int i = 0; lat_type[i].name; i++) {
                lat_type[i].sum += lat_type[i].array[bucket];
            }
            continue;
        }
        if (bucket < end) {
            printf("%s%s,le=%0.6f,name=%s,%s ",
                POOL_LATENCY_MEASUREMENT, tags,
                (float)(1ULL << bucket) * 1e-9,
                pool_name, vdev_desc);
        } else {
            printf("%s%s,le=+Inf,name=%s,%s ",
                POOL_LATENCY_MEASUREMENT, tags, pool_name,
                vdev_desc);
        }
        for (int i = 0; lat_type[i].name; i++) {
            if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
                lat_type[i].sum += lat_type[i].array[bucket];
            } else {
                lat_type[i].sum = lat_type[i].array[bucket];
            }
            print_kv(lat_type[i].short_name, lat_type[i].sum);
            if (lat_type[i + 1].name != NULL) {
                printf(",");
            }
        }
        printf(" %llu\n", (u_longlong_t)timestamp);
    }
    return (0);
}
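
/*
 * Worked example of the bucket math above: bucket 10 covers latencies up to
 * 1 << 10 = 1024 ns and is tagged le=0.000001 (seconds, printed with %0.6f).
 * Buckets below MIN_LAT_INDEX are folded into that first printed bucket to
 * keep cardinality down, and the final bucket is tagged le=+Inf.
 */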

/*
 * vdev request size stats are histograms stored as nvlist arrays of uint64.
 * Request size stats include the ZIO scheduler classes plus lower-level
 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c, end = 0;
    nvlist_t *nv_ex;
    char *vdev_desc = NULL;

    /* short_names become the field name */
    struct size_lookup {
        const char *name;
        const char *short_name;
        uint64_t sum;
        uint64_t *array;
    };
    struct size_lookup size_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
        {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
        {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
        {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
        {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
        {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
        {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
        {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
        {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
        {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
        {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
        {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
#endif
        {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"},
        {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    vdev_desc = get_vdev_desc(nvroot, parent_name);

    for (int i = 0; size_type[i].name; i++) {
        if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
            &size_type[i].array, &c) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                size_type[i].name);
            return (3);
        }
        /* end count; all of the arrays are the same size */
        end = c - 1;
    }

    for (int bucket = 0; bucket <= end; bucket++) {
        if (bucket < MIN_SIZE_INDEX) {
            /* don't print, but collect the sum */
            for (int i = 0; size_type[i].name; i++) {
                size_type[i].sum += size_type[i].array[bucket];
            }
            continue;
        }

        if (bucket < end) {
            printf("%s%s,le=%llu,name=%s,%s ",
                POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
                pool_name, vdev_desc);
        } else {
            printf("%s%s,le=+Inf,name=%s,%s ",
                POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
                vdev_desc);
        }
        for (int i = 0; size_type[i].name; i++) {
            if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
                size_type[i].sum += size_type[i].array[bucket];
            } else {
                size_type[i].sum = size_type[i].array[bucket];
            }
            print_kv(size_type[i].short_name, size_type[i].sum);
            if (size_type[i + 1].name != NULL) {
                printf(",");
            }
        }
        printf(" %llu\n", (u_longlong_t)timestamp);
    }
    return (0);
}
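
/*
 * Note on --sum-histogram-buckets for both histogram printers above: with it,
 * each printed value is the running total of all buckets up to and including
 * its "le" tag (a cumulative histogram); without it, every bucket past the
 * first printed one stands alone.
 */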

/*
 * ZIO scheduler queue stats are stored as gauges. This is unfortunate
 * because the values can change very rapidly and any point-in-time
 * value will quickly be obsoleted. It is also not easy to downsample.
 * Thus only the top-level queue stats might be beneficial... maybe.
 */
static int
print_queue_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    nvlist_t *nv_ex;
    uint64_t value;

    /* short_names are used for the field name */
    struct queue_lookup {
        const char *name;
        const char *short_name;
    };
    struct queue_lookup queue_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
        {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
        {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"},
        {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
        {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
        {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
        get_vdev_desc(nvroot, parent_name));
    for (int i = 0; queue_type[i].name; i++) {
        if (nvlist_lookup_uint64(nv_ex,
            queue_type[i].name, &value) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                queue_type[i].name);
            return (3);
        }
        print_kv(queue_type[i].short_name, value);
        if (queue_type[i + 1].name != NULL) {
            printf(",");
        }
    }
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}

/*
 * top-level vdev stats are at the pool level
 */
static int
print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
{
    nvlist_t *nv_ex;
    uint64_t value;

    /* short_names become part of the metric name */
    struct queue_lookup {
        const char *name;
        const char *short_name;
    };
    struct queue_lookup queue_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
        {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
        {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
        {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
        {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
        pool_name);
    for (int i = 0; queue_type[i].name; i++) {
        if (nvlist_lookup_uint64(nv_ex,
            queue_type[i].name, &value) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                queue_type[i].name);
            return (3);
        }
        if (i > 0)
            printf(",");
        print_kv(queue_type[i].short_name, value);
    }

    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}

/*
 * recursive stats printer
 */
static int
print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
    const char *pool_name, const char *parent_name, int descend)
{
    uint_t c, children;
    nvlist_t **child;
    char vdev_name[256];
    int err;

    err = func(nvroot, pool_name, parent_name);
    if (err)
        return (err);

    if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
        &child, &children) == 0) {
        (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name),
            sizeof (vdev_name));

        for (c = 0; c < children; c++) {
            err = print_recursive_stats(func, child[c], pool_name,
                vdev_name, descend);
            if (err)
                return (err);
        }
    }
    return (0);
}

/*
 * call-back to print the stats from the pool config
 *
 * Note: if the pool is broken, this can hang indefinitely and perhaps in an
 * unkillable state.
 */
static int
print_stats(zpool_handle_t *zhp, void *data)
{
    uint_t c;
    int err;
    boolean_t missing;
    nvlist_t *config, *nvroot;
    vdev_stat_t *vs;
    struct timespec tv;
    char *pool_name;

    /* if not this pool return quickly */
    if (data &&
        strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) {
        zpool_close(zhp);
        return (0);
    }

    if (zpool_refresh_stats(zhp, &missing) != 0) {
        zpool_close(zhp);
        return (1);
    }

    config = zpool_get_config(zhp, NULL);
    if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
        timestamp = (uint64_t)time(NULL) * 1000000000;
    else
        timestamp =
            ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;

    if (nvlist_lookup_nvlist(
        config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
        zpool_close(zhp);
        return (2);
    }
    if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
        (uint64_t **)&vs, &c) != 0) {
        zpool_close(zhp);
        return (3);
    }

    pool_name = escape_string(zpool_get_name(zhp));
    err = print_recursive_stats(print_summary_stats, nvroot,
        pool_name, NULL, 1);
    /* if any of these return an error, skip the rest */
    if (err == 0)
        err = print_top_level_vdev_stats(nvroot, pool_name);

    if (no_histograms == 0) {
        if (err == 0)
            err = print_recursive_stats(print_vdev_latency_stats, nvroot,
                pool_name, NULL, 1);
        if (err == 0)
            err = print_recursive_stats(print_vdev_size_stats, nvroot,
                pool_name, NULL, 1);
        if (err == 0)
            err = print_recursive_stats(print_queue_stats, nvroot,
                pool_name, NULL, 0);
    }
    if (err == 0)
        err = print_scan_status(nvroot, pool_name);

    free(pool_name);
    zpool_close(zhp);
    return (err);
}

static void
usage(char *name)
{
    fprintf(stderr, "usage: %s [--execd] [--no-histograms] "
        "[--sum-histogram-buckets] [--signed-int] [--tags key=value,...] "
        "[poolname]\n", name);
    exit(EXIT_FAILURE);
}
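
/*
 * Example invocations (illustrative; "tank" is a placeholder pool name):
 *
 *   zpool_influxdb                        one sample for every imported pool
 *   zpool_influxdb --execd                one sample per newline read on stdin
 *   zpool_influxdb --tags dc=east tank    add a dc=east tag, report only "tank"
 */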

int
main(int argc, char *argv[])
{
    int opt;
    int ret = 8;
    char *line = NULL, *ttags = NULL;
    size_t len = 0, tagslen = 0;
    struct option long_options[] = {
        {"execd", no_argument, NULL, 'e'},
        {"help", no_argument, NULL, 'h'},
        {"no-histograms", no_argument, NULL, 'n'},
        {"signed-int", no_argument, NULL, 'i'},
        {"sum-histogram-buckets", no_argument, NULL, 's'},
        {"tags", required_argument, NULL, 't'},
        {0, 0, 0, 0}
    };
    while ((opt = getopt_long(
        argc, argv, "ehinst:", long_options, NULL)) != -1) {
        switch (opt) {
        case 'e':
            execd_mode = 1;
            break;
        case 'i':
            metric_data_type = 'i';
            metric_value_mask = INT64_MAX;
            break;
        case 'n':
            no_histograms = 1;
            break;
        case 's':
            sum_histogram_buckets = 1;
            break;
        case 't':
            free(ttags);
            tagslen = strlen(optarg) + 2;
            ttags = calloc(1, tagslen);
            if (ttags == NULL) {
                fprintf(stderr,
                    "error: cannot allocate memory "
                    "for tags\n");
                exit(1);
            }
            (void) snprintf(ttags, tagslen, ",%s", optarg);
            tags = ttags;
            break;
        default:
            usage(argv[0]);
        }
    }

    libzfs_handle_t *g_zfs;
    if ((g_zfs = libzfs_init()) == NULL) {
        fprintf(stderr,
            "error: cannot initialize libzfs. "
            "Is the zfs module loaded or zrepl running?\n");
        exit(EXIT_FAILURE);
    }
    if (execd_mode == 0) {
        ret = zpool_iter(g_zfs, print_stats, argv[optind]);
        return (ret);
    }
    while (getline(&line, &len, stdin) != -1) {
        ret = zpool_iter(g_zfs, print_stats, argv[optind]);
        fflush(stdout);
    }
    return (ret);
}