1 /*
2 * Gather top-level ZFS pool and resilver/scan statistics and print using
3 * influxdb line protocol
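 * usage: [options] [pool_name]
 * where options are:
 * --execd, -e run in telegraf execd input plugin mode, [CR] on
 * stdin causes a sample to be printed and wait for
 * the next [CR]
 * --no-histograms, -n don't print histogram data (reduces cardinality
 * if you don't care about histograms); the vdev queue
 * line is skipped as well
 * --sum-histogram-buckets, -s sum histogram bucket values
 * --signed-int, -i print integer fields as signed integers ('i' suffix)
 * instead of unsigned ('u' suffix)
 * --tags, -t <tags> extra influxdb tags, as key=value[,key=value...],
 * added to every line after the measurement name
 * --help, -h print the usage message and exit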
12 *
13 * To integrate into telegraf use one of:
14 * 1. the `inputs.execd` plugin with the `--execd` option
15 * 2. the `inputs.exec` plugin to simply run with no options
16 *
17 * NOTE: libzfs is an unstable interface. YMMV.
18 *
19 * The design goals of this software include:
20 * + be as lightweight as possible
21 * + reduce the number of external dependencies as far as possible, hence
22 * there is no dependency on a client library for managing the metric
23 * collection -- info is printed, KISS
24 * + broken pools or kernel bugs can cause this process to hang in an
25 * unkillable state. For this reason, it is best to keep the damage limited
26 * to a small process like zpool_influxdb rather than a larger collector.
27 *
28 * Copyright 2018-2020 Richard Elling
29 *
30 * This software is dual-licensed MIT and CDDL.
31 *
32 * The MIT License (MIT)
33 *
34 * Permission is hereby granted, free of charge, to any person obtaining a copy
35 * of this software and associated documentation files (the "Software"), to deal
36 * in the Software without restriction, including without limitation the rights
37 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
38 * copies of the Software, and to permit persons to whom the Software is
39 * furnished to do so, subject to the following conditions:
40 *
41 * The above copyright notice and this permission notice shall be included in
42 * all copies or substantial portions of the Software.
43 *
44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
45 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
46 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
47 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
48 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
49 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
50 * SOFTWARE.
51 *
52 * CDDL HEADER START
53 *
54 * The contents of this file are subject to the terms of the
55 * Common Development and Distribution License (the "License").
56 * You may not use this file except in compliance with the License.
57 *
58 * The contents of this file are subject to the terms of the
59 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
60 * You can obtain a copy of the license from the top-level file
61 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
62 * You may not use this file except in compliance with the license.
63 *
64 * See the License for the specific language governing permissions
65 * and limitations under the License.
66 *
67 * CDDL HEADER END
68 */
69 #include <string.h>
70 #include <getopt.h>
71 #include <stdio.h>
72 #include <stdint.h>
73 #include <inttypes.h>
74 #include <libzfs.h>
75
/* measurement name for the per-vdev summary lines (print_summary_stats) */
#define POOL_MEASUREMENT "zpool_stats"
/* measurement name for the scan progress line (print_scan_status) */
#define SCAN_MEASUREMENT "zpool_scan_stats"
/* measurement name for the root vdev queue line (print_top_level_vdev_stats) */
#define VDEV_MEASUREMENT "zpool_vdev_stats"
/* measurement name for the latency histogram lines (print_vdev_latency_stats) */
#define POOL_LATENCY_MEASUREMENT "zpool_latency"
/* measurement name for the queue gauge line (print_queue_stats) */
#define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue"
/*
 * Latency buckets below this index are not printed on their own; their
 * counts are folded into the first printed bucket.
 */
#define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */
/* measurement name for the request size histogram lines (print_vdev_size_stats) */
#define POOL_IO_SIZE_MEASUREMENT "zpool_io_size"
/*
 * Request size buckets below this index are not printed on their own; their
 * counts are folded into the first printed bucket.
 */
#define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */
84
/* global options */
/* set by --execd/-e: main() prints one sample per line read from stdin */
int execd_mode = 0;
/* set by --no-histograms/-n: print_stats() skips histogram and queue lines */
int no_histograms = 0;
/* set by --sum-histogram-buckets/-s: histogram values are running sums */
int sum_histogram_buckets = 0;
/* type suffix print_kv() appends to each value; 'i' with --signed-int/-i */
char metric_data_type = 'u';
/* print_kv() ANDs each value with this; INT64_MAX with --signed-int/-i */
uint64_t metric_value_mask = UINT64_MAX;
/* nanoseconds since the epoch, set by print_stats(), ends every line */
uint64_t timestamp = 0;
/* consulted by print_scan_status() before printing its out-of-sync error */
int complained_about_sync = 0;
/* text placed right after the measurement name: "" or ",<--tags value>" */
const char *tags = "";

/*
 * Signature shared by the per-vdev printers handed to print_recursive_stats():
 * (vdev nvlist, pool name, parent vdev name), nonzero return means error.
 */
typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);
96
/*
 * influxdb line protocol rules for escaping are important because the
 * zpool name can include characters that need to be escaped
 *
 * Returns a newly allocated copy of s in which every space, comma, equals
 * sign and backslash is preceded by a backslash. The result buffer is sized
 * from the input (worst case every character is doubled) so that long
 * inputs, such as vdev paths, cannot overrun it. If memory cannot be
 * allocated an error is printed and the process exits.
 *
 * caller is responsible for freeing result
 */
static char *
escape_string(const char *s)
{
	const char *c;
	char *d;
	/* every input character may need an escape, plus the terminator */
	char *t = malloc(2 * strlen(s) + 1);

	if (t == NULL) {
		fprintf(stderr, "error: cannot allocate memory\n");
		exit(1);
	}

	for (c = s, d = t; *c != '\0'; c++) {
		if (*c == ' ' || *c == ',' || *c == '=' || *c == '\\')
			*d++ = '\\';
		*d++ = *c;
	}
	*d = '\0';
	return (t);
}
129
130 /*
131 * print key=value where value is a uint64_t
132 */
133 static void
print_kv(const char * key,uint64_t value)134 print_kv(const char *key, uint64_t value)
135 {
136 printf("%s=%llu%c", key,
137 (u_longlong_t)value & metric_value_mask, metric_data_type);
138 }
139
140 /*
141 * print_scan_status() prints the details as often seen in the "zpool status"
142 * output. However, unlike the zpool command, which is intended for humans,
143 * this output is suitable for long-term tracking in influxdb.
144 * TODO: update to include issued scan data
145 */
146 static int
print_scan_status(nvlist_t * nvroot,const char * pool_name)147 print_scan_status(nvlist_t *nvroot, const char *pool_name)
148 {
149 uint_t c;
150 int64_t elapsed;
151 uint64_t examined, pass_exam, paused_time, paused_ts, rate;
152 uint64_t remaining_time;
153 pool_scan_stat_t *ps = NULL;
154 double pct_done;
155 const char *const state[DSS_NUM_STATES] = {
156 "none", "scanning", "finished", "canceled"};
157 const char *func;
158
159 (void) nvlist_lookup_uint64_array(nvroot,
160 ZPOOL_CONFIG_SCAN_STATS,
161 (uint64_t **)&ps, &c);
162
163 /*
164 * ignore if there are no stats
165 */
166 if (ps == NULL)
167 return (0);
168
169 /*
170 * return error if state is bogus
171 */
172 if (ps->pss_state >= DSS_NUM_STATES ||
173 ps->pss_func >= POOL_SCAN_FUNCS) {
174 if (complained_about_sync % 1000 == 0) {
175 fprintf(stderr, "error: cannot decode scan stats: "
176 "ZFS is out of sync with compiled zpool_influxdb");
177 complained_about_sync++;
178 }
179 return (1);
180 }
181
182 switch (ps->pss_func) {
183 case POOL_SCAN_NONE:
184 func = "none_requested";
185 break;
186 case POOL_SCAN_SCRUB:
187 func = "scrub";
188 break;
189 case POOL_SCAN_RESILVER:
190 func = "resilver";
191 break;
192 #ifdef POOL_SCAN_REBUILD
193 case POOL_SCAN_REBUILD:
194 func = "rebuild";
195 break;
196 #endif
197 default:
198 func = "scan";
199 }
200
201 /* overall progress */
202 examined = ps->pss_examined ? ps->pss_examined : 1;
203 pct_done = 0.0;
204 if (ps->pss_to_examine > 0)
205 pct_done = 100.0 * examined / ps->pss_to_examine;
206
207 #ifdef EZFS_SCRUB_PAUSED
208 paused_ts = ps->pss_pass_scrub_pause;
209 paused_time = ps->pss_pass_scrub_spent_paused;
210 #else
211 paused_ts = 0;
212 paused_time = 0;
213 #endif
214
215 /* calculations for this pass */
216 if (ps->pss_state == DSS_SCANNING) {
217 elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
218 (int64_t)paused_time;
219 elapsed = (elapsed > 0) ? elapsed : 1;
220 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
221 rate = pass_exam / elapsed;
222 rate = (rate > 0) ? rate : 1;
223 remaining_time = ps->pss_to_examine - examined / rate;
224 } else {
225 elapsed =
226 (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
227 (int64_t)paused_time;
228 elapsed = (elapsed > 0) ? elapsed : 1;
229 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
230 rate = pass_exam / elapsed;
231 remaining_time = 0;
232 }
233 rate = rate ? rate : 1;
234
235 /* influxdb line protocol format: "tags metrics timestamp" */
236 printf("%s%s,function=%s,name=%s,state=%s ",
237 SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
238 print_kv("end_ts", ps->pss_end_time);
239 print_kv(",errors", ps->pss_errors);
240 print_kv(",examined", examined);
241 print_kv(",skipped", ps->pss_skipped);
242 print_kv(",issued", ps->pss_issued);
243 print_kv(",pass_examined", pass_exam);
244 print_kv(",pass_issued", ps->pss_pass_issued);
245 print_kv(",paused_ts", paused_ts);
246 print_kv(",paused_t", paused_time);
247 printf(",pct_done=%.2f", pct_done);
248 print_kv(",processed", ps->pss_processed);
249 print_kv(",rate", rate);
250 print_kv(",remaining_t", remaining_time);
251 print_kv(",start_ts", ps->pss_start_time);
252 print_kv(",to_examine", ps->pss_to_examine);
253 printf(" %llu\n", (u_longlong_t)timestamp);
254 return (0);
255 }
256
257 /*
258 * get a vdev name that corresponds to the top-level vdev names
259 * printed by `zpool status`
260 */
261 static char *
get_vdev_name(nvlist_t * nvroot,const char * parent_name)262 get_vdev_name(nvlist_t *nvroot, const char *parent_name)
263 {
264 static char vdev_name[256];
265 uint64_t vdev_id = 0;
266
267 const char *vdev_type = "unknown";
268 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
269
270 if (nvlist_lookup_uint64(
271 nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0)
272 vdev_id = UINT64_MAX;
273
274 if (parent_name == NULL) {
275 (void) snprintf(vdev_name, sizeof (vdev_name), "%s",
276 vdev_type);
277 } else {
278 (void) snprintf(vdev_name, sizeof (vdev_name),
279 "%.220s/%s-%llu",
280 parent_name, vdev_type, (u_longlong_t)vdev_id);
281 }
282 return (vdev_name);
283 }
284
285 /*
286 * get a string suitable for an influxdb tag that describes this vdev
287 *
288 * By default only the vdev hierarchical name is shown, separated by '/'
289 * If the vdev has an associated path, which is typical of leaf vdevs,
290 * then the path is added.
291 * It would be nice to have the devid instead of the path, but under
292 * Linux we cannot be sure a devid will exist and we'd rather have
293 * something than nothing, so we'll use path instead.
294 */
295 static char *
get_vdev_desc(nvlist_t * nvroot,const char * parent_name)296 get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
297 {
298 static char vdev_desc[2 * MAXPATHLEN];
299 char vdev_value[MAXPATHLEN];
300 char *s, *t;
301
302 const char *vdev_type = "unknown";
303 uint64_t vdev_id = UINT64_MAX;
304 const char *vdev_path = NULL;
305 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
306 (void) nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id);
307 (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &vdev_path);
308
309 if (parent_name == NULL) {
310 s = escape_string(vdev_type);
311 (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
312 free(s);
313 } else {
314 s = escape_string((char *)parent_name);
315 t = escape_string(vdev_type);
316 (void) snprintf(vdev_value, sizeof (vdev_value),
317 "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
318 free(s);
319 free(t);
320 }
321 if (vdev_path == NULL) {
322 (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
323 vdev_value);
324 } else {
325 s = escape_string(vdev_path);
326 (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
327 s, vdev_value);
328 free(s);
329 }
330 return (vdev_desc);
331 }
332
333 /*
334 * vdev summary stats are a combination of the data shown by
335 * `zpool status` and `zpool list -v`
336 */
337 static int
print_summary_stats(nvlist_t * nvroot,const char * pool_name,const char * parent_name)338 print_summary_stats(nvlist_t *nvroot, const char *pool_name,
339 const char *parent_name)
340 {
341 uint_t c;
342 vdev_stat_t *vs;
343 char *vdev_desc = NULL;
344 vdev_desc = get_vdev_desc(nvroot, parent_name);
345 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
346 (uint64_t **)&vs, &c) != 0) {
347 return (1);
348 }
349 printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
350 pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
351 (vdev_aux_t)vs->vs_aux), vdev_desc);
352 print_kv("alloc", vs->vs_alloc);
353 print_kv(",free", vs->vs_space - vs->vs_alloc);
354 print_kv(",size", vs->vs_space);
355 print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
356 print_kv(",read_errors", vs->vs_read_errors);
357 print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
358 print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
359 print_kv(",write_errors", vs->vs_write_errors);
360 print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
361 print_kv(",checksum_errors", vs->vs_checksum_errors);
362 print_kv(",fragmentation", vs->vs_fragmentation);
363 printf(" %llu\n", (u_longlong_t)timestamp);
364 return (0);
365 }
366
367 /*
368 * vdev latency stats are histograms stored as nvlist arrays of uint64.
369 * Latency stats include the ZIO scheduler classes plus lower-level
370 * vdev latencies.
371 *
372 * In many cases, the top-level "root" view obscures the underlying
373 * top-level vdev operations. For example, if a pool has a log, special,
374 * or cache device, then each can behave very differently. It is useful
375 * to see how each is responding.
376 */
377 static int
print_vdev_latency_stats(nvlist_t * nvroot,const char * pool_name,const char * parent_name)378 print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
379 const char *parent_name)
380 {
381 uint_t c, end = 0;
382 nvlist_t *nv_ex;
383 char *vdev_desc = NULL;
384
385 /* short_names become part of the metric name and are influxdb-ready */
386 struct lat_lookup {
387 const char *name;
388 const char *short_name;
389 uint64_t sum;
390 uint64_t *array;
391 };
392 struct lat_lookup lat_type[] = {
393 {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
394 {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
395 {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
396 {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
397 {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
398 {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
399 {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
400 {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
401 {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
402 #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
403 {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
404 #endif
405 {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0},
406 {NULL, NULL}
407 };
408
409 if (nvlist_lookup_nvlist(nvroot,
410 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
411 return (6);
412 }
413
414 vdev_desc = get_vdev_desc(nvroot, parent_name);
415
416 for (int i = 0; lat_type[i].name; i++) {
417 if (nvlist_lookup_uint64_array(nv_ex,
418 lat_type[i].name, &lat_type[i].array, &c) != 0) {
419 fprintf(stderr, "error: can't get %s\n",
420 lat_type[i].name);
421 return (3);
422 }
423 /* end count count, all of the arrays are the same size */
424 end = c - 1;
425 }
426
427 for (int bucket = 0; bucket <= end; bucket++) {
428 if (bucket < MIN_LAT_INDEX) {
429 /* don't print, but collect the sum */
430 for (int i = 0; lat_type[i].name; i++) {
431 lat_type[i].sum += lat_type[i].array[bucket];
432 }
433 continue;
434 }
435 if (bucket < end) {
436 printf("%s%s,le=%0.6f,name=%s,%s ",
437 POOL_LATENCY_MEASUREMENT, tags,
438 (float)(1ULL << bucket) * 1e-9,
439 pool_name, vdev_desc);
440 } else {
441 printf("%s%s,le=+Inf,name=%s,%s ",
442 POOL_LATENCY_MEASUREMENT, tags, pool_name,
443 vdev_desc);
444 }
445 for (int i = 0; lat_type[i].name; i++) {
446 if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
447 lat_type[i].sum += lat_type[i].array[bucket];
448 } else {
449 lat_type[i].sum = lat_type[i].array[bucket];
450 }
451 print_kv(lat_type[i].short_name, lat_type[i].sum);
452 if (lat_type[i + 1].name != NULL) {
453 printf(",");
454 }
455 }
456 printf(" %llu\n", (u_longlong_t)timestamp);
457 }
458 return (0);
459 }
460
461 /*
462 * vdev request size stats are histograms stored as nvlist arrays of uint64.
463 * Request size stats include the ZIO scheduler classes plus lower-level
464 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
465 *
466 * In many cases, the top-level "root" view obscures the underlying
467 * top-level vdev operations. For example, if a pool has a log, special,
468 * or cache device, then each can behave very differently. It is useful
469 * to see how each is responding.
470 */
471 static int
print_vdev_size_stats(nvlist_t * nvroot,const char * pool_name,const char * parent_name)472 print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
473 const char *parent_name)
474 {
475 uint_t c, end = 0;
476 nvlist_t *nv_ex;
477 char *vdev_desc = NULL;
478
479 /* short_names become the field name */
480 struct size_lookup {
481 const char *name;
482 const char *short_name;
483 uint64_t sum;
484 uint64_t *array;
485 };
486 struct size_lookup size_type[] = {
487 {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
488 {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
489 {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
490 {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
491 {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
492 {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
493 {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
494 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
495 {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
496 {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
497 #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
498 {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
499 {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
500 #endif
501 {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"},
502 {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"},
503 {NULL, NULL}
504 };
505
506 if (nvlist_lookup_nvlist(nvroot,
507 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
508 return (6);
509 }
510
511 vdev_desc = get_vdev_desc(nvroot, parent_name);
512
513 for (int i = 0; size_type[i].name; i++) {
514 if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
515 &size_type[i].array, &c) != 0) {
516 fprintf(stderr, "error: can't get %s\n",
517 size_type[i].name);
518 return (3);
519 }
520 /* end count count, all of the arrays are the same size */
521 end = c - 1;
522 }
523
524 for (int bucket = 0; bucket <= end; bucket++) {
525 if (bucket < MIN_SIZE_INDEX) {
526 /* don't print, but collect the sum */
527 for (int i = 0; size_type[i].name; i++) {
528 size_type[i].sum += size_type[i].array[bucket];
529 }
530 continue;
531 }
532
533 if (bucket < end) {
534 printf("%s%s,le=%llu,name=%s,%s ",
535 POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
536 pool_name, vdev_desc);
537 } else {
538 printf("%s%s,le=+Inf,name=%s,%s ",
539 POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
540 vdev_desc);
541 }
542 for (int i = 0; size_type[i].name; i++) {
543 if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
544 size_type[i].sum += size_type[i].array[bucket];
545 } else {
546 size_type[i].sum = size_type[i].array[bucket];
547 }
548 print_kv(size_type[i].short_name, size_type[i].sum);
549 if (size_type[i + 1].name != NULL) {
550 printf(",");
551 }
552 }
553 printf(" %llu\n", (u_longlong_t)timestamp);
554 }
555 return (0);
556 }
557
558 /*
559 * ZIO scheduler queue stats are stored as gauges. This is unfortunate
560 * because the values can change very rapidly and any point-in-time
561 * value will quickly be obsoleted. It is also not easy to downsample.
562 * Thus only the top-level queue stats might be beneficial... maybe.
563 */
564 static int
print_queue_stats(nvlist_t * nvroot,const char * pool_name,const char * parent_name)565 print_queue_stats(nvlist_t *nvroot, const char *pool_name,
566 const char *parent_name)
567 {
568 nvlist_t *nv_ex;
569 uint64_t value;
570
571 /* short_names are used for the field name */
572 struct queue_lookup {
573 const char *name;
574 const char *short_name;
575 };
576 struct queue_lookup queue_type[] = {
577 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
578 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
579 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
580 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
581 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
582 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"},
583 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
584 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
585 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
586 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
587 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
588 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"},
589 {NULL, NULL}
590 };
591
592 if (nvlist_lookup_nvlist(nvroot,
593 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
594 return (6);
595 }
596
597 printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
598 get_vdev_desc(nvroot, parent_name));
599 for (int i = 0; queue_type[i].name; i++) {
600 if (nvlist_lookup_uint64(nv_ex,
601 queue_type[i].name, &value) != 0) {
602 fprintf(stderr, "error: can't get %s\n",
603 queue_type[i].name);
604 return (3);
605 }
606 print_kv(queue_type[i].short_name, value);
607 if (queue_type[i + 1].name != NULL) {
608 printf(",");
609 }
610 }
611 printf(" %llu\n", (u_longlong_t)timestamp);
612 return (0);
613 }
614
615 /*
616 * top-level vdev stats are at the pool level
617 */
618 static int
print_top_level_vdev_stats(nvlist_t * nvroot,const char * pool_name)619 print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
620 {
621 nvlist_t *nv_ex;
622 uint64_t value;
623
624 /* short_names become part of the metric name */
625 struct queue_lookup {
626 const char *name;
627 const char *short_name;
628 };
629 struct queue_lookup queue_type[] = {
630 {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
631 {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
632 {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
633 {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
634 {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
635 {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"},
636 {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
637 {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
638 {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
639 {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
640 {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
641 {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"},
642 {NULL, NULL}
643 };
644
645 if (nvlist_lookup_nvlist(nvroot,
646 ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
647 return (6);
648 }
649
650 printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
651 pool_name);
652 for (int i = 0; queue_type[i].name; i++) {
653 if (nvlist_lookup_uint64(nv_ex,
654 queue_type[i].name, &value) != 0) {
655 fprintf(stderr, "error: can't get %s\n",
656 queue_type[i].name);
657 return (3);
658 }
659 if (i > 0)
660 printf(",");
661 print_kv(queue_type[i].short_name, value);
662 }
663
664 printf(" %llu\n", (u_longlong_t)timestamp);
665 return (0);
666 }
667
668 /*
669 * recursive stats printer
670 */
671 static int
print_recursive_stats(stat_printer_f func,nvlist_t * nvroot,const char * pool_name,const char * parent_name,int descend)672 print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
673 const char *pool_name, const char *parent_name, int descend)
674 {
675 uint_t c, children;
676 nvlist_t **child;
677 char vdev_name[256];
678 int err;
679
680 err = func(nvroot, pool_name, parent_name);
681 if (err)
682 return (err);
683
684 if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
685 &child, &children) == 0) {
686 (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name),
687 sizeof (vdev_name));
688
689 for (c = 0; c < children; c++) {
690 err = print_recursive_stats(func, child[c], pool_name,
691 vdev_name, descend);
692 if (err)
693 return (err);
694 }
695 }
696 return (0);
697 }
698
699 /*
700 * call-back to print the stats from the pool config
701 *
702 * Note: if the pool is broken, this can hang indefinitely and perhaps in an
703 * unkillable state.
704 */
705 static int
print_stats(zpool_handle_t * zhp,void * data)706 print_stats(zpool_handle_t *zhp, void *data)
707 {
708 uint_t c;
709 int err;
710 boolean_t missing;
711 nvlist_t *config, *nvroot;
712 vdev_stat_t *vs;
713 struct timespec tv;
714 char *pool_name;
715
716 /* if not this pool return quickly */
717 if (data &&
718 strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) {
719 zpool_close(zhp);
720 return (0);
721 }
722
723 if (zpool_refresh_stats(zhp, &missing) != 0) {
724 zpool_close(zhp);
725 return (1);
726 }
727
728 config = zpool_get_config(zhp, NULL);
729 if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
730 timestamp = (uint64_t)time(NULL) * 1000000000;
731 else
732 timestamp =
733 ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;
734
735 if (nvlist_lookup_nvlist(
736 config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
737 zpool_close(zhp);
738 return (2);
739 }
740 if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
741 (uint64_t **)&vs, &c) != 0) {
742 zpool_close(zhp);
743 return (3);
744 }
745
746 pool_name = escape_string(zpool_get_name(zhp));
747 err = print_recursive_stats(print_summary_stats, nvroot,
748 pool_name, NULL, 1);
749 /* if any of these return an error, skip the rest */
750 if (err == 0)
751 err = print_top_level_vdev_stats(nvroot, pool_name);
752
753 if (no_histograms == 0) {
754 if (err == 0)
755 err = print_recursive_stats(print_vdev_latency_stats, nvroot,
756 pool_name, NULL, 1);
757 if (err == 0)
758 err = print_recursive_stats(print_vdev_size_stats, nvroot,
759 pool_name, NULL, 1);
760 if (err == 0)
761 err = print_recursive_stats(print_queue_stats, nvroot,
762 pool_name, NULL, 0);
763 }
764 if (err == 0)
765 err = print_scan_status(nvroot, pool_name);
766
767 free(pool_name);
768 zpool_close(zhp);
769 return (err);
770 }
771
/*
 * Print a one-line usage summary to stderr and exit with EXIT_FAILURE.
 * name is the program name to show in the message. Does not return.
 *
 * The summary lists every option main() accepts except --help, including
 * --tags, whose argument is placed verbatim after a ',' following the
 * measurement name and so must be in key=value[,key=value...] form.
 */
static void
usage(char *name)
{
	fprintf(stderr, "usage: %s [--execd][--no-histograms]"
	    "[--sum-histogram-buckets] [--signed-int] "
	    "[--tags key=value[,key=value...]] [poolname]\n", name);
	exit(EXIT_FAILURE);
}
779
780 int
main(int argc,char * argv[])781 main(int argc, char *argv[])
782 {
783 int opt;
784 int ret = 8;
785 char *line = NULL, *ttags = NULL;
786 size_t len, tagslen = 0;
787 struct option long_options[] = {
788 {"execd", no_argument, NULL, 'e'},
789 {"help", no_argument, NULL, 'h'},
790 {"no-histograms", no_argument, NULL, 'n'},
791 {"signed-int", no_argument, NULL, 'i'},
792 {"sum-histogram-buckets", no_argument, NULL, 's'},
793 {"tags", required_argument, NULL, 't'},
794 {0, 0, 0, 0}
795 };
796 while ((opt = getopt_long(
797 argc, argv, "ehinst:", long_options, NULL)) != -1) {
798 switch (opt) {
799 case 'e':
800 execd_mode = 1;
801 break;
802 case 'i':
803 metric_data_type = 'i';
804 metric_value_mask = INT64_MAX;
805 break;
806 case 'n':
807 no_histograms = 1;
808 break;
809 case 's':
810 sum_histogram_buckets = 1;
811 break;
812 case 't':
813 free(ttags);
814 tagslen = strlen(optarg) + 2;
815 ttags = calloc(1, tagslen);
816 if (ttags == NULL) {
817 fprintf(stderr,
818 "error: cannot allocate memory "
819 "for tags\n");
820 exit(1);
821 }
822 (void) snprintf(ttags, tagslen, ",%s", optarg);
823 tags = ttags;
824 break;
825 default:
826 usage(argv[0]);
827 }
828 }
829
830 libzfs_handle_t *g_zfs;
831 if ((g_zfs = libzfs_init()) == NULL) {
832 fprintf(stderr,
833 "error: cannot initialize libzfs. "
834 "Is the zfs module loaded or zrepl running?\n");
835 exit(EXIT_FAILURE);
836 }
837 if (execd_mode == 0) {
838 ret = zpool_iter(g_zfs, print_stats, argv[optind]);
839 return (ret);
840 }
841 while (getline(&line, &len, stdin) != -1) {
842 ret = zpool_iter(g_zfs, print_stats, argv[optind]);
843 fflush(stdout);
844 }
845 return (ret);
846 }
847