1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2017, Intel Corporation.
26 * Copyright (c) 2023-2025, Klara, Inc.
27 */
28
29 /*
30 * ZFS Fault Injector
31 *
32 * This userland component takes a set of options and uses libzpool to translate
33 * from a user-visible object type and name to an internal representation.
34 * There are two basic types of faults: device faults and data faults.
35 *
36 *
37 * DEVICE FAULTS
38 *
39 * Errors can be injected into a particular vdev using the '-d' option. This
40 * option takes a path or vdev GUID to uniquely identify the device within a
 * pool. There are four types of errors that can be injected: EIO, ENXIO,
 * ECHILD, and EILSEQ. These can be controlled through the '-e' option and the
 * default is ENXIO. For EIO failures, any attempt to read data from the device
 * will return EIO, but a subsequent attempt to reopen the device will succeed.
 * For ENXIO failures, any attempt to read from the device will return EIO, and
 * any attempt to reopen the device will return ENXIO. The EILSEQ failures
47 * only apply to read operations (-T read) and will flip a bit after the device
48 * has read the original data.
49 *
50 * For label faults, the -L option must be specified. This allows faults
51 * to be injected into either the nvlist, uberblock, pad1, or pad2 region
52 * of all the labels for the specified device.
53 *
54 * This form of the command looks like:
55 *
56 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
57 *
58 *
59 * DATA FAULTS
60 *
61 * We begin with a tuple of the form:
62 *
63 * <type,level,range,object>
64 *
65 * type A string describing the type of data to target. Each type
66 * implicitly describes how to interpret 'object'. Currently,
67 * the following values are supported:
68 *
69 * data User data for a file
70 * dnode Dnode for a file or directory
71 *
72 * The following MOS objects are special. Instead of injecting
73 * errors on a particular object or blkid, we inject errors across
74 * all objects of the given type.
75 *
76 * mos Any data in the MOS
77 * mosdir object directory
78 * config pool configuration
79 * bpobj blkptr list
80 * spacemap spacemap
81 * metaslab metaslab
82 * errlog persistent error log
83 *
84 * level Object level. Defaults to '0', not applicable to all types. If
85 * a range is given, this corresponds to the indirect block
86 * corresponding to the specific range.
87 *
88 * range A numerical range [start,end) within the object. Defaults to
89 * the full size of the file.
90 *
91 * object A string describing the logical location of the object. For
92 * files and directories (currently the only supported types),
93 * this is the path of the object on disk.
94 *
95 * This is translated, via libzpool, into the following internal representation:
96 *
97 * <type,objset,object,level,range>
98 *
99 * These types should be self-explanatory. This tuple is then passed to the
100 * kernel via a special ioctl() to initiate fault injection for the given
101 * object. Note that 'type' is not strictly necessary for fault injection, but
102 * is used when translating existing faults into a human-readable string.
103 *
104 *
105 * The command itself takes one of the forms:
106 *
107 * zinject
108 * zinject <-a | -u pool>
109 * zinject -c <id|all>
110 * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]
111 * [-T iotype] [-t type object | -b bookmark pool]
112 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
113 * [-r range] <object>
114 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
115 *
116 * With no arguments, the command prints all currently registered injection
117 * handlers, with their numeric identifiers.
118 *
119 * The '-c' option will clear the given handler, or all handlers if 'all' is
120 * specified.
121 *
122 * The '-e' option takes a string describing the errno to simulate. This must
123 * be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this
124 * will result in the same behavior, but RAID-Z will produce a different set of
125 * ereports for this situation.
126 *
127 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
128 * specified, then the ARC cache is flushed appropriately. If '-u' is
129 * specified, then the underlying SPA is unloaded. Either of these flags can be
130 * specified independently of any other handlers. The '-m' flag automatically
131 * does an unmount and remount of the underlying dataset to aid in flushing the
132 * cache.
133 *
134 * The '-f' flag controls the frequency of errors injected, expressed as a
135 * real number percentage between 0.0001 and 100. The default is 100.
136 *
137 * The <object> form is responsible for actually injecting the handler into the
138 * framework. It takes the arguments described above, translates them to the
139 * internal tuple using libzpool, and then issues an ioctl() to register the
140 * handler.
141 *
142 * The '-b' option can target a specific bookmark, regardless of whether a
143 * human-readable interface has been designed. It allows developers to specify
144 * a particular block by number.
145 *
146 * The '-E' option injects pipeline ready stage delays for the given object or
147 * bookmark. The delay is specified in milliseconds, and it supports I/O type
148 * and range filters.
149 */
150
151 #include <errno.h>
152 #include <fcntl.h>
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <string.h>
156 #include <strings.h>
157 #include <unistd.h>
158
159 #include <sys/fs/zfs.h>
160 #include <sys/mount.h>
161
162 #include <libzfs.h>
163
164 #undef verify /* both libzfs.h and zfs_context.h want to define this */
165
166 #include "zinject.h"
167
/* libzfs library handle; assigned from libzfs_init() at the top of main(). */
libzfs_handle_t *g_zfs;
/* File descriptor for ZFS_DEV, opened O_RDWR in main(). */
int zfs_fd;
170
/*
 * Names accepted for the error/label types. name_to_type() returns the index
 * of the matching entry as an err_type_t, so the order of the strings
 * presumably has to track the err_type_t enum declared in zinject.h --
 * confirm against that header before reordering or inserting entries.
 */
static const char *const errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
186
187 static err_type_t
name_to_type(const char * arg)188 name_to_type(const char *arg)
189 {
190 int i;
191 for (i = 0; i < TYPE_INVAL; i++)
192 if (strcmp(errtable[i], arg) == 0)
193 return (i);
194
195 return (TYPE_INVAL);
196 }
197
198 static const char *
type_to_name(uint64_t type)199 type_to_name(uint64_t type)
200 {
201 switch (type) {
202 case DMU_OT_OBJECT_DIRECTORY:
203 return ("mosdir");
204 case DMU_OT_OBJECT_ARRAY:
205 return ("metaslab");
206 case DMU_OT_PACKED_NVLIST:
207 return ("config");
208 case DMU_OT_BPOBJ:
209 return ("bpobj");
210 case DMU_OT_SPACE_MAP:
211 return ("spacemap");
212 case DMU_OT_ERROR_LOG:
213 return ("errlog");
214 default:
215 return ("-");
216 }
217 }
218
219 struct errstr {
220 int err;
221 const char *str;
222 };
223 static const struct errstr errstrtable[] = {
224 { EIO, "io" },
225 { ECKSUM, "checksum" },
226 { EINVAL, "decompress" },
227 { EACCES, "decrypt" },
228 { ENXIO, "nxio" },
229 { ECHILD, "dtl" },
230 { EILSEQ, "corrupt" },
231 { ENOSYS, "noop" },
232 { EFAULT, "io-prefail" },
233 { 0, NULL },
234 };
235
236 static int
str_to_err(const char * str)237 str_to_err(const char *str)
238 {
239 for (int i = 0; errstrtable[i].str != NULL; i++)
240 if (strcasecmp(errstrtable[i].str, str) == 0)
241 return (errstrtable[i].err);
242 return (-1);
243 }
244 static const char *
err_to_str(int err)245 err_to_str(int err)
246 {
247 for (int i = 0; errstrtable[i].str != NULL; i++)
248 if (errstrtable[i].err == err)
249 return (errstrtable[i].str);
250 return ("[unknown]");
251 }
252
/*
 * I/O type names, indexed by zinject_iotype_t value. Any slot below
 * ZINJECT_IOTYPES that has no initializer here is NULL; str_to_iotype() and
 * iotype_to_str() both check for that before using an entry.
 */
static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
	[ZINJECT_IOTYPE_NULL]	= "null",
	[ZINJECT_IOTYPE_READ]	= "read",
	[ZINJECT_IOTYPE_WRITE]	= "write",
	[ZINJECT_IOTYPE_FREE]	= "free",
	[ZINJECT_IOTYPE_CLAIM]	= "claim",
	[ZINJECT_IOTYPE_FLUSH]	= "flush",
	[ZINJECT_IOTYPE_TRIM]	= "trim",
	[ZINJECT_IOTYPE_ALL]	= "all",
	[ZINJECT_IOTYPE_PROBE]	= "probe",
};
264
265 static zinject_iotype_t
str_to_iotype(const char * arg)266 str_to_iotype(const char *arg)
267 {
268 for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
269 if (iotypestrtable[iotype] != NULL &&
270 strcasecmp(iotypestrtable[iotype], arg) == 0)
271 return (iotype);
272 return (ZINJECT_IOTYPES);
273 }
274
275 static const char *
iotype_to_str(zinject_iotype_t iotype)276 iotype_to_str(zinject_iotype_t iotype)
277 {
278 if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
279 return ("[unknown]");
280 return (iotypestrtable[iotype]);
281 }
282
/*
 * Print the usage message to stdout.
 *
 * The text is a single string made of adjacent literals; every output line
 * ends in "\n" with no blanks before it, and no literal carries whitespace
 * after its "\n" (which would leak into the start of the next output line).
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only\n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or\n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), 'io-prefail' (unsuccessfully do\n"
	    "\t\tnothing) or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n"
	    "\t\t[-T iotype] [-t type object | -b bookmark pool]\n"
	    "\n"
	    "\t\tInject pipeline ready stage delays for the given object path\n"
	    "\t\t(data or dnode) or raw bookmark. The delay is specified in\n"
	    "\t\tmilliseconds.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration. Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds. The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple. Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor. The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error. Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'. Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject, specified as 'start[,end]'. Numeric\n"
	    "\t\t\tsuffixes (K, M, G, T, P, E) are accepted.\n"
	    "\t\t\tWill be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool. Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile. The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory. The\n"
	    "\t\t\t'-r' option is incompatible with this mode. The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
420
421 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)422 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
423 void *data)
424 {
425 zfs_cmd_t zc = {"\0"};
426 int ret;
427
428 while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
429 if ((ret = func((int)zc.zc_guid, zc.zc_name,
430 &zc.zc_inject_record, data)) != 0)
431 return (ret);
432
433 if (errno != ENOENT) {
434 (void) fprintf(stderr, "Unable to list handlers: %s\n",
435 strerror(errno));
436 return (-1);
437 }
438
439 return (0);
440 }
441
442 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)443 print_data_handler(int id, const char *pool, zinject_record_t *record,
444 void *data)
445 {
446 int *count = data;
447
448 if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
449 record->zi_duration != 0) {
450 return (0);
451 }
452
453 if (*count == 0) {
454 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
455 "%-15s %-6s %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
456 "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
457 (void) printf("--- --------------- ------ "
458 "------ -------- --- ---- --------------- "
459 "------ ------\n");
460 }
461
462 *count += 1;
463
464 char rangebuf[32];
465 if (record->zi_start == 0 && record->zi_end == -1ULL)
466 snprintf(rangebuf, sizeof (rangebuf), "all");
467 else
468 snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
469 (u_longlong_t)record->zi_start,
470 (u_longlong_t)record->zi_end);
471
472
473 (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x %-15s "
474 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
475 (u_longlong_t)record->zi_objset,
476 (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
477 record->zi_level, record->zi_dvas, rangebuf,
478 record->zi_match_count, record->zi_inject_count);
479
480 return (0);
481 }
482
483 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)484 print_device_handler(int id, const char *pool, zinject_record_t *record,
485 void *data)
486 {
487 int *count = data;
488
489 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
490 return (0);
491
492 if (record->zi_cmd == ZINJECT_DELAY_IO)
493 return (0);
494
495 if (*count == 0) {
496 (void) printf("%3s %-15s %-16s %-5s %-10s %-9s "
497 "%-6s %-6s\n",
498 "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
499 "MATCH", "INJECT");
500 (void) printf(
501 "--- --------------- ---------------- "
502 "----- ---------- --------- "
503 "------ ------\n");
504 }
505
506 *count += 1;
507
508 double freq = record->zi_freq == 0 ? 100.0f :
509 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
510
511 (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%% "
512 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
513 (u_longlong_t)record->zi_guid,
514 iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
515 freq, record->zi_match_count, record->zi_inject_count);
516
517 return (0);
518 }
519
520 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)521 print_delay_handler(int id, const char *pool, zinject_record_t *record,
522 void *data)
523 {
524 int *count = data;
525
526 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
527 return (0);
528
529 if (record->zi_cmd != ZINJECT_DELAY_IO)
530 return (0);
531
532 if (*count == 0) {
533 (void) printf("%3s %-15s %-16s %-10s %-5s %-9s "
534 "%-6s %-6s\n",
535 "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
536 "MATCH", "INJECT");
537 (void) printf("--- --------------- ---------------- "
538 "---------- ----- --------- "
539 "------ ------\n");
540 }
541
542 *count += 1;
543
544 double freq = record->zi_freq == 0 ? 100.0f :
545 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
546
547 (void) printf("%3d %-15s %llx %10llu %5llu %8.4f%% "
548 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
549 (u_longlong_t)record->zi_guid,
550 (u_longlong_t)NSEC2MSEC(record->zi_timer),
551 (u_longlong_t)record->zi_nlanes,
552 freq, record->zi_match_count, record->zi_inject_count);
553
554 return (0);
555 }
556
557 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)558 print_panic_handler(int id, const char *pool, zinject_record_t *record,
559 void *data)
560 {
561 int *count = data;
562
563 if (record->zi_func[0] == '\0')
564 return (0);
565
566 if (*count == 0) {
567 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
568 (void) printf("--- --------------- ----------------\n");
569 }
570
571 *count += 1;
572
573 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
574
575 return (0);
576 }
577
578 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)579 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
580 void *data)
581 {
582 int *count = data;
583
584 if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
585 record->zi_cmd != ZINJECT_DELAY_EXPORT) {
586 return (0);
587 }
588
589 if (*count == 0) {
590 (void) printf("%3s %-19s %-11s %s\n",
591 "ID", "POOL", "DELAY (sec)", "COMMAND");
592 (void) printf("--- ------------------- -----------"
593 " -------\n");
594 }
595
596 *count += 1;
597
598 (void) printf("%3d %-19s %-11llu %s\n",
599 id, pool, (u_longlong_t)record->zi_duration,
600 record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
601
602 return (0);
603 }
604
605 /*
606 * Print all registered error handlers. Returns the number of handlers
607 * registered.
608 */
609 static int
print_all_handlers(void)610 print_all_handlers(void)
611 {
612 int count = 0, total = 0;
613
614 (void) iter_handlers(print_device_handler, &count);
615 if (count > 0) {
616 total += count;
617 (void) printf("\n");
618 count = 0;
619 }
620
621 (void) iter_handlers(print_delay_handler, &count);
622 if (count > 0) {
623 total += count;
624 (void) printf("\n");
625 count = 0;
626 }
627
628 (void) iter_handlers(print_data_handler, &count);
629 if (count > 0) {
630 total += count;
631 (void) printf("\n");
632 count = 0;
633 }
634
635 (void) iter_handlers(print_pool_delay_handler, &count);
636 if (count > 0) {
637 total += count;
638 (void) printf("\n");
639 count = 0;
640 }
641
642 (void) iter_handlers(print_panic_handler, &count);
643
644 return (count + total);
645 }
646
647 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)648 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
649 void *data)
650 {
651 (void) pool, (void) record, (void) data;
652 zfs_cmd_t zc = {"\0"};
653
654 zc.zc_guid = (uint64_t)id;
655
656 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
657 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
658 id, strerror(errno));
659 return (1);
660 }
661
662 return (0);
663 }
664
665 /*
666 * Remove all fault injection handlers.
667 */
668 static int
cancel_all_handlers(void)669 cancel_all_handlers(void)
670 {
671 int ret = iter_handlers(cancel_one_handler, NULL);
672
673 if (ret == 0)
674 (void) printf("removed all registered handlers\n");
675
676 return (ret);
677 }
678
679 /*
680 * Remove a specific fault injection handler.
681 */
682 static int
cancel_handler(int id)683 cancel_handler(int id)
684 {
685 zfs_cmd_t zc = {"\0"};
686
687 zc.zc_guid = (uint64_t)id;
688
689 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
690 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
691 id, strerror(errno));
692 return (1);
693 }
694
695 (void) printf("removed handler %d\n", id);
696
697 return (0);
698 }
699
700 /*
701 * Register a new fault injection handler.
702 */
703 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)704 register_handler(const char *pool, int flags, zinject_record_t *record,
705 int quiet)
706 {
707 zfs_cmd_t zc = {"\0"};
708
709 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
710 zc.zc_inject_record = *record;
711 zc.zc_guid = flags;
712
713 if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
714 const char *errmsg = strerror(errno);
715
716 switch (errno) {
717 case EDOM:
718 errmsg = "block level exceeds max level of object";
719 break;
720 case EEXIST:
721 if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
722 errmsg = "pool already imported";
723 if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
724 errmsg = "a handler already exists";
725 break;
726 case ENOENT:
727 /* import delay injector running on older zfs module */
728 if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
729 errmsg = "import delay injector not supported";
730 break;
731 default:
732 break;
733 }
734 (void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
735 return (1);
736 }
737
738 if (flags & ZINJECT_NULL)
739 return (0);
740
741 if (quiet) {
742 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
743 } else {
744 boolean_t show_object = B_FALSE;
745 boolean_t show_iotype = B_FALSE;
746 (void) printf("Added handler %llu with the following "
747 "properties:\n", (u_longlong_t)zc.zc_guid);
748 (void) printf(" pool: %s\n", pool);
749 if (record->zi_guid) {
750 (void) printf(" vdev: %llx\n",
751 (u_longlong_t)record->zi_guid);
752 show_iotype = B_TRUE;
753 } else if (record->zi_func[0] != '\0') {
754 (void) printf(" panic function: %s\n",
755 record->zi_func);
756 } else if (record->zi_duration > 0) {
757 (void) printf(" time: %lld seconds\n",
758 (u_longlong_t)record->zi_duration);
759 } else if (record->zi_duration < 0) {
760 (void) printf(" txgs: %lld \n",
761 (u_longlong_t)-record->zi_duration);
762 } else if (record->zi_timer > 0) {
763 (void) printf(" timer: %lld ms\n",
764 (u_longlong_t)NSEC2MSEC(record->zi_timer));
765 if (record->zi_cmd == ZINJECT_DELAY_READY) {
766 show_object = B_TRUE;
767 show_iotype = B_TRUE;
768 }
769 } else {
770 show_object = B_TRUE;
771 }
772 if (show_iotype) {
773 (void) printf("iotype: %s\n",
774 iotype_to_str(record->zi_iotype));
775 }
776 if (show_object) {
777 (void) printf("objset: %llu\n",
778 (u_longlong_t)record->zi_objset);
779 (void) printf("object: %llu\n",
780 (u_longlong_t)record->zi_object);
781 (void) printf(" type: %llu\n",
782 (u_longlong_t)record->zi_type);
783 (void) printf(" level: %d\n", record->zi_level);
784 if (record->zi_start == 0 &&
785 record->zi_end == -1ULL)
786 (void) printf(" range: all\n");
787 else
788 (void) printf(" range: [%llu, %llu)\n",
789 (u_longlong_t)record->zi_start,
790 (u_longlong_t)record->zi_end);
791 (void) printf(" dvas: 0x%x\n", record->zi_dvas);
792 }
793 }
794
795 return (0);
796 }
797
798 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)799 perform_action(const char *pool, zinject_record_t *record, int cmd)
800 {
801 zfs_cmd_t zc = {"\0"};
802
803 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
804 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
805 zc.zc_guid = record->zi_guid;
806 zc.zc_cookie = cmd;
807
808 if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
809 return (0);
810
811 return (1);
812 }
813
/*
 * Parse a "<delay-ms>:<lanes>" specification.
 *
 * Both fields must be plain unsigned decimal numbers: no sign, no
 * surrounding whitespace, and nothing after the lane count. On success the
 * delay, converted to nanoseconds, is stored in *delay and the lane count in
 * *nlanes, and 0 is returned. On any parse error, a zero delay, or a delay
 * too large to convert, 1 is returned and the outputs are left untouched.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	/* largest value representable in a signed 64-bit nanosecond count */
	const unsigned long long max_ns = ~0ULL >> 1;
	unsigned long long ms, lanes;
	char *end;

	/* strtoull() would otherwise accept leading whitespace and a sign */
	if (*str < '0' || *str > '9')
		return (1);
	errno = 0;
	ms = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	str = end + 1;
	if (*str < '0' || *str > '9')
		return (1);
	errno = 0;
	lanes = strtoull(str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (ms == 0)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Refuse values whose scaled result would not fit, rather than
	 * letting the multiplication overflow. MSEC2NSEC(1) is the scale
	 * factor; the bound is conservative.
	 */
	if (ms > max_ns / (unsigned long long)MSEC2NSEC(1))
		return (1);

	*delay = MSEC2NSEC(ms);
	*nlanes = lanes;

	return (0);
}
842
843 static int
parse_frequency(const char * str,uint32_t * percent)844 parse_frequency(const char *str, uint32_t *percent)
845 {
846 double val;
847 char *post;
848
849 val = strtod(str, &post);
850 if (post == NULL || *post != '\0')
851 return (EINVAL);
852
853 /* valid range is [0.0001, 100.0] */
854 val /= 100.0f;
855 if (val < 0.000001f || val > 1.0f)
856 return (ERANGE);
857
858 /* convert to an integer for use by kernel */
859 *percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
860
861 return (0);
862 }
863
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010 (0x2)
 *	"0,1"	-> 0b0011 (0x3)
 *	"0,1,2"	-> 0b0111 (0x7)
 *
 * Returns 0 and stores the mask in *dvas_out on success. Returns EINVAL,
 * leaving *dvas_out untouched, if the string is empty, longer than five
 * characters, contains anything other than '0', '1', '2' and ',', repeats a
 * DVA, has two DVAs without a comma between them, or has a comma that is not
 * placed between two DVAs (leading, trailing, or doubled).
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	uint32_t mask = 0;
	int prev_was_dva = 0;	/* previous character was a DVA index */
	size_t len = strlen(str);

	/* max string length is 5 ("0,1,2") */
	if (len == 0 || len > 5)
		return (EINVAL);

	for (const char *c = str; *c != '\0'; c++) {
		uint32_t bit;

		if (*c == ',') {
			/* a comma is only valid directly after a DVA */
			if (!prev_was_dva)
				return (EINVAL);
			prev_was_dva = 0;
			continue;
		}

		/* check for invalid character */
		if (*c < '0' || *c > '2')
			return (EINVAL);

		/* check for a missing comma between DVAs */
		if (prev_was_dva)
			return (EINVAL);

		/* check if this DVA has been set already */
		bit = 1U << (*c - '0');
		if (mask & bit)
			return (EINVAL);

		mask |= bit;
		prev_was_dva = 1;
	}

	/* check for dangling delimiter */
	if (!prev_was_dva)
		return (EINVAL);

	*dvas_out = mask;
	return (0);
}
916
917 int
main(int argc,char ** argv)918 main(int argc, char **argv)
919 {
920 int c;
921 char *range = NULL;
922 char *cancel = NULL;
923 char *end;
924 char *raw = NULL;
925 char *device = NULL;
926 int level = 0;
927 int quiet = 0;
928 int error = 0;
929 int domount = 0;
930 int io_type = ZINJECT_IOTYPE_ALL;
931 int action = VDEV_STATE_UNKNOWN;
932 err_type_t type = TYPE_INVAL;
933 err_type_t label = TYPE_INVAL;
934 zinject_record_t record = { 0 };
935 char pool[MAXNAMELEN] = "";
936 char dataset[MAXNAMELEN] = "";
937 zfs_handle_t *zhp = NULL;
938 int nowrites = 0;
939 int dur_txg = 0;
940 int dur_secs = 0;
941 int ret;
942 int flags = 0;
943 uint32_t dvas = 0;
944 hrtime_t ready_delay = -1;
945
946 if ((g_zfs = libzfs_init()) == NULL) {
947 (void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
948 return (1);
949 }
950
951 libzfs_print_on_error(g_zfs, B_TRUE);
952
953 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
954 (void) fprintf(stderr, "failed to open ZFS device\n");
955 libzfs_fini(g_zfs);
956 return (1);
957 }
958
959 if (argc == 1) {
960 /*
961 * No arguments. Print the available handlers. If there are no
962 * available handlers, direct the user to '-h' for help
963 * information.
964 */
965 if (print_all_handlers() == 0) {
966 (void) printf("No handlers registered.\n");
967 (void) printf("Run 'zinject -h' for usage "
968 "information.\n");
969 }
970 libzfs_fini(g_zfs);
971 return (0);
972 }
973
974 while ((c = getopt(argc, argv,
975 ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
976 switch (c) {
977 case 'a':
978 flags |= ZINJECT_FLUSH_ARC;
979 break;
980 case 'A':
981 if (strcasecmp(optarg, "degrade") == 0) {
982 action = VDEV_STATE_DEGRADED;
983 } else if (strcasecmp(optarg, "fault") == 0) {
984 action = VDEV_STATE_FAULTED;
985 } else {
986 (void) fprintf(stderr, "invalid action '%s': "
987 "must be 'degrade' or 'fault'\n", optarg);
988 usage();
989 libzfs_fini(g_zfs);
990 return (1);
991 }
992 break;
993 case 'b':
994 raw = optarg;
995 break;
996 case 'c':
997 cancel = optarg;
998 break;
999 case 'C':
1000 ret = parse_dvas(optarg, &dvas);
1001 if (ret != 0) {
1002 (void) fprintf(stderr, "invalid DVA list '%s': "
1003 "DVAs should be 0 indexed and separated by "
1004 "commas.\n", optarg);
1005 usage();
1006 libzfs_fini(g_zfs);
1007 return (1);
1008 }
1009 break;
1010 case 'd':
1011 device = optarg;
1012 break;
1013 case 'D':
1014 errno = 0;
1015 ret = parse_delay(optarg, &record.zi_timer,
1016 &record.zi_nlanes);
1017 if (ret != 0) {
1018
1019 (void) fprintf(stderr, "invalid i/o delay "
1020 "value: '%s'\n", optarg);
1021 usage();
1022 libzfs_fini(g_zfs);
1023 return (1);
1024 }
1025 break;
1026 case 'e':
1027 error = str_to_err(optarg);
1028 if (error < 0) {
1029 (void) fprintf(stderr, "invalid error type "
1030 "'%s': must be one of: io decompress "
1031 "decrypt nxio dtl corrupt noop "
1032 "io-prefail\n",
1033 optarg);
1034 usage();
1035 libzfs_fini(g_zfs);
1036 return (1);
1037 }
1038 break;
1039 case 'f':
1040 ret = parse_frequency(optarg, &record.zi_freq);
1041 if (ret != 0) {
1042 (void) fprintf(stderr, "%sfrequency value must "
1043 "be in the range [0.0001, 100.0]\n",
1044 ret == EINVAL ? "invalid value: " :
1045 ret == ERANGE ? "out of range: " : "");
1046 libzfs_fini(g_zfs);
1047 return (1);
1048 }
1049 break;
1050 case 'F':
1051 record.zi_failfast = B_TRUE;
1052 break;
1053 case 'g':
1054 dur_txg = 1;
1055 record.zi_duration = (int)strtol(optarg, &end, 10);
1056 if (record.zi_duration <= 0 || *end != '\0') {
1057 (void) fprintf(stderr, "invalid duration '%s': "
1058 "must be a positive integer\n", optarg);
1059 usage();
1060 libzfs_fini(g_zfs);
1061 return (1);
1062 }
1063 /* store duration of txgs as its negative */
1064 record.zi_duration *= -1;
1065 break;
1066 case 'h':
1067 usage();
1068 libzfs_fini(g_zfs);
1069 return (0);
1070 case 'I':
1071 /* default duration, if one hasn't yet been defined */
1072 nowrites = 1;
1073 if (dur_secs == 0 && dur_txg == 0)
1074 record.zi_duration = 30;
1075 break;
1076 case 'l':
1077 level = (int)strtol(optarg, &end, 10);
1078 if (*end != '\0') {
1079 (void) fprintf(stderr, "invalid level '%s': "
1080 "must be an integer\n", optarg);
1081 usage();
1082 libzfs_fini(g_zfs);
1083 return (1);
1084 }
1085 break;
1086 case 'm':
1087 domount = 1;
1088 break;
1089 case 'p':
1090 (void) strlcpy(record.zi_func, optarg,
1091 sizeof (record.zi_func));
1092 record.zi_cmd = ZINJECT_PANIC;
1093 break;
1094 case 'P':
1095 if (strcasecmp(optarg, "import") == 0) {
1096 record.zi_cmd = ZINJECT_DELAY_IMPORT;
1097 } else if (strcasecmp(optarg, "export") == 0) {
1098 record.zi_cmd = ZINJECT_DELAY_EXPORT;
1099 } else {
1100 (void) fprintf(stderr, "invalid command '%s': "
1101 "must be 'import' or 'export'\n", optarg);
1102 usage();
1103 libzfs_fini(g_zfs);
1104 return (1);
1105 }
1106 break;
1107 case 'q':
1108 quiet = 1;
1109 break;
1110 case 'r':
1111 range = optarg;
1112 flags |= ZINJECT_CALC_RANGE;
1113 break;
1114 case 's':
1115 dur_secs = 1;
1116 record.zi_duration = (int)strtol(optarg, &end, 10);
1117 if (record.zi_duration <= 0 || *end != '\0') {
1118 (void) fprintf(stderr, "invalid duration '%s': "
1119 "must be a positive integer\n", optarg);
1120 usage();
1121 libzfs_fini(g_zfs);
1122 return (1);
1123 }
1124 break;
1125 case 'T':
1126 io_type = str_to_iotype(optarg);
1127 if (io_type == ZINJECT_IOTYPES) {
1128 (void) fprintf(stderr, "invalid I/O type "
1129 "'%s': must be 'read', 'write', 'free', "
1130 "'claim', 'flush' or 'all'\n", optarg);
1131 usage();
1132 libzfs_fini(g_zfs);
1133 return (1);
1134 }
1135 break;
1136 case 't':
1137 if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1138 !MOS_TYPE(type)) {
1139 (void) fprintf(stderr, "invalid type '%s'\n",
1140 optarg);
1141 usage();
1142 libzfs_fini(g_zfs);
1143 return (1);
1144 }
1145 break;
1146 case 'u':
1147 flags |= ZINJECT_UNLOAD_SPA;
1148 break;
1149 case 'E':
1150 ready_delay = MSEC2NSEC(strtol(optarg, &end, 10));
1151 if (ready_delay <= 0 || *end != '\0') {
1152 (void) fprintf(stderr, "invalid delay '%s': "
1153 "must be a positive duration\n", optarg);
1154 usage();
1155 libzfs_fini(g_zfs);
1156 return (1);
1157 }
1158 record.zi_cmd = ZINJECT_DELAY_READY;
1159 record.zi_timer = ready_delay;
1160 break;
1161 case 'L':
1162 if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1163 !LABEL_TYPE(type)) {
1164 (void) fprintf(stderr, "invalid label type "
1165 "'%s'\n", optarg);
1166 usage();
1167 libzfs_fini(g_zfs);
1168 return (1);
1169 }
1170 break;
1171 case ':':
1172 (void) fprintf(stderr, "option -%c requires an "
1173 "operand\n", optopt);
1174 usage();
1175 libzfs_fini(g_zfs);
1176 return (1);
1177 case '?':
1178 (void) fprintf(stderr, "invalid option '%c'\n",
1179 optopt);
1180 usage();
1181 libzfs_fini(g_zfs);
1182 return (2);
1183 }
1184 }
1185
1186 argc -= optind;
1187 argv += optind;
1188
1189 if (record.zi_duration != 0 && record.zi_cmd == 0)
1190 record.zi_cmd = ZINJECT_IGNORED_WRITES;
1191
1192 if (cancel != NULL) {
1193 /*
1194 * '-c' is invalid with any other options.
1195 */
1196 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1197 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1198 record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) {
1199 (void) fprintf(stderr, "cancel (-c) incompatible with "
1200 "any other options\n");
1201 usage();
1202 libzfs_fini(g_zfs);
1203 return (2);
1204 }
1205 if (argc != 0) {
1206 (void) fprintf(stderr, "extraneous argument to '-c'\n");
1207 usage();
1208 libzfs_fini(g_zfs);
1209 return (2);
1210 }
1211
1212 if (strcmp(cancel, "all") == 0) {
1213 return (cancel_all_handlers());
1214 } else {
1215 int id = (int)strtol(cancel, &end, 10);
1216 if (*end != '\0') {
1217 (void) fprintf(stderr, "invalid handle id '%s':"
1218 " must be an integer or 'all'\n", cancel);
1219 usage();
1220 libzfs_fini(g_zfs);
1221 return (1);
1222 }
1223 return (cancel_handler(id));
1224 }
1225 }
1226
1227 if (device != NULL) {
1228 /*
1229 * Device (-d) injection uses a completely different mechanism
1230 * for doing injection, so handle it separately here.
1231 */
1232 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1233 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1234 dvas != 0 || ready_delay >= 0) {
1235 (void) fprintf(stderr, "device (-d) incompatible with "
1236 "data error injection\n");
1237 usage();
1238 libzfs_fini(g_zfs);
1239 return (2);
1240 }
1241
1242 if (argc != 1) {
1243 (void) fprintf(stderr, "device (-d) injection requires "
1244 "a single pool name\n");
1245 usage();
1246 libzfs_fini(g_zfs);
1247 return (2);
1248 }
1249
1250 (void) strlcpy(pool, argv[0], sizeof (pool));
1251 dataset[0] = '\0';
1252
1253 if (error == ECKSUM) {
1254 (void) fprintf(stderr, "device error type must be "
1255 "'io', 'nxio' or 'corrupt'\n");
1256 libzfs_fini(g_zfs);
1257 return (1);
1258 }
1259
1260 if (error == EILSEQ &&
1261 (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1262 (void) fprintf(stderr, "device corrupt errors require "
1263 "io type read and a frequency value\n");
1264 libzfs_fini(g_zfs);
1265 return (1);
1266 }
1267
1268 record.zi_iotype = io_type;
1269 if (translate_device(pool, device, label, &record) != 0) {
1270 libzfs_fini(g_zfs);
1271 return (1);
1272 }
1273
1274 if (record.zi_nlanes) {
1275 switch (io_type) {
1276 case ZINJECT_IOTYPE_READ:
1277 case ZINJECT_IOTYPE_WRITE:
1278 case ZINJECT_IOTYPE_ALL:
1279 break;
1280 default:
1281 (void) fprintf(stderr, "I/O type for a delay "
1282 "must be 'read' or 'write'\n");
1283 usage();
1284 libzfs_fini(g_zfs);
1285 return (1);
1286 }
1287 }
1288
1289 if (!error)
1290 error = ENXIO;
1291
1292 if (action != VDEV_STATE_UNKNOWN)
1293 return (perform_action(pool, &record, action));
1294
1295 } else if (raw != NULL) {
1296 if (range != NULL || type != TYPE_INVAL || level != 0 ||
1297 record.zi_cmd != ZINJECT_UNINITIALIZED ||
1298 record.zi_freq > 0 || dvas != 0) {
1299 (void) fprintf(stderr, "raw (-b) format with "
1300 "any other options\n");
1301 usage();
1302 libzfs_fini(g_zfs);
1303 return (2);
1304 }
1305
1306 if (argc != 1) {
1307 (void) fprintf(stderr, "raw (-b) format expects a "
1308 "single pool name\n");
1309 usage();
1310 libzfs_fini(g_zfs);
1311 return (2);
1312 }
1313
1314 (void) strlcpy(pool, argv[0], sizeof (pool));
1315 dataset[0] = '\0';
1316
1317 if (error == ENXIO) {
1318 (void) fprintf(stderr, "data error type must be "
1319 "'checksum' or 'io'\n");
1320 libzfs_fini(g_zfs);
1321 return (1);
1322 }
1323
1324 if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1325 record.zi_cmd = ZINJECT_DATA_FAULT;
1326 if (!error)
1327 error = EIO;
1328 } else if (error != 0) {
1329 (void) fprintf(stderr, "error type -e incompatible "
1330 "with delay injection\n");
1331 libzfs_fini(g_zfs);
1332 return (1);
1333 } else {
1334 record.zi_iotype = io_type;
1335 }
1336
1337 if (translate_raw(raw, &record) != 0) {
1338 libzfs_fini(g_zfs);
1339 return (1);
1340 }
1341 } else if (record.zi_cmd == ZINJECT_PANIC) {
1342 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1343 level != 0 || device != NULL || record.zi_freq > 0 ||
1344 dvas != 0) {
1345 (void) fprintf(stderr, "%s incompatible with other "
1346 "options\n", "import|export delay (-P)");
1347 usage();
1348 libzfs_fini(g_zfs);
1349 return (2);
1350 }
1351
1352 if (argc < 1 || argc > 2) {
1353 (void) fprintf(stderr, "panic (-p) injection requires "
1354 "a single pool name and an optional id\n");
1355 usage();
1356 libzfs_fini(g_zfs);
1357 return (2);
1358 }
1359
1360 (void) strlcpy(pool, argv[0], sizeof (pool));
1361 if (argv[1] != NULL)
1362 record.zi_type = atoi(argv[1]);
1363 dataset[0] = '\0';
1364 } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1365 record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1366 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1367 level != 0 || device != NULL || record.zi_freq > 0 ||
1368 dvas != 0) {
1369 (void) fprintf(stderr, "%s incompatible with other "
1370 "options\n", "import|export delay (-P)");
1371 usage();
1372 libzfs_fini(g_zfs);
1373 return (2);
1374 }
1375
1376 if (argc != 1 || record.zi_duration <= 0) {
1377 (void) fprintf(stderr, "import|export delay (-P) "
1378 "injection requires a duration (-s) and a single "
1379 "pool name\n");
1380 usage();
1381 libzfs_fini(g_zfs);
1382 return (2);
1383 }
1384
1385 (void) strlcpy(pool, argv[0], sizeof (pool));
1386 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1387 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1388 level != 0 || record.zi_freq > 0 || dvas != 0) {
1389 (void) fprintf(stderr, "hardware failure (-I) "
1390 "incompatible with other options\n");
1391 usage();
1392 libzfs_fini(g_zfs);
1393 return (2);
1394 }
1395
1396 if (nowrites == 0) {
1397 (void) fprintf(stderr, "-s or -g meaningless "
1398 "without -I (ignore writes)\n");
1399 usage();
1400 libzfs_fini(g_zfs);
1401 return (2);
1402 } else if (dur_secs && dur_txg) {
1403 (void) fprintf(stderr, "choose a duration either "
1404 "in seconds (-s) or a number of txgs (-g) "
1405 "but not both\n");
1406 usage();
1407 libzfs_fini(g_zfs);
1408 return (2);
1409 } else if (argc != 1) {
1410 (void) fprintf(stderr, "ignore writes (-I) "
1411 "injection requires a single pool name\n");
1412 usage();
1413 libzfs_fini(g_zfs);
1414 return (2);
1415 }
1416
1417 (void) strlcpy(pool, argv[0], sizeof (pool));
1418 dataset[0] = '\0';
1419 } else if (type == TYPE_INVAL) {
1420 if (flags == 0) {
1421 (void) fprintf(stderr, "at least one of '-b', '-d', "
1422 "'-t', '-a', '-p', '-I' or '-u' "
1423 "must be specified\n");
1424 usage();
1425 libzfs_fini(g_zfs);
1426 return (2);
1427 }
1428
1429 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1430 (void) strlcpy(pool, argv[0], sizeof (pool));
1431 dataset[0] = '\0';
1432 } else if (argc != 0) {
1433 (void) fprintf(stderr, "extraneous argument for "
1434 "'-f'\n");
1435 usage();
1436 libzfs_fini(g_zfs);
1437 return (2);
1438 }
1439
1440 flags |= ZINJECT_NULL;
1441 } else {
1442 if (argc != 1) {
1443 (void) fprintf(stderr, "missing object\n");
1444 usage();
1445 libzfs_fini(g_zfs);
1446 return (2);
1447 }
1448
1449 if (error == ENXIO || error == EILSEQ) {
1450 (void) fprintf(stderr, "data error type must be "
1451 "'checksum' or 'io'\n");
1452 libzfs_fini(g_zfs);
1453 return (1);
1454 }
1455
1456 if (dvas != 0) {
1457 if (error == EACCES || error == EINVAL) {
1458 (void) fprintf(stderr, "the '-C' option may "
1459 "not be used with logical data errors "
1460 "'decrypt' and 'decompress'\n");
1461 libzfs_fini(g_zfs);
1462 return (1);
1463 }
1464
1465 record.zi_dvas = dvas;
1466 }
1467
1468 if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) {
1469 (void) fprintf(stderr, "error type -e incompatible "
1470 "with delay injection\n");
1471 libzfs_fini(g_zfs);
1472 return (1);
1473 }
1474
1475 if (error == EACCES) {
1476 if (type != TYPE_DATA) {
1477 (void) fprintf(stderr, "decryption errors "
1478 "may only be injected for 'data' types\n");
1479 libzfs_fini(g_zfs);
1480 return (1);
1481 }
1482
1483 record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1484 /*
1485 * Internally, ZFS actually uses ECKSUM for decryption
1486 * errors since EACCES is used to indicate the key was
1487 * not found.
1488 */
1489 error = ECKSUM;
1490 } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1491 record.zi_cmd = ZINJECT_DATA_FAULT;
1492 if (!error)
1493 error = EIO;
1494 } else {
1495 record.zi_iotype = io_type;
1496 }
1497
1498 if (translate_record(type, argv[0], range, level, &record, pool,
1499 dataset) != 0) {
1500 libzfs_fini(g_zfs);
1501 return (1);
1502 }
1503 }
1504
1505 /*
1506 * If this is pool-wide metadata, unmount everything. The ioctl() will
1507 * unload the pool, so that we trigger spa-wide reopen of metadata next
1508 * time we access the pool.
1509 */
1510 if (dataset[0] != '\0' && domount) {
1511 if ((zhp = zfs_open(g_zfs, dataset,
1512 ZFS_TYPE_DATASET)) == NULL) {
1513 libzfs_fini(g_zfs);
1514 return (1);
1515 }
1516 if (zfs_unmount(zhp, NULL, 0) != 0) {
1517 libzfs_fini(g_zfs);
1518 return (1);
1519 }
1520 }
1521
1522 record.zi_error = error;
1523
1524 ret = register_handler(pool, flags, &record, quiet);
1525
1526 if (dataset[0] != '\0' && domount)
1527 ret = (zfs_mount(zhp, NULL, 0) != 0);
1528
1529 libzfs_fini(g_zfs);
1530
1531 return (ret);
1532 }
1533