1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2017, Intel Corporation.
26 * Copyright (c) 2023-2025, Klara, Inc.
27 */
28
29 /*
30 * ZFS Fault Injector
31 *
32 * This userland component takes a set of options and uses libzpool to translate
33 * from a user-visible object type and name to an internal representation.
34 * There are two basic types of faults: device faults and data faults.
35 *
36 *
37 * DEVICE FAULTS
38 *
39 * Errors can be injected into a particular vdev using the '-d' option. This
40 * option takes a path or vdev GUID to uniquely identify the device within a
41 * pool. There are four types of errors that can be injected, EIO, ENXIO,
42 * ECHILD, and EILSEQ. These can be controlled through the '-e' option and the
43 * default is ENXIO. For EIO failures, any attempt to read data from the device
44 * will return EIO, but a subsequent attempt to reopen the device will succeed.
45 * For ENXIO failures, any attempt to read from the device will return EIO, but
46 * any attempt to reopen the device will also return ENXIO. The EILSEQ failures
47 * only apply to read operations (-T read) and will flip a bit after the device
48 * has read the original data.
49 *
50 * For label faults, the -L option must be specified. This allows faults
51 * to be injected into either the nvlist, uberblock, pad1, or pad2 region
52 * of all the labels for the specified device.
53 *
54 * This form of the command looks like:
55 *
56 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
57 *
58 *
59 * DATA FAULTS
60 *
61 * We begin with a tuple of the form:
62 *
63 * <type,level,range,object>
64 *
65 * type A string describing the type of data to target. Each type
66 * implicitly describes how to interpret 'object'. Currently,
67 * the following values are supported:
68 *
69 * data User data for a file
70 * dnode Dnode for a file or directory
71 *
72 * The following MOS objects are special. Instead of injecting
73 * errors on a particular object or blkid, we inject errors across
74 * all objects of the given type.
75 *
76 * mos Any data in the MOS
77 * mosdir object directory
78 * config pool configuration
79 * bpobj blkptr list
80 * spacemap spacemap
81 * metaslab metaslab
82 * errlog persistent error log
83 *
84 * level Object level. Defaults to '0', not applicable to all types. If
85 * a range is given, this corresponds to the indirect block
86 * corresponding to the specific range.
87 *
88 * range A numerical range [start,end) within the object. Defaults to
89 * the full size of the file.
90 *
91 * object A string describing the logical location of the object. For
92 * files and directories (currently the only supported types),
93 * this is the path of the object on disk.
94 *
95 * This is translated, via libzpool, into the following internal representation:
96 *
97 * <type,objset,object,level,range>
98 *
99 * These types should be self-explanatory. This tuple is then passed to the
100 * kernel via a special ioctl() to initiate fault injection for the given
101 * object. Note that 'type' is not strictly necessary for fault injection, but
102 * is used when translating existing faults into a human-readable string.
103 *
104 *
105 * The command itself takes one of the forms:
106 *
107 * zinject
108 * zinject <-a | -u pool>
109 * zinject -c <id|all>
110 * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]
111 * [-T iotype] [-t type object | -b bookmark pool]
112 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
113 * [-r range] <object>
114 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
115 *
116 * With no arguments, the command prints all currently registered injection
117 * handlers, with their numeric identifiers.
118 *
119 * The '-c' option will clear the given handler, or all handlers if 'all' is
120 * specified.
121 *
122 * The '-e' option takes a string describing the errno to simulate. This must
123 * be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this
124 * will result in the same behavior, but RAID-Z will produce a different set of
125 * ereports for this situation.
126 *
127 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
128 * specified, then the ARC cache is flushed appropriately. If '-u' is
129 * specified, then the underlying SPA is unloaded. Either of these flags can be
130 * specified independently of any other handlers. The '-m' flag automatically
131 * does an unmount and remount of the underlying dataset to aid in flushing the
132 * cache.
133 *
134 * The '-f' flag controls the frequency of errors injected, expressed as a
135 * real number percentage between 0.0001 and 100. The default is 100.
136 *
137 * The <object> form is responsible for actually injecting the handler into the
138 * framework. It takes the arguments described above, translates them to the
139 * internal tuple using libzpool, and then issues an ioctl() to register the
140 * handler.
141 *
142 * The '-b' option can target a specific bookmark, regardless of whether a
143 * human-readable interface has been designed. It allows developers to specify
144 * a particular block by number.
145 *
146 * The '-E' option injects pipeline ready stage delays for the given object or
147 * bookmark. The delay is specified in milliseconds, and it supports I/O type
148 * and range filters.
149 */
150
151 #include <errno.h>
152 #include <fcntl.h>
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <string.h>
156 #include <strings.h>
157 #include <unistd.h>
158
159 #include <sys/fs/zfs.h>
160 #include <sys/mount.h>
161
162 #include <libzfs.h>
163
164 #undef verify /* both libzfs.h and zfs_context.h want to define this */
165
166 #include "zinject.h"
167
/* libzfs library handle; main() obtains it from libzfs_init(). */
libzfs_handle_t *g_zfs;
/* File descriptor for ZFS_DEV; main() opens it O_RDWR. */
int zfs_fd;
170
/*
 * Type names recognized on the command line.  name_to_type() returns the
 * index of the matching entry as an err_type_t, so the position of each
 * string is significant.
 * NOTE(review): the order presumably mirrors the err_type_t enum declared
 * in zinject.h -- confirm before adding or reordering entries.
 */
static const char *const errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
186
187 static err_type_t
name_to_type(const char * arg)188 name_to_type(const char *arg)
189 {
190 int i;
191 for (i = 0; i < TYPE_INVAL; i++)
192 if (strcmp(errtable[i], arg) == 0)
193 return (i);
194
195 return (TYPE_INVAL);
196 }
197
198 static const char *
type_to_name(uint64_t type)199 type_to_name(uint64_t type)
200 {
201 switch (type) {
202 case DMU_OT_OBJECT_DIRECTORY:
203 return ("mosdir");
204 case DMU_OT_OBJECT_ARRAY:
205 return ("metaslab");
206 case DMU_OT_PACKED_NVLIST:
207 return ("config");
208 case DMU_OT_BPOBJ:
209 return ("bpobj");
210 case DMU_OT_SPACE_MAP:
211 return ("spacemap");
212 case DMU_OT_ERROR_LOG:
213 return ("errlog");
214 default:
215 return ("-");
216 }
217 }
218
/* One errno value paired with the name used for it on the command line. */
struct errstr {
	int err;
	const char *str;
};
/*
 * Errno <-> name mapping used by str_to_err() and err_to_str().  The table
 * is terminated by an entry whose 'str' is NULL; both lookup functions
 * stop at that sentinel.
 */
static const struct errstr errstrtable[] = {
	{ EIO,		"io" },
	{ ECKSUM,	"checksum" },
	{ EINVAL,	"decompress" },
	{ EACCES,	"decrypt" },
	{ ENXIO,	"nxio" },
	{ ECHILD,	"dtl" },
	{ EILSEQ,	"corrupt" },
	{ ENOSYS,	"noop" },
	{ 0, NULL },
};
234
235 static int
str_to_err(const char * str)236 str_to_err(const char *str)
237 {
238 for (int i = 0; errstrtable[i].str != NULL; i++)
239 if (strcasecmp(errstrtable[i].str, str) == 0)
240 return (errstrtable[i].err);
241 return (-1);
242 }
243 static const char *
err_to_str(int err)244 err_to_str(int err)
245 {
246 for (int i = 0; errstrtable[i].str != NULL; i++)
247 if (errstrtable[i].err == err)
248 return (errstrtable[i].str);
249 return ("[unknown]");
250 }
251
/*
 * I/O type names, indexed by ZINJECT_IOTYPE_* value.  Designated
 * initializers are used, so any index below ZINJECT_IOTYPES that is not
 * listed here is NULL; str_to_iotype() and iotype_to_str() both check for
 * that.
 */
static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
	[ZINJECT_IOTYPE_NULL]	= "null",
	[ZINJECT_IOTYPE_READ]	= "read",
	[ZINJECT_IOTYPE_WRITE]	= "write",
	[ZINJECT_IOTYPE_FREE]	= "free",
	[ZINJECT_IOTYPE_CLAIM]	= "claim",
	[ZINJECT_IOTYPE_FLUSH]	= "flush",
	[ZINJECT_IOTYPE_TRIM]	= "trim",
	[ZINJECT_IOTYPE_ALL]	= "all",
	[ZINJECT_IOTYPE_PROBE]	= "probe",
};
263
264 static zinject_iotype_t
str_to_iotype(const char * arg)265 str_to_iotype(const char *arg)
266 {
267 for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
268 if (iotypestrtable[iotype] != NULL &&
269 strcasecmp(iotypestrtable[iotype], arg) == 0)
270 return (iotype);
271 return (ZINJECT_IOTYPES);
272 }
273
274 static const char *
iotype_to_str(zinject_iotype_t iotype)275 iotype_to_str(zinject_iotype_t iotype)
276 {
277 if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
278 return ("[unknown]");
279 return (iotypestrtable[iotype]);
280 }
281
/*
 * Print the usage message to stdout.
 *
 * The whole message is a single string literal passed to printf(); it
 * contains "%" only where a literal percent sign is never needed, so no
 * arguments are supplied.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    /* no trailing blank after the newline: it would lead the next line */
	    "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n"
	    "\t\t[-T iotype] [-t type object | -b bookmark pool]\n"
	    "\n"
	    "\t\tInject pipeline ready stage delays for the given object path\n"
	    "\t\t(data or dnode) or raw bookmark. The delay is specified in\n"
	    "\t\tmilliseconds.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration. Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds. The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple. Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor. The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error. Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'. Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject. Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool. Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile. The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory. The\n"
	    "\t\t\t'-r' option is incompatible with this mode. The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
416
417 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)418 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
419 void *data)
420 {
421 zfs_cmd_t zc = {"\0"};
422 int ret;
423
424 while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
425 if ((ret = func((int)zc.zc_guid, zc.zc_name,
426 &zc.zc_inject_record, data)) != 0)
427 return (ret);
428
429 if (errno != ENOENT) {
430 (void) fprintf(stderr, "Unable to list handlers: %s\n",
431 strerror(errno));
432 return (-1);
433 }
434
435 return (0);
436 }
437
438 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)439 print_data_handler(int id, const char *pool, zinject_record_t *record,
440 void *data)
441 {
442 int *count = data;
443
444 if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
445 record->zi_duration != 0) {
446 return (0);
447 }
448
449 if (*count == 0) {
450 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
451 "%-15s %-6s %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
452 "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
453 (void) printf("--- --------------- ------ "
454 "------ -------- --- ---- --------------- "
455 "------ ------\n");
456 }
457
458 *count += 1;
459
460 char rangebuf[32];
461 if (record->zi_start == 0 && record->zi_end == -1ULL)
462 snprintf(rangebuf, sizeof (rangebuf), "all");
463 else
464 snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
465 (u_longlong_t)record->zi_start,
466 (u_longlong_t)record->zi_end);
467
468
469 (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x %-15s "
470 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
471 (u_longlong_t)record->zi_objset,
472 (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
473 record->zi_level, record->zi_dvas, rangebuf,
474 record->zi_match_count, record->zi_inject_count);
475
476 return (0);
477 }
478
479 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)480 print_device_handler(int id, const char *pool, zinject_record_t *record,
481 void *data)
482 {
483 int *count = data;
484
485 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
486 return (0);
487
488 if (record->zi_cmd == ZINJECT_DELAY_IO)
489 return (0);
490
491 if (*count == 0) {
492 (void) printf("%3s %-15s %-16s %-5s %-10s %-9s "
493 "%-6s %-6s\n",
494 "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
495 "MATCH", "INJECT");
496 (void) printf(
497 "--- --------------- ---------------- "
498 "----- ---------- --------- "
499 "------ ------\n");
500 }
501
502 *count += 1;
503
504 double freq = record->zi_freq == 0 ? 100.0f :
505 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
506
507 (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%% "
508 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
509 (u_longlong_t)record->zi_guid,
510 iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
511 freq, record->zi_match_count, record->zi_inject_count);
512
513 return (0);
514 }
515
516 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)517 print_delay_handler(int id, const char *pool, zinject_record_t *record,
518 void *data)
519 {
520 int *count = data;
521
522 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
523 return (0);
524
525 if (record->zi_cmd != ZINJECT_DELAY_IO)
526 return (0);
527
528 if (*count == 0) {
529 (void) printf("%3s %-15s %-16s %-10s %-5s %-9s "
530 "%-6s %-6s\n",
531 "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
532 "MATCH", "INJECT");
533 (void) printf("--- --------------- ---------------- "
534 "---------- ----- --------- "
535 "------ ------\n");
536 }
537
538 *count += 1;
539
540 double freq = record->zi_freq == 0 ? 100.0f :
541 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
542
543 (void) printf("%3d %-15s %llx %10llu %5llu %8.4f%% "
544 "%6" PRIu64 " %6" PRIu64 "\n", id, pool,
545 (u_longlong_t)record->zi_guid,
546 (u_longlong_t)NSEC2MSEC(record->zi_timer),
547 (u_longlong_t)record->zi_nlanes,
548 freq, record->zi_match_count, record->zi_inject_count);
549
550 return (0);
551 }
552
553 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)554 print_panic_handler(int id, const char *pool, zinject_record_t *record,
555 void *data)
556 {
557 int *count = data;
558
559 if (record->zi_func[0] == '\0')
560 return (0);
561
562 if (*count == 0) {
563 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
564 (void) printf("--- --------------- ----------------\n");
565 }
566
567 *count += 1;
568
569 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
570
571 return (0);
572 }
573
574 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)575 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
576 void *data)
577 {
578 int *count = data;
579
580 if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
581 record->zi_cmd != ZINJECT_DELAY_EXPORT) {
582 return (0);
583 }
584
585 if (*count == 0) {
586 (void) printf("%3s %-19s %-11s %s\n",
587 "ID", "POOL", "DELAY (sec)", "COMMAND");
588 (void) printf("--- ------------------- -----------"
589 " -------\n");
590 }
591
592 *count += 1;
593
594 (void) printf("%3d %-19s %-11llu %s\n",
595 id, pool, (u_longlong_t)record->zi_duration,
596 record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
597
598 return (0);
599 }
600
601 /*
602 * Print all registered error handlers. Returns the number of handlers
603 * registered.
604 */
605 static int
print_all_handlers(void)606 print_all_handlers(void)
607 {
608 int count = 0, total = 0;
609
610 (void) iter_handlers(print_device_handler, &count);
611 if (count > 0) {
612 total += count;
613 (void) printf("\n");
614 count = 0;
615 }
616
617 (void) iter_handlers(print_delay_handler, &count);
618 if (count > 0) {
619 total += count;
620 (void) printf("\n");
621 count = 0;
622 }
623
624 (void) iter_handlers(print_data_handler, &count);
625 if (count > 0) {
626 total += count;
627 (void) printf("\n");
628 count = 0;
629 }
630
631 (void) iter_handlers(print_pool_delay_handler, &count);
632 if (count > 0) {
633 total += count;
634 (void) printf("\n");
635 count = 0;
636 }
637
638 (void) iter_handlers(print_panic_handler, &count);
639
640 return (count + total);
641 }
642
643 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)644 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
645 void *data)
646 {
647 (void) pool, (void) record, (void) data;
648 zfs_cmd_t zc = {"\0"};
649
650 zc.zc_guid = (uint64_t)id;
651
652 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
653 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
654 id, strerror(errno));
655 return (1);
656 }
657
658 return (0);
659 }
660
661 /*
662 * Remove all fault injection handlers.
663 */
664 static int
cancel_all_handlers(void)665 cancel_all_handlers(void)
666 {
667 int ret = iter_handlers(cancel_one_handler, NULL);
668
669 if (ret == 0)
670 (void) printf("removed all registered handlers\n");
671
672 return (ret);
673 }
674
675 /*
676 * Remove a specific fault injection handler.
677 */
678 static int
cancel_handler(int id)679 cancel_handler(int id)
680 {
681 zfs_cmd_t zc = {"\0"};
682
683 zc.zc_guid = (uint64_t)id;
684
685 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
686 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
687 id, strerror(errno));
688 return (1);
689 }
690
691 (void) printf("removed handler %d\n", id);
692
693 return (0);
694 }
695
696 /*
697 * Register a new fault injection handler.
698 */
699 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)700 register_handler(const char *pool, int flags, zinject_record_t *record,
701 int quiet)
702 {
703 zfs_cmd_t zc = {"\0"};
704
705 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
706 zc.zc_inject_record = *record;
707 zc.zc_guid = flags;
708
709 if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
710 const char *errmsg = strerror(errno);
711
712 switch (errno) {
713 case EDOM:
714 errmsg = "block level exceeds max level of object";
715 break;
716 case EEXIST:
717 if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
718 errmsg = "pool already imported";
719 if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
720 errmsg = "a handler already exists";
721 break;
722 case ENOENT:
723 /* import delay injector running on older zfs module */
724 if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
725 errmsg = "import delay injector not supported";
726 break;
727 default:
728 break;
729 }
730 (void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
731 return (1);
732 }
733
734 if (flags & ZINJECT_NULL)
735 return (0);
736
737 if (quiet) {
738 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
739 } else {
740 boolean_t show_object = B_FALSE;
741 boolean_t show_iotype = B_FALSE;
742 (void) printf("Added handler %llu with the following "
743 "properties:\n", (u_longlong_t)zc.zc_guid);
744 (void) printf(" pool: %s\n", pool);
745 if (record->zi_guid) {
746 (void) printf(" vdev: %llx\n",
747 (u_longlong_t)record->zi_guid);
748 show_iotype = B_TRUE;
749 } else if (record->zi_func[0] != '\0') {
750 (void) printf(" panic function: %s\n",
751 record->zi_func);
752 } else if (record->zi_duration > 0) {
753 (void) printf(" time: %lld seconds\n",
754 (u_longlong_t)record->zi_duration);
755 } else if (record->zi_duration < 0) {
756 (void) printf(" txgs: %lld \n",
757 (u_longlong_t)-record->zi_duration);
758 } else if (record->zi_timer > 0) {
759 (void) printf(" timer: %lld ms\n",
760 (u_longlong_t)NSEC2MSEC(record->zi_timer));
761 if (record->zi_cmd == ZINJECT_DELAY_READY) {
762 show_object = B_TRUE;
763 show_iotype = B_TRUE;
764 }
765 } else {
766 show_object = B_TRUE;
767 }
768 if (show_iotype) {
769 (void) printf("iotype: %s\n",
770 iotype_to_str(record->zi_iotype));
771 }
772 if (show_object) {
773 (void) printf("objset: %llu\n",
774 (u_longlong_t)record->zi_objset);
775 (void) printf("object: %llu\n",
776 (u_longlong_t)record->zi_object);
777 (void) printf(" type: %llu\n",
778 (u_longlong_t)record->zi_type);
779 (void) printf(" level: %d\n", record->zi_level);
780 if (record->zi_start == 0 &&
781 record->zi_end == -1ULL)
782 (void) printf(" range: all\n");
783 else
784 (void) printf(" range: [%llu, %llu)\n",
785 (u_longlong_t)record->zi_start,
786 (u_longlong_t)record->zi_end);
787 (void) printf(" dvas: 0x%x\n", record->zi_dvas);
788 }
789 }
790
791 return (0);
792 }
793
794 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)795 perform_action(const char *pool, zinject_record_t *record, int cmd)
796 {
797 zfs_cmd_t zc = {"\0"};
798
799 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
800 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
801 zc.zc_guid = record->zi_guid;
802 zc.zc_cookie = cmd;
803
804 if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
805 return (0);
806
807 return (1);
808 }
809
/*
 * Parse a "latency:lanes" specification, e.g. "10:2".
 *
 *	str	the option argument; both fields must be unsigned decimal
 *		integers with nothing before, between (other than the ':')
 *		or after them
 *	delay	out: the latency converted from milliseconds to nanoseconds
 *	nlanes	out: the number of lanes
 *
 * Returns 0 on success and 1 on any malformed, zero-latency or
 * out-of-range input.  The outputs are written only on success.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long long scan_delay;
	unsigned long long scan_nlanes;
	char *end;

	/*
	 * strtoull() skips white space and quietly negates "-N", so insist
	 * on a leading digit for each field.
	 */
	if (*str < '0' || *str > '9')
		return (1);

	errno = 0;
	scan_delay = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	str = end + 1;
	if (*str < '0' || *str > '9')
		return (1);

	errno = 0;
	scan_nlanes = strtoull(str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * Refuse latencies whose nanosecond value would not fit in a signed
	 * 64-bit quantity.
	 * NOTE(review): assumes MSEC2NSEC() multiplies by 1,000,000 in a
	 * signed 64-bit type -- confirm against its definition.
	 */
	if (scan_delay > (unsigned long long)INT64_MAX / 1000000ULL)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
838
839 static int
parse_frequency(const char * str,uint32_t * percent)840 parse_frequency(const char *str, uint32_t *percent)
841 {
842 double val;
843 char *post;
844
845 val = strtod(str, &post);
846 if (post == NULL || *post != '\0')
847 return (EINVAL);
848
849 /* valid range is [0.0001, 100.0] */
850 val /= 100.0f;
851 if (val < 0.000001f || val > 1.0f)
852 return (ERANGE);
853
854 /* convert to an integer for use by kernel */
855 *percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
856
857 return (0);
858 }
859
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010 (0x2)
 *	"0,1"	-> 0b0011 (0x3)
 *	"0,1,2"	-> 0b0111 (0x7)
 *
 * Only the digits '0', '1' and '2' are valid DVA indices.  Each index may
 * appear once, and every comma must sit between two indices, so leading,
 * trailing and doubled commas are all rejected.
 *
 * Returns 0 and stores the mask in '*dvas_out' on success.  Returns EINVAL
 * for any malformed input, leaving '*dvas_out' untouched.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	const char *c;
	uint32_t mask = 0;
	size_t len = strlen(str);

	/* max string length is 5 ("0,1,2") */
	if (len == 0 || len > 5)
		return (EINVAL);

	for (c = str; *c != '\0'; c++) {
		/*
		 * Every earlier character has already been validated, so a
		 * previous character that is not a comma must be a DVA.
		 */
		int after_dva = (c != str && c[-1] != ',');
		uint32_t bit;

		switch (*c) {
		case '0':
		case '1':
		case '2':
			/* check for comma between DVAs */
			if (after_dva)
				return (EINVAL);

			/* check if this DVA has been set already */
			bit = 1U << (*c - '0');
			if (mask & bit)
				return (EINVAL);

			mask |= bit;
			break;
		case ',':
			/* a comma is only valid directly after a DVA */
			if (!after_dva)
				return (EINVAL);
			break;
		default:
			/* check for invalid character */
			return (EINVAL);
		}
	}

	/* check for dangling delimiter (the string is known non-empty) */
	if (c[-1] == ',')
		return (EINVAL);

	*dvas_out = mask;
	return (0);
}
912
913 int
main(int argc,char ** argv)914 main(int argc, char **argv)
915 {
916 int c;
917 char *range = NULL;
918 char *cancel = NULL;
919 char *end;
920 char *raw = NULL;
921 char *device = NULL;
922 int level = 0;
923 int quiet = 0;
924 int error = 0;
925 int domount = 0;
926 int io_type = ZINJECT_IOTYPE_ALL;
927 int action = VDEV_STATE_UNKNOWN;
928 err_type_t type = TYPE_INVAL;
929 err_type_t label = TYPE_INVAL;
930 zinject_record_t record = { 0 };
931 char pool[MAXNAMELEN] = "";
932 char dataset[MAXNAMELEN] = "";
933 zfs_handle_t *zhp = NULL;
934 int nowrites = 0;
935 int dur_txg = 0;
936 int dur_secs = 0;
937 int ret;
938 int flags = 0;
939 uint32_t dvas = 0;
940 hrtime_t ready_delay = -1;
941
942 if ((g_zfs = libzfs_init()) == NULL) {
943 (void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
944 return (1);
945 }
946
947 libzfs_print_on_error(g_zfs, B_TRUE);
948
949 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
950 (void) fprintf(stderr, "failed to open ZFS device\n");
951 libzfs_fini(g_zfs);
952 return (1);
953 }
954
955 if (argc == 1) {
956 /*
957 * No arguments. Print the available handlers. If there are no
958 * available handlers, direct the user to '-h' for help
959 * information.
960 */
961 if (print_all_handlers() == 0) {
962 (void) printf("No handlers registered.\n");
963 (void) printf("Run 'zinject -h' for usage "
964 "information.\n");
965 }
966 libzfs_fini(g_zfs);
967 return (0);
968 }
969
970 while ((c = getopt(argc, argv,
971 ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
972 switch (c) {
973 case 'a':
974 flags |= ZINJECT_FLUSH_ARC;
975 break;
976 case 'A':
977 if (strcasecmp(optarg, "degrade") == 0) {
978 action = VDEV_STATE_DEGRADED;
979 } else if (strcasecmp(optarg, "fault") == 0) {
980 action = VDEV_STATE_FAULTED;
981 } else {
982 (void) fprintf(stderr, "invalid action '%s': "
983 "must be 'degrade' or 'fault'\n", optarg);
984 usage();
985 libzfs_fini(g_zfs);
986 return (1);
987 }
988 break;
989 case 'b':
990 raw = optarg;
991 break;
992 case 'c':
993 cancel = optarg;
994 break;
995 case 'C':
996 ret = parse_dvas(optarg, &dvas);
997 if (ret != 0) {
998 (void) fprintf(stderr, "invalid DVA list '%s': "
999 "DVAs should be 0 indexed and separated by "
1000 "commas.\n", optarg);
1001 usage();
1002 libzfs_fini(g_zfs);
1003 return (1);
1004 }
1005 break;
1006 case 'd':
1007 device = optarg;
1008 break;
1009 case 'D':
1010 errno = 0;
1011 ret = parse_delay(optarg, &record.zi_timer,
1012 &record.zi_nlanes);
1013 if (ret != 0) {
1014
1015 (void) fprintf(stderr, "invalid i/o delay "
1016 "value: '%s'\n", optarg);
1017 usage();
1018 libzfs_fini(g_zfs);
1019 return (1);
1020 }
1021 break;
1022 case 'e':
1023 error = str_to_err(optarg);
1024 if (error < 0) {
1025 (void) fprintf(stderr, "invalid error type "
1026 "'%s': must be one of: io decompress "
1027 "decrypt nxio dtl corrupt noop\n",
1028 optarg);
1029 usage();
1030 libzfs_fini(g_zfs);
1031 return (1);
1032 }
1033 break;
1034 case 'f':
1035 ret = parse_frequency(optarg, &record.zi_freq);
1036 if (ret != 0) {
1037 (void) fprintf(stderr, "%sfrequency value must "
1038 "be in the range [0.0001, 100.0]\n",
1039 ret == EINVAL ? "invalid value: " :
1040 ret == ERANGE ? "out of range: " : "");
1041 libzfs_fini(g_zfs);
1042 return (1);
1043 }
1044 break;
1045 case 'F':
1046 record.zi_failfast = B_TRUE;
1047 break;
1048 case 'g':
1049 dur_txg = 1;
1050 record.zi_duration = (int)strtol(optarg, &end, 10);
1051 if (record.zi_duration <= 0 || *end != '\0') {
1052 (void) fprintf(stderr, "invalid duration '%s': "
1053 "must be a positive integer\n", optarg);
1054 usage();
1055 libzfs_fini(g_zfs);
1056 return (1);
1057 }
1058 /* store duration of txgs as its negative */
1059 record.zi_duration *= -1;
1060 break;
1061 case 'h':
1062 usage();
1063 libzfs_fini(g_zfs);
1064 return (0);
1065 case 'I':
1066 /* default duration, if one hasn't yet been defined */
1067 nowrites = 1;
1068 if (dur_secs == 0 && dur_txg == 0)
1069 record.zi_duration = 30;
1070 break;
1071 case 'l':
1072 level = (int)strtol(optarg, &end, 10);
1073 if (*end != '\0') {
1074 (void) fprintf(stderr, "invalid level '%s': "
1075 "must be an integer\n", optarg);
1076 usage();
1077 libzfs_fini(g_zfs);
1078 return (1);
1079 }
1080 break;
1081 case 'm':
1082 domount = 1;
1083 break;
1084 case 'p':
1085 (void) strlcpy(record.zi_func, optarg,
1086 sizeof (record.zi_func));
1087 record.zi_cmd = ZINJECT_PANIC;
1088 break;
1089 case 'P':
1090 if (strcasecmp(optarg, "import") == 0) {
1091 record.zi_cmd = ZINJECT_DELAY_IMPORT;
1092 } else if (strcasecmp(optarg, "export") == 0) {
1093 record.zi_cmd = ZINJECT_DELAY_EXPORT;
1094 } else {
1095 (void) fprintf(stderr, "invalid command '%s': "
1096 "must be 'import' or 'export'\n", optarg);
1097 usage();
1098 libzfs_fini(g_zfs);
1099 return (1);
1100 }
1101 break;
1102 case 'q':
1103 quiet = 1;
1104 break;
1105 case 'r':
1106 range = optarg;
1107 flags |= ZINJECT_CALC_RANGE;
1108 break;
1109 case 's':
1110 dur_secs = 1;
1111 record.zi_duration = (int)strtol(optarg, &end, 10);
1112 if (record.zi_duration <= 0 || *end != '\0') {
1113 (void) fprintf(stderr, "invalid duration '%s': "
1114 "must be a positive integer\n", optarg);
1115 usage();
1116 libzfs_fini(g_zfs);
1117 return (1);
1118 }
1119 break;
1120 case 'T':
1121 io_type = str_to_iotype(optarg);
1122 if (io_type == ZINJECT_IOTYPES) {
1123 (void) fprintf(stderr, "invalid I/O type "
1124 "'%s': must be 'read', 'write', 'free', "
1125 "'claim', 'flush' or 'all'\n", optarg);
1126 usage();
1127 libzfs_fini(g_zfs);
1128 return (1);
1129 }
1130 break;
1131 case 't':
1132 if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1133 !MOS_TYPE(type)) {
1134 (void) fprintf(stderr, "invalid type '%s'\n",
1135 optarg);
1136 usage();
1137 libzfs_fini(g_zfs);
1138 return (1);
1139 }
1140 break;
1141 case 'u':
1142 flags |= ZINJECT_UNLOAD_SPA;
1143 break;
1144 case 'E':
1145 ready_delay = MSEC2NSEC(strtol(optarg, &end, 10));
1146 if (ready_delay <= 0 || *end != '\0') {
1147 (void) fprintf(stderr, "invalid delay '%s': "
1148 "must be a positive duration\n", optarg);
1149 usage();
1150 libzfs_fini(g_zfs);
1151 return (1);
1152 }
1153 record.zi_cmd = ZINJECT_DELAY_READY;
1154 record.zi_timer = ready_delay;
1155 break;
1156 case 'L':
1157 if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1158 !LABEL_TYPE(type)) {
1159 (void) fprintf(stderr, "invalid label type "
1160 "'%s'\n", optarg);
1161 usage();
1162 libzfs_fini(g_zfs);
1163 return (1);
1164 }
1165 break;
1166 case ':':
1167 (void) fprintf(stderr, "option -%c requires an "
1168 "operand\n", optopt);
1169 usage();
1170 libzfs_fini(g_zfs);
1171 return (1);
1172 case '?':
1173 (void) fprintf(stderr, "invalid option '%c'\n",
1174 optopt);
1175 usage();
1176 libzfs_fini(g_zfs);
1177 return (2);
1178 }
1179 }
1180
1181 argc -= optind;
1182 argv += optind;
1183
1184 if (record.zi_duration != 0 && record.zi_cmd == 0)
1185 record.zi_cmd = ZINJECT_IGNORED_WRITES;
1186
1187 if (cancel != NULL) {
1188 /*
1189 * '-c' is invalid with any other options.
1190 */
1191 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1192 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1193 record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) {
1194 (void) fprintf(stderr, "cancel (-c) incompatible with "
1195 "any other options\n");
1196 usage();
1197 libzfs_fini(g_zfs);
1198 return (2);
1199 }
1200 if (argc != 0) {
1201 (void) fprintf(stderr, "extraneous argument to '-c'\n");
1202 usage();
1203 libzfs_fini(g_zfs);
1204 return (2);
1205 }
1206
1207 if (strcmp(cancel, "all") == 0) {
1208 return (cancel_all_handlers());
1209 } else {
1210 int id = (int)strtol(cancel, &end, 10);
1211 if (*end != '\0') {
1212 (void) fprintf(stderr, "invalid handle id '%s':"
1213 " must be an integer or 'all'\n", cancel);
1214 usage();
1215 libzfs_fini(g_zfs);
1216 return (1);
1217 }
1218 return (cancel_handler(id));
1219 }
1220 }
1221
1222 if (device != NULL) {
1223 /*
1224 * Device (-d) injection uses a completely different mechanism
1225 * for doing injection, so handle it separately here.
1226 */
1227 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1228 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1229 dvas != 0 || ready_delay >= 0) {
1230 (void) fprintf(stderr, "device (-d) incompatible with "
1231 "data error injection\n");
1232 usage();
1233 libzfs_fini(g_zfs);
1234 return (2);
1235 }
1236
1237 if (argc != 1) {
1238 (void) fprintf(stderr, "device (-d) injection requires "
1239 "a single pool name\n");
1240 usage();
1241 libzfs_fini(g_zfs);
1242 return (2);
1243 }
1244
1245 (void) strlcpy(pool, argv[0], sizeof (pool));
1246 dataset[0] = '\0';
1247
1248 if (error == ECKSUM) {
1249 (void) fprintf(stderr, "device error type must be "
1250 "'io', 'nxio' or 'corrupt'\n");
1251 libzfs_fini(g_zfs);
1252 return (1);
1253 }
1254
1255 if (error == EILSEQ &&
1256 (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1257 (void) fprintf(stderr, "device corrupt errors require "
1258 "io type read and a frequency value\n");
1259 libzfs_fini(g_zfs);
1260 return (1);
1261 }
1262
1263 record.zi_iotype = io_type;
1264 if (translate_device(pool, device, label, &record) != 0) {
1265 libzfs_fini(g_zfs);
1266 return (1);
1267 }
1268
1269 if (record.zi_nlanes) {
1270 switch (io_type) {
1271 case ZINJECT_IOTYPE_READ:
1272 case ZINJECT_IOTYPE_WRITE:
1273 case ZINJECT_IOTYPE_ALL:
1274 break;
1275 default:
1276 (void) fprintf(stderr, "I/O type for a delay "
1277 "must be 'read' or 'write'\n");
1278 usage();
1279 libzfs_fini(g_zfs);
1280 return (1);
1281 }
1282 }
1283
1284 if (!error)
1285 error = ENXIO;
1286
1287 if (action != VDEV_STATE_UNKNOWN)
1288 return (perform_action(pool, &record, action));
1289
1290 } else if (raw != NULL) {
1291 if (range != NULL || type != TYPE_INVAL || level != 0 ||
1292 record.zi_cmd != ZINJECT_UNINITIALIZED ||
1293 record.zi_freq > 0 || dvas != 0) {
1294 (void) fprintf(stderr, "raw (-b) format with "
1295 "any other options\n");
1296 usage();
1297 libzfs_fini(g_zfs);
1298 return (2);
1299 }
1300
1301 if (argc != 1) {
1302 (void) fprintf(stderr, "raw (-b) format expects a "
1303 "single pool name\n");
1304 usage();
1305 libzfs_fini(g_zfs);
1306 return (2);
1307 }
1308
1309 (void) strlcpy(pool, argv[0], sizeof (pool));
1310 dataset[0] = '\0';
1311
1312 if (error == ENXIO) {
1313 (void) fprintf(stderr, "data error type must be "
1314 "'checksum' or 'io'\n");
1315 libzfs_fini(g_zfs);
1316 return (1);
1317 }
1318
1319 if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1320 record.zi_cmd = ZINJECT_DATA_FAULT;
1321 if (!error)
1322 error = EIO;
1323 } else if (error != 0) {
1324 (void) fprintf(stderr, "error type -e incompatible "
1325 "with delay injection\n");
1326 libzfs_fini(g_zfs);
1327 return (1);
1328 } else {
1329 record.zi_iotype = io_type;
1330 }
1331
1332 if (translate_raw(raw, &record) != 0) {
1333 libzfs_fini(g_zfs);
1334 return (1);
1335 }
1336 } else if (record.zi_cmd == ZINJECT_PANIC) {
1337 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1338 level != 0 || device != NULL || record.zi_freq > 0 ||
1339 dvas != 0) {
1340 (void) fprintf(stderr, "%s incompatible with other "
1341 "options\n", "import|export delay (-P)");
1342 usage();
1343 libzfs_fini(g_zfs);
1344 return (2);
1345 }
1346
1347 if (argc < 1 || argc > 2) {
1348 (void) fprintf(stderr, "panic (-p) injection requires "
1349 "a single pool name and an optional id\n");
1350 usage();
1351 libzfs_fini(g_zfs);
1352 return (2);
1353 }
1354
1355 (void) strlcpy(pool, argv[0], sizeof (pool));
1356 if (argv[1] != NULL)
1357 record.zi_type = atoi(argv[1]);
1358 dataset[0] = '\0';
1359 } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1360 record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1361 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1362 level != 0 || device != NULL || record.zi_freq > 0 ||
1363 dvas != 0) {
1364 (void) fprintf(stderr, "%s incompatible with other "
1365 "options\n", "import|export delay (-P)");
1366 usage();
1367 libzfs_fini(g_zfs);
1368 return (2);
1369 }
1370
1371 if (argc != 1 || record.zi_duration <= 0) {
1372 (void) fprintf(stderr, "import|export delay (-P) "
1373 "injection requires a duration (-s) and a single "
1374 "pool name\n");
1375 usage();
1376 libzfs_fini(g_zfs);
1377 return (2);
1378 }
1379
1380 (void) strlcpy(pool, argv[0], sizeof (pool));
1381 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1382 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1383 level != 0 || record.zi_freq > 0 || dvas != 0) {
1384 (void) fprintf(stderr, "hardware failure (-I) "
1385 "incompatible with other options\n");
1386 usage();
1387 libzfs_fini(g_zfs);
1388 return (2);
1389 }
1390
1391 if (nowrites == 0) {
1392 (void) fprintf(stderr, "-s or -g meaningless "
1393 "without -I (ignore writes)\n");
1394 usage();
1395 libzfs_fini(g_zfs);
1396 return (2);
1397 } else if (dur_secs && dur_txg) {
1398 (void) fprintf(stderr, "choose a duration either "
1399 "in seconds (-s) or a number of txgs (-g) "
1400 "but not both\n");
1401 usage();
1402 libzfs_fini(g_zfs);
1403 return (2);
1404 } else if (argc != 1) {
1405 (void) fprintf(stderr, "ignore writes (-I) "
1406 "injection requires a single pool name\n");
1407 usage();
1408 libzfs_fini(g_zfs);
1409 return (2);
1410 }
1411
1412 (void) strlcpy(pool, argv[0], sizeof (pool));
1413 dataset[0] = '\0';
1414 } else if (type == TYPE_INVAL) {
1415 if (flags == 0) {
1416 (void) fprintf(stderr, "at least one of '-b', '-d', "
1417 "'-t', '-a', '-p', '-I' or '-u' "
1418 "must be specified\n");
1419 usage();
1420 libzfs_fini(g_zfs);
1421 return (2);
1422 }
1423
1424 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1425 (void) strlcpy(pool, argv[0], sizeof (pool));
1426 dataset[0] = '\0';
1427 } else if (argc != 0) {
1428 (void) fprintf(stderr, "extraneous argument for "
1429 "'-f'\n");
1430 usage();
1431 libzfs_fini(g_zfs);
1432 return (2);
1433 }
1434
1435 flags |= ZINJECT_NULL;
1436 } else {
1437 if (argc != 1) {
1438 (void) fprintf(stderr, "missing object\n");
1439 usage();
1440 libzfs_fini(g_zfs);
1441 return (2);
1442 }
1443
1444 if (error == ENXIO || error == EILSEQ) {
1445 (void) fprintf(stderr, "data error type must be "
1446 "'checksum' or 'io'\n");
1447 libzfs_fini(g_zfs);
1448 return (1);
1449 }
1450
1451 if (dvas != 0) {
1452 if (error == EACCES || error == EINVAL) {
1453 (void) fprintf(stderr, "the '-C' option may "
1454 "not be used with logical data errors "
1455 "'decrypt' and 'decompress'\n");
1456 libzfs_fini(g_zfs);
1457 return (1);
1458 }
1459
1460 record.zi_dvas = dvas;
1461 }
1462
1463 if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) {
1464 (void) fprintf(stderr, "error type -e incompatible "
1465 "with delay injection\n");
1466 libzfs_fini(g_zfs);
1467 return (1);
1468 }
1469
1470 if (error == EACCES) {
1471 if (type != TYPE_DATA) {
1472 (void) fprintf(stderr, "decryption errors "
1473 "may only be injected for 'data' types\n");
1474 libzfs_fini(g_zfs);
1475 return (1);
1476 }
1477
1478 record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1479 /*
1480 * Internally, ZFS actually uses ECKSUM for decryption
1481 * errors since EACCES is used to indicate the key was
1482 * not found.
1483 */
1484 error = ECKSUM;
1485 } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1486 record.zi_cmd = ZINJECT_DATA_FAULT;
1487 if (!error)
1488 error = EIO;
1489 } else {
1490 record.zi_iotype = io_type;
1491 }
1492
1493 if (translate_record(type, argv[0], range, level, &record, pool,
1494 dataset) != 0) {
1495 libzfs_fini(g_zfs);
1496 return (1);
1497 }
1498 }
1499
1500 /*
1501 * If this is pool-wide metadata, unmount everything. The ioctl() will
1502 * unload the pool, so that we trigger spa-wide reopen of metadata next
1503 * time we access the pool.
1504 */
1505 if (dataset[0] != '\0' && domount) {
1506 if ((zhp = zfs_open(g_zfs, dataset,
1507 ZFS_TYPE_DATASET)) == NULL) {
1508 libzfs_fini(g_zfs);
1509 return (1);
1510 }
1511 if (zfs_unmount(zhp, NULL, 0) != 0) {
1512 libzfs_fini(g_zfs);
1513 return (1);
1514 }
1515 }
1516
1517 record.zi_error = error;
1518
1519 ret = register_handler(pool, flags, &record, quiet);
1520
1521 if (dataset[0] != '\0' && domount)
1522 ret = (zfs_mount(zhp, NULL, 0) != 0);
1523
1524 libzfs_fini(g_zfs);
1525
1526 return (ret);
1527 }
1528