xref: /freebsd/sys/contrib/openzfs/cmd/zinject/zinject.c (revision 5289625dfecb962e0410dfafc403aced3b9a2e4b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright (c) 2017, Intel Corporation.
25  * Copyright (c) 2023-2025, Klara, Inc.
26  */
27 
28 /*
29  * ZFS Fault Injector
30  *
31  * This userland component takes a set of options and uses libzpool to translate
32  * from a user-visible object type and name to an internal representation.
33  * There are two basic types of faults: device faults and data faults.
34  *
35  *
36  * DEVICE FAULTS
37  *
38  * Errors can be injected into a particular vdev using the '-d' option.  This
39  * option takes a path or vdev GUID to uniquely identify the device within a
 * pool.  There are four types of errors that can be injected: EIO, ENXIO,
 * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
 * default is ENXIO.  For EIO failures, any attempt to read data from the device
 * will return EIO, but a subsequent attempt to reopen the device will succeed.
 * For ENXIO failures, any attempt to read from the device will return EIO, and
 * any attempt to reopen the device will also return ENXIO.  The EILSEQ failures
46  * only apply to read operations (-T read) and will flip a bit after the device
47  * has read the original data.
48  *
49  * For label faults, the -L option must be specified. This allows faults
50  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
51  * of all the labels for the specified device.
52  *
53  * This form of the command looks like:
54  *
55  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
56  *
57  *
58  * DATA FAULTS
59  *
60  * We begin with a tuple of the form:
61  *
62  * 	<type,level,range,object>
63  *
64  * 	type	A string describing the type of data to target.  Each type
65  * 		implicitly describes how to interpret 'object'. Currently,
66  * 		the following values are supported:
67  *
68  * 		data		User data for a file
69  * 		dnode		Dnode for a file or directory
70  *
71  *		The following MOS objects are special.  Instead of injecting
72  *		errors on a particular object or blkid, we inject errors across
73  *		all objects of the given type.
74  *
75  * 		mos		Any data in the MOS
76  * 		mosdir		object directory
77  * 		config		pool configuration
78  * 		bpobj		blkptr list
79  * 		spacemap	spacemap
80  * 		metaslab	metaslab
81  * 		errlog		persistent error log
82  *
83  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
84  * 		a range is given, this corresponds to the indirect block
85  * 		corresponding to the specific range.
86  *
87  *	range	A numerical range [start,end) within the object.  Defaults to
88  *		the full size of the file.
89  *
90  * 	object	A string describing the logical location of the object.  For
91  * 		files and directories (currently the only supported types),
92  * 		this is the path of the object on disk.
93  *
94  * This is translated, via libzpool, into the following internal representation:
95  *
96  * 	<type,objset,object,level,range>
97  *
98  * These types should be self-explanatory.  This tuple is then passed to the
99  * kernel via a special ioctl() to initiate fault injection for the given
100  * object.  Note that 'type' is not strictly necessary for fault injection, but
101  * is used when translating existing faults into a human-readable string.
102  *
103  *
104  * The command itself takes one of the forms:
105  *
106  * 	zinject
107  * 	zinject <-a | -u pool>
108  * 	zinject -c <id|all>
109  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
110  *	    [-r range] <object>
111  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
112  *
113  * With no arguments, the command prints all currently registered injection
114  * handlers, with their numeric identifiers.
115  *
116  * The '-c' option will clear the given handler, or all handlers if 'all' is
117  * specified.
118  *
119  * The '-e' option takes a string describing the errno to simulate.  This must
120  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
121  * will result in the same behavior, but RAID-Z will produce a different set of
122  * ereports for this situation.
123  *
124  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
125  * specified, then the ARC cache is flushed appropriately.  If '-u' is
126  * specified, then the underlying SPA is unloaded.  Either of these flags can be
127  * specified independently of any other handlers.  The '-m' flag automatically
128  * does an unmount and remount of the underlying dataset to aid in flushing the
129  * cache.
130  *
131  * The '-f' flag controls the frequency of errors injected, expressed as a
132  * real number percentage between 0.0001 and 100.  The default is 100.
133  *
 * The fourth form (with '-t') is responsible for actually injecting the
 * handler into the framework.  It takes the arguments described above,
 * translates them to the internal tuple using libzpool, and then issues an
 * ioctl() to register the handler.
138  *
139  * The final form can target a specific bookmark, regardless of whether a
140  * human-readable interface has been designed.  It allows developers to specify
141  * a particular block by number.
142  */
143 
144 #include <errno.h>
145 #include <fcntl.h>
146 #include <stdio.h>
147 #include <stdlib.h>
148 #include <string.h>
149 #include <strings.h>
150 #include <unistd.h>
151 
152 #include <sys/fs/zfs.h>
153 #include <sys/mount.h>
154 
155 #include <libzfs.h>
156 
157 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
158 
159 #include "zinject.h"
160 
/* libzfs handle; set by libzfs_init() in main() and passed to zfs_ioctl() */
libzfs_handle_t *g_zfs;
/* descriptor for ZFS_DEV, opened O_RDWR in main() */
int zfs_fd;
163 
/*
 * Names of the injectable object/label types.  name_to_type() returns the
 * index of the matching entry as the err_type_t value, so the order of the
 * entries is significant (presumably it mirrors the err_type_t enumeration;
 * verify against its declaration before reordering).
 */
static const char *const errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
179 
180 static err_type_t
181 name_to_type(const char *arg)
182 {
183 	int i;
184 	for (i = 0; i < TYPE_INVAL; i++)
185 		if (strcmp(errtable[i], arg) == 0)
186 			return (i);
187 
188 	return (TYPE_INVAL);
189 }
190 
191 static const char *
192 type_to_name(uint64_t type)
193 {
194 	switch (type) {
195 	case DMU_OT_OBJECT_DIRECTORY:
196 		return ("mosdir");
197 	case DMU_OT_OBJECT_ARRAY:
198 		return ("metaslab");
199 	case DMU_OT_PACKED_NVLIST:
200 		return ("config");
201 	case DMU_OT_BPOBJ:
202 		return ("bpobj");
203 	case DMU_OT_SPACE_MAP:
204 		return ("spacemap");
205 	case DMU_OT_ERROR_LOG:
206 		return ("errlog");
207 	default:
208 		return ("-");
209 	}
210 }
211 
/* One errno value paired with the name used for it on the command line. */
struct errstr {
	int		err;
	const char	*str;
};
/*
 * Mapping between errno values and their command line names, searched by
 * str_to_err() and err_to_str().  The entry with a NULL string terminates
 * the table and must stay last.
 */
static const struct errstr errstrtable[] = {
	{ EIO,		"io" },
	{ ECKSUM,	"checksum" },
	{ EINVAL,	"decompress" },
	{ EACCES,	"decrypt" },
	{ ENXIO,	"nxio" },
	{ ECHILD,	"dtl" },
	{ EILSEQ,	"corrupt" },
	{ ENOSYS,	"noop" },
	{ 0, NULL },
};
227 
228 static int
229 str_to_err(const char *str)
230 {
231 	for (int i = 0; errstrtable[i].str != NULL; i++)
232 		if (strcasecmp(errstrtable[i].str, str) == 0)
233 			return (errstrtable[i].err);
234 	return (-1);
235 }
236 static const char *
237 err_to_str(int err)
238 {
239 	for (int i = 0; errstrtable[i].str != NULL; i++)
240 		if (errstrtable[i].err == err)
241 			return (errstrtable[i].str);
242 	return ("[unknown]");
243 }
244 
/*
 * Names of the I/O types, indexed by ZINJECT_IOTYPE_* value.  Slots that are
 * not initialized here are NULL; str_to_iotype() and iotype_to_str() both
 * check for that.
 */
static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
	[ZINJECT_IOTYPE_NULL]	= "null",
	[ZINJECT_IOTYPE_READ]	= "read",
	[ZINJECT_IOTYPE_WRITE]	= "write",
	[ZINJECT_IOTYPE_FREE]	= "free",
	[ZINJECT_IOTYPE_CLAIM]	= "claim",
	[ZINJECT_IOTYPE_FLUSH]	= "flush",
	[ZINJECT_IOTYPE_TRIM]	= "trim",
	[ZINJECT_IOTYPE_ALL]	= "all",
	[ZINJECT_IOTYPE_PROBE]	= "probe",
};
256 
257 static zinject_iotype_t
258 str_to_iotype(const char *arg)
259 {
260 	for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
261 		if (iotypestrtable[iotype] != NULL &&
262 		    strcasecmp(iotypestrtable[iotype], arg) == 0)
263 			return (iotype);
264 	return (ZINJECT_IOTYPES);
265 }
266 
267 static const char *
268 iotype_to_str(zinject_iotype_t iotype)
269 {
270 	if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
271 		return ("[unknown]");
272 	return (iotypestrtable[iotype]);
273 }
274 
/*
 * Print the usage message to stdout.
 *
 * The text is a single string literal.  Every line of the message starts
 * with a tab (or is empty) and carries no trailing blanks, so keep that in
 * mind when adding text here.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only\n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or\n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
402 
403 static int
404 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
405     void *data)
406 {
407 	zfs_cmd_t zc = {"\0"};
408 	int ret;
409 
410 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
411 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
412 		    &zc.zc_inject_record, data)) != 0)
413 			return (ret);
414 
415 	if (errno != ENOENT) {
416 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
417 		    strerror(errno));
418 		return (-1);
419 	}
420 
421 	return (0);
422 }
423 
424 static int
425 print_data_handler(int id, const char *pool, zinject_record_t *record,
426     void *data)
427 {
428 	int *count = data;
429 
430 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
431 	    record->zi_duration != 0) {
432 		return (0);
433 	}
434 
435 	if (*count == 0) {
436 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
437 		    "%-15s  %-6s  %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
438 		    "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
439 		(void) printf("---  ---------------  ------  "
440 		    "------  --------  ---  ----  ---------------  "
441 		    "------  ------\n");
442 	}
443 
444 	*count += 1;
445 
446 	char rangebuf[32];
447 	if (record->zi_start == 0 && record->zi_end == -1ULL)
448 		snprintf(rangebuf, sizeof (rangebuf), "all");
449 	else
450 		snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
451 		    (u_longlong_t)record->zi_start,
452 		    (u_longlong_t)record->zi_end);
453 
454 
455 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  %-15s  "
456 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_objset,
457 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
458 	    record->zi_level, record->zi_dvas, rangebuf,
459 	    (u_longlong_t)record->zi_match_count,
460 	    (u_longlong_t)record->zi_inject_count);
461 
462 	return (0);
463 }
464 
465 static int
466 print_device_handler(int id, const char *pool, zinject_record_t *record,
467     void *data)
468 {
469 	int *count = data;
470 
471 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
472 		return (0);
473 
474 	if (record->zi_cmd == ZINJECT_DELAY_IO)
475 		return (0);
476 
477 	if (*count == 0) {
478 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s  "
479 		    "%-6s  %-6s\n",
480 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
481 		    "MATCH", "INJECT");
482 		(void) printf(
483 		    "---  ---------------  ----------------  "
484 		    "-----  ----------  ---------  "
485 		    "------  ------\n");
486 	}
487 
488 	*count += 1;
489 
490 	double freq = record->zi_freq == 0 ? 100.0f :
491 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
492 
493 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%  "
494 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_guid,
495 	    iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
496 	    freq, (u_longlong_t)record->zi_match_count,
497 	    (u_longlong_t)record->zi_inject_count);
498 
499 	return (0);
500 }
501 
502 static int
503 print_delay_handler(int id, const char *pool, zinject_record_t *record,
504     void *data)
505 {
506 	int *count = data;
507 
508 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
509 		return (0);
510 
511 	if (record->zi_cmd != ZINJECT_DELAY_IO)
512 		return (0);
513 
514 	if (*count == 0) {
515 		(void) printf("%3s  %-15s  %-16s  %-10s  %-5s  %-9s  "
516 		    "%-6s  %-6s\n",
517 		    "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
518 		    "MATCH", "INJECT");
519 		(void) printf("---  ---------------  ----------------  "
520 		    "----------  -----  ---------  "
521 		    "------  ------\n");
522 	}
523 
524 	*count += 1;
525 
526 	double freq = record->zi_freq == 0 ? 100.0f :
527 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
528 
529 	(void) printf("%3d  %-15s  %llx  %10llu  %5llu  %8.4f%%  "
530 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_guid,
531 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
532 	    (u_longlong_t)record->zi_nlanes, freq,
533 	    (u_longlong_t)record->zi_match_count,
534 	    (u_longlong_t)record->zi_inject_count);
535 
536 	return (0);
537 }
538 
539 static int
540 print_panic_handler(int id, const char *pool, zinject_record_t *record,
541     void *data)
542 {
543 	int *count = data;
544 
545 	if (record->zi_func[0] == '\0')
546 		return (0);
547 
548 	if (*count == 0) {
549 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
550 		(void) printf("---  ---------------  ----------------\n");
551 	}
552 
553 	*count += 1;
554 
555 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
556 
557 	return (0);
558 }
559 
560 static int
561 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
562     void *data)
563 {
564 	int *count = data;
565 
566 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
567 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
568 		return (0);
569 	}
570 
571 	if (*count == 0) {
572 		(void) printf("%3s  %-19s  %-11s  %s\n",
573 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
574 		(void) printf("---  -------------------  -----------"
575 		    "  -------\n");
576 	}
577 
578 	*count += 1;
579 
580 	(void) printf("%3d  %-19s  %-11llu  %s\n",
581 	    id, pool, (u_longlong_t)record->zi_duration,
582 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
583 
584 	return (0);
585 }
586 
587 /*
588  * Print all registered error handlers.  Returns the number of handlers
589  * registered.
590  */
591 static int
592 print_all_handlers(void)
593 {
594 	int count = 0, total = 0;
595 
596 	(void) iter_handlers(print_device_handler, &count);
597 	if (count > 0) {
598 		total += count;
599 		(void) printf("\n");
600 		count = 0;
601 	}
602 
603 	(void) iter_handlers(print_delay_handler, &count);
604 	if (count > 0) {
605 		total += count;
606 		(void) printf("\n");
607 		count = 0;
608 	}
609 
610 	(void) iter_handlers(print_data_handler, &count);
611 	if (count > 0) {
612 		total += count;
613 		(void) printf("\n");
614 		count = 0;
615 	}
616 
617 	(void) iter_handlers(print_pool_delay_handler, &count);
618 	if (count > 0) {
619 		total += count;
620 		(void) printf("\n");
621 		count = 0;
622 	}
623 
624 	(void) iter_handlers(print_panic_handler, &count);
625 
626 	return (count + total);
627 }
628 
629 static int
630 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
631     void *data)
632 {
633 	(void) pool, (void) record, (void) data;
634 	zfs_cmd_t zc = {"\0"};
635 
636 	zc.zc_guid = (uint64_t)id;
637 
638 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
639 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
640 		    id, strerror(errno));
641 		return (1);
642 	}
643 
644 	return (0);
645 }
646 
647 /*
648  * Remove all fault injection handlers.
649  */
650 static int
651 cancel_all_handlers(void)
652 {
653 	int ret = iter_handlers(cancel_one_handler, NULL);
654 
655 	if (ret == 0)
656 		(void) printf("removed all registered handlers\n");
657 
658 	return (ret);
659 }
660 
661 /*
662  * Remove a specific fault injection handler.
663  */
664 static int
665 cancel_handler(int id)
666 {
667 	zfs_cmd_t zc = {"\0"};
668 
669 	zc.zc_guid = (uint64_t)id;
670 
671 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
672 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
673 		    id, strerror(errno));
674 		return (1);
675 	}
676 
677 	(void) printf("removed handler %d\n", id);
678 
679 	return (0);
680 }
681 
682 /*
683  * Register a new fault injection handler.
684  */
685 static int
686 register_handler(const char *pool, int flags, zinject_record_t *record,
687     int quiet)
688 {
689 	zfs_cmd_t zc = {"\0"};
690 
691 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
692 	zc.zc_inject_record = *record;
693 	zc.zc_guid = flags;
694 
695 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
696 		const char *errmsg = strerror(errno);
697 
698 		switch (errno) {
699 		case EDOM:
700 			errmsg = "block level exceeds max level of object";
701 			break;
702 		case EEXIST:
703 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
704 				errmsg = "pool already imported";
705 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
706 				errmsg = "a handler already exists";
707 			break;
708 		case ENOENT:
709 			/* import delay injector running on older zfs module */
710 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
711 				errmsg = "import delay injector not supported";
712 			break;
713 		default:
714 			break;
715 		}
716 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
717 		return (1);
718 	}
719 
720 	if (flags & ZINJECT_NULL)
721 		return (0);
722 
723 	if (quiet) {
724 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
725 	} else {
726 		(void) printf("Added handler %llu with the following "
727 		    "properties:\n", (u_longlong_t)zc.zc_guid);
728 		(void) printf("  pool: %s\n", pool);
729 		if (record->zi_guid) {
730 			(void) printf("  vdev: %llx\n",
731 			    (u_longlong_t)record->zi_guid);
732 		} else if (record->zi_func[0] != '\0') {
733 			(void) printf("  panic function: %s\n",
734 			    record->zi_func);
735 		} else if (record->zi_duration > 0) {
736 			(void) printf(" time: %lld seconds\n",
737 			    (u_longlong_t)record->zi_duration);
738 		} else if (record->zi_duration < 0) {
739 			(void) printf(" txgs: %lld \n",
740 			    (u_longlong_t)-record->zi_duration);
741 		} else if (record->zi_timer > 0) {
742 			(void) printf(" timer: %lld ms\n",
743 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
744 		} else {
745 			(void) printf("objset: %llu\n",
746 			    (u_longlong_t)record->zi_objset);
747 			(void) printf("object: %llu\n",
748 			    (u_longlong_t)record->zi_object);
749 			(void) printf("  type: %llu\n",
750 			    (u_longlong_t)record->zi_type);
751 			(void) printf(" level: %d\n", record->zi_level);
752 			if (record->zi_start == 0 &&
753 			    record->zi_end == -1ULL)
754 				(void) printf(" range: all\n");
755 			else
756 				(void) printf(" range: [%llu, %llu)\n",
757 				    (u_longlong_t)record->zi_start,
758 				    (u_longlong_t)record->zi_end);
759 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
760 		}
761 	}
762 
763 	return (0);
764 }
765 
766 static int
767 perform_action(const char *pool, zinject_record_t *record, int cmd)
768 {
769 	zfs_cmd_t zc = {"\0"};
770 
771 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
772 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
773 	zc.zc_guid = record->zi_guid;
774 	zc.zc_cookie = cmd;
775 
776 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
777 		return (0);
778 
779 	return (1);
780 }
781 
/*
 * Parse a "-D" argument of the form "<latency-ms>:<lanes>".
 *
 * Both fields must be plain unsigned decimal numbers: signs, leading blanks,
 * trailing characters and values too large to represent are rejected.
 *
 * On success stores the latency, converted to nanoseconds, in '*delay' and
 * the lane count in '*nlanes', and returns 0.  Returns 1 on any parse error,
 * leaving both outputs untouched.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	const uint64_t nsec_per_msec = 1000000ULL;
	unsigned long long scan_delay;
	unsigned long long scan_nlanes;
	char *end;

	/* strtoull() would accept a sign or leading blanks; we do not */
	if (*str < '0' || *str > '9')
		return (1);

	errno = 0;
	scan_delay = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	str = end + 1;
	if (*str < '0' || *str > '9')
		return (1);

	errno = 0;
	scan_nlanes = strtoull(str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/* the conversion to nanoseconds below must not wrap */
	if (scan_delay > UINT64_MAX / nsec_per_msec)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = (uint64_t)scan_delay * nsec_per_msec;
	*nlanes = (uint64_t)scan_nlanes;

	return (0);
}
810 
811 static int
812 parse_frequency(const char *str, uint32_t *percent)
813 {
814 	double val;
815 	char *post;
816 
817 	val = strtod(str, &post);
818 	if (post == NULL || *post != '\0')
819 		return (EINVAL);
820 
821 	/* valid range is [0.0001, 100.0] */
822 	val /= 100.0f;
823 	if (val < 0.000001f || val > 1.0f)
824 		return (ERANGE);
825 
826 	/* convert to an integer for use by kernel */
827 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
828 
829 	return (0);
830 }
831 
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010  (0x2)
 *	"0,1"	-> 0b0011  (0x3)
 *	"0,1,2"	-> 0b0111  (0x7)
 *
 * The accepted form is exactly: one digit in the range 0-2, optionally
 * followed by further ",<digit>" pairs, with no digit repeated.  Empty
 * strings, other characters, and leading, trailing or doubled commas are
 * all rejected.
 *
 * Returns 0 and stores the mask in '*dvas_out' on success, or EINVAL
 * (leaving '*dvas_out' untouched) when the string is malformed.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	const char *c = str;
	uint32_t mask = 0;

	for (;;) {
		uint32_t bit;

		/* each element must be a single digit naming DVA 0, 1 or 2 */
		if (*c < '0' || *c > '2')
			return (EINVAL);

		/* check if this DVA has been set already */
		bit = (uint32_t)1 << (*c - '0');
		if (mask & bit)
			return (EINVAL);
		mask |= bit;
		c++;

		/* after a digit comes either the end or exactly one comma */
		if (*c == '\0')
			break;
		if (*c != ',')
			return (EINVAL);
		c++;
	}

	*dvas_out = mask;
	return (0);
}
884 
885 int
886 main(int argc, char **argv)
887 {
888 	int c;
889 	char *range = NULL;
890 	char *cancel = NULL;
891 	char *end;
892 	char *raw = NULL;
893 	char *device = NULL;
894 	int level = 0;
895 	int quiet = 0;
896 	int error = 0;
897 	int domount = 0;
898 	int io_type = ZINJECT_IOTYPE_ALL;
899 	int action = VDEV_STATE_UNKNOWN;
900 	err_type_t type = TYPE_INVAL;
901 	err_type_t label = TYPE_INVAL;
902 	zinject_record_t record = { 0 };
903 	char pool[MAXNAMELEN] = "";
904 	char dataset[MAXNAMELEN] = "";
905 	zfs_handle_t *zhp = NULL;
906 	int nowrites = 0;
907 	int dur_txg = 0;
908 	int dur_secs = 0;
909 	int ret;
910 	int flags = 0;
911 	uint32_t dvas = 0;
912 
913 	if ((g_zfs = libzfs_init()) == NULL) {
914 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
915 		return (1);
916 	}
917 
918 	libzfs_print_on_error(g_zfs, B_TRUE);
919 
920 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
921 		(void) fprintf(stderr, "failed to open ZFS device\n");
922 		libzfs_fini(g_zfs);
923 		return (1);
924 	}
925 
926 	if (argc == 1) {
927 		/*
928 		 * No arguments.  Print the available handlers.  If there are no
929 		 * available handlers, direct the user to '-h' for help
930 		 * information.
931 		 */
932 		if (print_all_handlers() == 0) {
933 			(void) printf("No handlers registered.\n");
934 			(void) printf("Run 'zinject -h' for usage "
935 			    "information.\n");
936 		}
937 		libzfs_fini(g_zfs);
938 		return (0);
939 	}
940 
941 	while ((c = getopt(argc, argv,
942 	    ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
943 		switch (c) {
944 		case 'a':
945 			flags |= ZINJECT_FLUSH_ARC;
946 			break;
947 		case 'A':
948 			if (strcasecmp(optarg, "degrade") == 0) {
949 				action = VDEV_STATE_DEGRADED;
950 			} else if (strcasecmp(optarg, "fault") == 0) {
951 				action = VDEV_STATE_FAULTED;
952 			} else {
953 				(void) fprintf(stderr, "invalid action '%s': "
954 				    "must be 'degrade' or 'fault'\n", optarg);
955 				usage();
956 				libzfs_fini(g_zfs);
957 				return (1);
958 			}
959 			break;
960 		case 'b':
961 			raw = optarg;
962 			break;
963 		case 'c':
964 			cancel = optarg;
965 			break;
966 		case 'C':
967 			ret = parse_dvas(optarg, &dvas);
968 			if (ret != 0) {
969 				(void) fprintf(stderr, "invalid DVA list '%s': "
970 				    "DVAs should be 0 indexed and separated by "
971 				    "commas.\n", optarg);
972 				usage();
973 				libzfs_fini(g_zfs);
974 				return (1);
975 			}
976 			break;
977 		case 'd':
978 			device = optarg;
979 			break;
980 		case 'D':
981 			errno = 0;
982 			ret = parse_delay(optarg, &record.zi_timer,
983 			    &record.zi_nlanes);
984 			if (ret != 0) {
985 
986 				(void) fprintf(stderr, "invalid i/o delay "
987 				    "value: '%s'\n", optarg);
988 				usage();
989 				libzfs_fini(g_zfs);
990 				return (1);
991 			}
992 			break;
993 		case 'e':
994 			error = str_to_err(optarg);
995 			if (error < 0) {
996 				(void) fprintf(stderr, "invalid error type "
997 				    "'%s': must be one of: io decompress "
998 				    "decrypt nxio dtl corrupt noop\n",
999 				    optarg);
1000 				usage();
1001 				libzfs_fini(g_zfs);
1002 				return (1);
1003 			}
1004 			break;
1005 		case 'f':
1006 			ret = parse_frequency(optarg, &record.zi_freq);
1007 			if (ret != 0) {
1008 				(void) fprintf(stderr, "%sfrequency value must "
1009 				    "be in the range [0.0001, 100.0]\n",
1010 				    ret == EINVAL ? "invalid value: " :
1011 				    ret == ERANGE ? "out of range: " : "");
1012 				libzfs_fini(g_zfs);
1013 				return (1);
1014 			}
1015 			break;
1016 		case 'F':
1017 			record.zi_failfast = B_TRUE;
1018 			break;
1019 		case 'g':
1020 			dur_txg = 1;
1021 			record.zi_duration = (int)strtol(optarg, &end, 10);
1022 			if (record.zi_duration <= 0 || *end != '\0') {
1023 				(void) fprintf(stderr, "invalid duration '%s': "
1024 				    "must be a positive integer\n", optarg);
1025 				usage();
1026 				libzfs_fini(g_zfs);
1027 				return (1);
1028 			}
1029 			/* store duration of txgs as its negative */
1030 			record.zi_duration *= -1;
1031 			break;
1032 		case 'h':
1033 			usage();
1034 			libzfs_fini(g_zfs);
1035 			return (0);
1036 		case 'I':
1037 			/* default duration, if one hasn't yet been defined */
1038 			nowrites = 1;
1039 			if (dur_secs == 0 && dur_txg == 0)
1040 				record.zi_duration = 30;
1041 			break;
1042 		case 'l':
1043 			level = (int)strtol(optarg, &end, 10);
1044 			if (*end != '\0') {
1045 				(void) fprintf(stderr, "invalid level '%s': "
1046 				    "must be an integer\n", optarg);
1047 				usage();
1048 				libzfs_fini(g_zfs);
1049 				return (1);
1050 			}
1051 			break;
1052 		case 'm':
1053 			domount = 1;
1054 			break;
1055 		case 'p':
1056 			(void) strlcpy(record.zi_func, optarg,
1057 			    sizeof (record.zi_func));
1058 			record.zi_cmd = ZINJECT_PANIC;
1059 			break;
1060 		case 'P':
1061 			if (strcasecmp(optarg, "import") == 0) {
1062 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
1063 			} else if (strcasecmp(optarg, "export") == 0) {
1064 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
1065 			} else {
1066 				(void) fprintf(stderr, "invalid command '%s': "
1067 				    "must be 'import' or 'export'\n", optarg);
1068 				usage();
1069 				libzfs_fini(g_zfs);
1070 				return (1);
1071 			}
1072 			break;
1073 		case 'q':
1074 			quiet = 1;
1075 			break;
1076 		case 'r':
1077 			range = optarg;
1078 			flags |= ZINJECT_CALC_RANGE;
1079 			break;
1080 		case 's':
1081 			dur_secs = 1;
1082 			record.zi_duration = (int)strtol(optarg, &end, 10);
1083 			if (record.zi_duration <= 0 || *end != '\0') {
1084 				(void) fprintf(stderr, "invalid duration '%s': "
1085 				    "must be a positive integer\n", optarg);
1086 				usage();
1087 				libzfs_fini(g_zfs);
1088 				return (1);
1089 			}
1090 			break;
1091 		case 'T':
1092 			io_type = str_to_iotype(optarg);
1093 			if (io_type == ZINJECT_IOTYPES) {
1094 				(void) fprintf(stderr, "invalid I/O type "
1095 				    "'%s': must be 'read', 'write', 'free', "
1096 				    "'claim', 'flush' or 'all'\n", optarg);
1097 				usage();
1098 				libzfs_fini(g_zfs);
1099 				return (1);
1100 			}
1101 			break;
1102 		case 't':
1103 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1104 			    !MOS_TYPE(type)) {
1105 				(void) fprintf(stderr, "invalid type '%s'\n",
1106 				    optarg);
1107 				usage();
1108 				libzfs_fini(g_zfs);
1109 				return (1);
1110 			}
1111 			break;
1112 		case 'u':
1113 			flags |= ZINJECT_UNLOAD_SPA;
1114 			break;
1115 		case 'L':
1116 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1117 			    !LABEL_TYPE(type)) {
1118 				(void) fprintf(stderr, "invalid label type "
1119 				    "'%s'\n", optarg);
1120 				usage();
1121 				libzfs_fini(g_zfs);
1122 				return (1);
1123 			}
1124 			break;
1125 		case ':':
1126 			(void) fprintf(stderr, "option -%c requires an "
1127 			    "operand\n", optopt);
1128 			usage();
1129 			libzfs_fini(g_zfs);
1130 			return (1);
1131 		case '?':
1132 			(void) fprintf(stderr, "invalid option '%c'\n",
1133 			    optopt);
1134 			usage();
1135 			libzfs_fini(g_zfs);
1136 			return (2);
1137 		}
1138 	}
1139 
1140 	argc -= optind;
1141 	argv += optind;
1142 
1143 	if (record.zi_duration != 0 && record.zi_cmd == 0)
1144 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
1145 
1146 	if (cancel != NULL) {
1147 		/*
1148 		 * '-c' is invalid with any other options.
1149 		 */
1150 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1151 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1152 		    record.zi_freq > 0 || dvas != 0) {
1153 			(void) fprintf(stderr, "cancel (-c) incompatible with "
1154 			    "any other options\n");
1155 			usage();
1156 			libzfs_fini(g_zfs);
1157 			return (2);
1158 		}
1159 		if (argc != 0) {
1160 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
1161 			usage();
1162 			libzfs_fini(g_zfs);
1163 			return (2);
1164 		}
1165 
1166 		if (strcmp(cancel, "all") == 0) {
1167 			return (cancel_all_handlers());
1168 		} else {
1169 			int id = (int)strtol(cancel, &end, 10);
1170 			if (*end != '\0') {
1171 				(void) fprintf(stderr, "invalid handle id '%s':"
1172 				    " must be an integer or 'all'\n", cancel);
1173 				usage();
1174 				libzfs_fini(g_zfs);
1175 				return (1);
1176 			}
1177 			return (cancel_handler(id));
1178 		}
1179 	}
1180 
1181 	if (device != NULL) {
1182 		/*
1183 		 * Device (-d) injection uses a completely different mechanism
1184 		 * for doing injection, so handle it separately here.
1185 		 */
1186 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1187 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1188 		    dvas != 0) {
1189 			(void) fprintf(stderr, "device (-d) incompatible with "
1190 			    "data error injection\n");
1191 			usage();
1192 			libzfs_fini(g_zfs);
1193 			return (2);
1194 		}
1195 
1196 		if (argc != 1) {
1197 			(void) fprintf(stderr, "device (-d) injection requires "
1198 			    "a single pool name\n");
1199 			usage();
1200 			libzfs_fini(g_zfs);
1201 			return (2);
1202 		}
1203 
1204 		(void) strlcpy(pool, argv[0], sizeof (pool));
1205 		dataset[0] = '\0';
1206 
1207 		if (error == ECKSUM) {
1208 			(void) fprintf(stderr, "device error type must be "
1209 			    "'io', 'nxio' or 'corrupt'\n");
1210 			libzfs_fini(g_zfs);
1211 			return (1);
1212 		}
1213 
1214 		if (error == EILSEQ &&
1215 		    (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1216 			(void) fprintf(stderr, "device corrupt errors require "
1217 			    "io type read and a frequency value\n");
1218 			libzfs_fini(g_zfs);
1219 			return (1);
1220 		}
1221 
1222 		record.zi_iotype = io_type;
1223 		if (translate_device(pool, device, label, &record) != 0) {
1224 			libzfs_fini(g_zfs);
1225 			return (1);
1226 		}
1227 
1228 		if (record.zi_nlanes) {
1229 			switch (io_type) {
1230 			case ZINJECT_IOTYPE_READ:
1231 			case ZINJECT_IOTYPE_WRITE:
1232 			case ZINJECT_IOTYPE_ALL:
1233 				break;
1234 			default:
1235 				(void) fprintf(stderr, "I/O type for a delay "
1236 				    "must be 'read' or 'write'\n");
1237 				usage();
1238 				libzfs_fini(g_zfs);
1239 				return (1);
1240 			}
1241 		}
1242 
1243 		if (!error)
1244 			error = ENXIO;
1245 
1246 		if (action != VDEV_STATE_UNKNOWN)
1247 			return (perform_action(pool, &record, action));
1248 
1249 	} else if (raw != NULL) {
1250 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1251 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1252 		    record.zi_freq > 0 || dvas != 0) {
1253 			(void) fprintf(stderr, "raw (-b) format with "
1254 			    "any other options\n");
1255 			usage();
1256 			libzfs_fini(g_zfs);
1257 			return (2);
1258 		}
1259 
1260 		if (argc != 1) {
1261 			(void) fprintf(stderr, "raw (-b) format expects a "
1262 			    "single pool name\n");
1263 			usage();
1264 			libzfs_fini(g_zfs);
1265 			return (2);
1266 		}
1267 
1268 		(void) strlcpy(pool, argv[0], sizeof (pool));
1269 		dataset[0] = '\0';
1270 
1271 		if (error == ENXIO) {
1272 			(void) fprintf(stderr, "data error type must be "
1273 			    "'checksum' or 'io'\n");
1274 			libzfs_fini(g_zfs);
1275 			return (1);
1276 		}
1277 
1278 		record.zi_cmd = ZINJECT_DATA_FAULT;
1279 		if (translate_raw(raw, &record) != 0) {
1280 			libzfs_fini(g_zfs);
1281 			return (1);
1282 		}
1283 		if (!error)
1284 			error = EIO;
1285 	} else if (record.zi_cmd == ZINJECT_PANIC) {
1286 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1287 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1288 		    dvas != 0) {
1289 			(void) fprintf(stderr, "%s incompatible with other "
1290 			    "options\n", "import|export delay (-P)");
1291 			usage();
1292 			libzfs_fini(g_zfs);
1293 			return (2);
1294 		}
1295 
1296 		if (argc < 1 || argc > 2) {
1297 			(void) fprintf(stderr, "panic (-p) injection requires "
1298 			    "a single pool name and an optional id\n");
1299 			usage();
1300 			libzfs_fini(g_zfs);
1301 			return (2);
1302 		}
1303 
1304 		(void) strlcpy(pool, argv[0], sizeof (pool));
1305 		if (argv[1] != NULL)
1306 			record.zi_type = atoi(argv[1]);
1307 		dataset[0] = '\0';
1308 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1309 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1310 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1311 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1312 		    dvas != 0) {
1313 			(void) fprintf(stderr, "%s incompatible with other "
1314 			    "options\n", "import|export delay (-P)");
1315 			usage();
1316 			libzfs_fini(g_zfs);
1317 			return (2);
1318 		}
1319 
1320 		if (argc != 1 || record.zi_duration <= 0) {
1321 			(void) fprintf(stderr, "import|export delay (-P) "
1322 			    "injection requires a duration (-s) and a single "
1323 			    "pool name\n");
1324 			usage();
1325 			libzfs_fini(g_zfs);
1326 			return (2);
1327 		}
1328 
1329 		(void) strlcpy(pool, argv[0], sizeof (pool));
1330 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1331 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1332 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1333 			(void) fprintf(stderr, "hardware failure (-I) "
1334 			    "incompatible with other options\n");
1335 			usage();
1336 			libzfs_fini(g_zfs);
1337 			return (2);
1338 		}
1339 
1340 		if (nowrites == 0) {
1341 			(void) fprintf(stderr, "-s or -g meaningless "
1342 			    "without -I (ignore writes)\n");
1343 			usage();
1344 			libzfs_fini(g_zfs);
1345 			return (2);
1346 		} else if (dur_secs && dur_txg) {
1347 			(void) fprintf(stderr, "choose a duration either "
1348 			    "in seconds (-s) or a number of txgs (-g) "
1349 			    "but not both\n");
1350 			usage();
1351 			libzfs_fini(g_zfs);
1352 			return (2);
1353 		} else if (argc != 1) {
1354 			(void) fprintf(stderr, "ignore writes (-I) "
1355 			    "injection requires a single pool name\n");
1356 			usage();
1357 			libzfs_fini(g_zfs);
1358 			return (2);
1359 		}
1360 
1361 		(void) strlcpy(pool, argv[0], sizeof (pool));
1362 		dataset[0] = '\0';
1363 	} else if (type == TYPE_INVAL) {
1364 		if (flags == 0) {
1365 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1366 			    "'-t', '-a', '-p', '-I' or '-u' "
1367 			    "must be specified\n");
1368 			usage();
1369 			libzfs_fini(g_zfs);
1370 			return (2);
1371 		}
1372 
1373 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1374 			(void) strlcpy(pool, argv[0], sizeof (pool));
1375 			dataset[0] = '\0';
1376 		} else if (argc != 0) {
1377 			(void) fprintf(stderr, "extraneous argument for "
1378 			    "'-f'\n");
1379 			usage();
1380 			libzfs_fini(g_zfs);
1381 			return (2);
1382 		}
1383 
1384 		flags |= ZINJECT_NULL;
1385 	} else {
1386 		if (argc != 1) {
1387 			(void) fprintf(stderr, "missing object\n");
1388 			usage();
1389 			libzfs_fini(g_zfs);
1390 			return (2);
1391 		}
1392 
1393 		if (error == ENXIO || error == EILSEQ) {
1394 			(void) fprintf(stderr, "data error type must be "
1395 			    "'checksum' or 'io'\n");
1396 			libzfs_fini(g_zfs);
1397 			return (1);
1398 		}
1399 
1400 		if (dvas != 0) {
1401 			if (error == EACCES || error == EINVAL) {
1402 				(void) fprintf(stderr, "the '-C' option may "
1403 				    "not be used with logical data errors "
1404 				    "'decrypt' and 'decompress'\n");
1405 				libzfs_fini(g_zfs);
1406 				return (1);
1407 			}
1408 
1409 			record.zi_dvas = dvas;
1410 		}
1411 
1412 		if (error == EACCES) {
1413 			if (type != TYPE_DATA) {
1414 				(void) fprintf(stderr, "decryption errors "
1415 				    "may only be injected for 'data' types\n");
1416 				libzfs_fini(g_zfs);
1417 				return (1);
1418 			}
1419 
1420 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1421 			/*
1422 			 * Internally, ZFS actually uses ECKSUM for decryption
1423 			 * errors since EACCES is used to indicate the key was
1424 			 * not found.
1425 			 */
1426 			error = ECKSUM;
1427 		} else {
1428 			record.zi_cmd = ZINJECT_DATA_FAULT;
1429 		}
1430 
1431 		if (translate_record(type, argv[0], range, level, &record, pool,
1432 		    dataset) != 0) {
1433 			libzfs_fini(g_zfs);
1434 			return (1);
1435 		}
1436 		if (!error)
1437 			error = EIO;
1438 	}
1439 
1440 	/*
1441 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1442 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1443 	 * time we access the pool.
1444 	 */
1445 	if (dataset[0] != '\0' && domount) {
1446 		if ((zhp = zfs_open(g_zfs, dataset,
1447 		    ZFS_TYPE_DATASET)) == NULL) {
1448 			libzfs_fini(g_zfs);
1449 			return (1);
1450 		}
1451 		if (zfs_unmount(zhp, NULL, 0) != 0) {
1452 			libzfs_fini(g_zfs);
1453 			return (1);
1454 		}
1455 	}
1456 
1457 	record.zi_error = error;
1458 
1459 	ret = register_handler(pool, flags, &record, quiet);
1460 
1461 	if (dataset[0] != '\0' && domount)
1462 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1463 
1464 	libzfs_fini(g_zfs);
1465 
1466 	return (ret);
1467 }
1468