xref: /freebsd/sys/contrib/openzfs/cmd/zinject/zinject.c (revision f9590540c524607d22fa7e718c758725c4365375)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2017, Intel Corporation.
26  * Copyright (c) 2023-2025, Klara, Inc.
27  */
28 
29 /*
30  * ZFS Fault Injector
31  *
32  * This userland component takes a set of options and uses libzpool to translate
33  * from a user-visible object type and name to an internal representation.
34  * There are two basic types of faults: device faults and data faults.
35  *
36  *
37  * DEVICE FAULTS
38  *
39  * Errors can be injected into a particular vdev using the '-d' option.  This
40  * option takes a path or vdev GUID to uniquely identify the device within a
 * pool.  There are four types of errors that can be injected, EIO, ENXIO,
 * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
 * default is ENXIO.  For EIO failures, any attempt to read data from the device
 * will return EIO, but a subsequent attempt to reopen the device will succeed.
 * For ENXIO failures, any attempt to read from the device will return EIO, and
 * any attempt to reopen the device will return ENXIO.  The EILSEQ failures
47  * only apply to read operations (-T read) and will flip a bit after the device
48  * has read the original data.
49  *
50  * For label faults, the -L option must be specified. This allows faults
51  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
52  * of all the labels for the specified device.
53  *
54  * This form of the command looks like:
55  *
56  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
57  *
58  *
59  * DATA FAULTS
60  *
61  * We begin with a tuple of the form:
62  *
63  * 	<type,level,range,object>
64  *
65  * 	type	A string describing the type of data to target.  Each type
66  * 		implicitly describes how to interpret 'object'. Currently,
67  * 		the following values are supported:
68  *
69  * 		data		User data for a file
70  * 		dnode		Dnode for a file or directory
71  *
72  *		The following MOS objects are special.  Instead of injecting
73  *		errors on a particular object or blkid, we inject errors across
74  *		all objects of the given type.
75  *
76  * 		mos		Any data in the MOS
77  * 		mosdir		object directory
78  * 		config		pool configuration
79  * 		bpobj		blkptr list
80  * 		spacemap	spacemap
81  * 		metaslab	metaslab
82  * 		errlog		persistent error log
83  *
84  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
85  * 		a range is given, this corresponds to the indirect block
86  * 		corresponding to the specific range.
87  *
88  *	range	A numerical range [start,end) within the object.  Defaults to
89  *		the full size of the file.
90  *
91  * 	object	A string describing the logical location of the object.  For
92  * 		files and directories (currently the only supported types),
93  * 		this is the path of the object on disk.
94  *
95  * This is translated, via libzpool, into the following internal representation:
96  *
97  * 	<type,objset,object,level,range>
98  *
99  * These types should be self-explanatory.  This tuple is then passed to the
100  * kernel via a special ioctl() to initiate fault injection for the given
101  * object.  Note that 'type' is not strictly necessary for fault injection, but
102  * is used when translating existing faults into a human-readable string.
103  *
104  *
105  * The command itself takes one of the forms:
106  *
107  * 	zinject
108  * 	zinject <-a | -u pool>
109  * 	zinject -c <id|all>
110  * 	zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]
111  *	    [-T iotype] [-t type object | -b bookmark pool]
112  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
113  *	    [-r range] <object>
 * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:blkid pool
115  *
116  * With no arguments, the command prints all currently registered injection
117  * handlers, with their numeric identifiers.
118  *
119  * The '-c' option will clear the given handler, or all handlers if 'all' is
120  * specified.
121  *
122  * The '-e' option takes a string describing the errno to simulate.  This must
123  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
124  * will result in the same behavior, but RAID-Z will produce a different set of
125  * ereports for this situation.
126  *
127  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
128  * specified, then the ARC cache is flushed appropriately.  If '-u' is
129  * specified, then the underlying SPA is unloaded.  Either of these flags can be
130  * specified independently of any other handlers.  The '-m' flag automatically
131  * does an unmount and remount of the underlying dataset to aid in flushing the
132  * cache.
133  *
134  * The '-f' flag controls the frequency of errors injected, expressed as a
135  * real number percentage between 0.0001 and 100.  The default is 100.
136  *
137  * The <object> form is responsible for actually injecting the handler into the
138  * framework.  It takes the arguments described above, translates them to the
139  * internal tuple using libzpool, and then issues an ioctl() to register the
140  * handler.
141  *
142  * The '-b' option can target a specific bookmark, regardless of whether a
143  * human-readable interface has been designed.  It allows developers to specify
144  * a particular block by number.
145  *
146  * The '-E' option injects pipeline ready stage delays for the given object or
147  * bookmark. The delay is specified in milliseconds, and it supports I/O type
148  * and range filters.
149  */
150 
151 #include <errno.h>
152 #include <fcntl.h>
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <string.h>
156 #include <strings.h>
157 #include <unistd.h>
158 
159 #include <sys/fs/zfs.h>
160 #include <sys/mount.h>
161 
162 #include <libzfs.h>
163 
164 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
165 
166 #include "zinject.h"
167 
/* libzfs handle; main() sets it from libzfs_init() and it is passed to every zfs_ioctl() call here. */
libzfs_handle_t *g_zfs;
/* File descriptor that main() obtains by opening ZFS_DEV read-write. */
int zfs_fd;
170 
171 static const char *const errtable[TYPE_INVAL] = {
172 	"data",
173 	"dnode",
174 	"mos",
175 	"mosdir",
176 	"metaslab",
177 	"config",
178 	"bpobj",
179 	"spacemap",
180 	"errlog",
181 	"uber",
182 	"nvlist",
183 	"pad1",
184 	"pad2"
185 };
186 
187 static err_type_t
name_to_type(const char * arg)188 name_to_type(const char *arg)
189 {
190 	int i;
191 	for (i = 0; i < TYPE_INVAL; i++)
192 		if (strcmp(errtable[i], arg) == 0)
193 			return (i);
194 
195 	return (TYPE_INVAL);
196 }
197 
198 static const char *
type_to_name(uint64_t type)199 type_to_name(uint64_t type)
200 {
201 	switch (type) {
202 	case DMU_OT_OBJECT_DIRECTORY:
203 		return ("mosdir");
204 	case DMU_OT_OBJECT_ARRAY:
205 		return ("metaslab");
206 	case DMU_OT_PACKED_NVLIST:
207 		return ("config");
208 	case DMU_OT_BPOBJ:
209 		return ("bpobj");
210 	case DMU_OT_SPACE_MAP:
211 		return ("spacemap");
212 	case DMU_OT_ERROR_LOG:
213 		return ("errlog");
214 	default:
215 		return ("-");
216 	}
217 }
218 
219 struct errstr {
220 	int		err;
221 	const char	*str;
222 };
223 static const struct errstr errstrtable[] = {
224 	{ EIO,		"io" },
225 	{ ECKSUM,	"checksum" },
226 	{ EINVAL,	"decompress" },
227 	{ EACCES,	"decrypt" },
228 	{ ENXIO,	"nxio" },
229 	{ ECHILD,	"dtl" },
230 	{ EILSEQ,	"corrupt" },
231 	{ ENOSYS,	"noop" },
232 	{ EFAULT,	"io-prefail" },
233 	{ 0, NULL },
234 };
235 
236 static int
str_to_err(const char * str)237 str_to_err(const char *str)
238 {
239 	for (int i = 0; errstrtable[i].str != NULL; i++)
240 		if (strcasecmp(errstrtable[i].str, str) == 0)
241 			return (errstrtable[i].err);
242 	return (-1);
243 }
244 static const char *
err_to_str(int err)245 err_to_str(int err)
246 {
247 	for (int i = 0; errstrtable[i].str != NULL; i++)
248 		if (errstrtable[i].err == err)
249 			return (errstrtable[i].str);
250 	return ("[unknown]");
251 }
252 
253 static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
254 	[ZINJECT_IOTYPE_NULL]	= "null",
255 	[ZINJECT_IOTYPE_READ]	= "read",
256 	[ZINJECT_IOTYPE_WRITE]	= "write",
257 	[ZINJECT_IOTYPE_FREE]	= "free",
258 	[ZINJECT_IOTYPE_CLAIM]	= "claim",
259 	[ZINJECT_IOTYPE_FLUSH]	= "flush",
260 	[ZINJECT_IOTYPE_TRIM]	= "trim",
261 	[ZINJECT_IOTYPE_ALL]	= "all",
262 	[ZINJECT_IOTYPE_PROBE]	= "probe",
263 };
264 
265 static zinject_iotype_t
str_to_iotype(const char * arg)266 str_to_iotype(const char *arg)
267 {
268 	for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
269 		if (iotypestrtable[iotype] != NULL &&
270 		    strcasecmp(iotypestrtable[iotype], arg) == 0)
271 			return (iotype);
272 	return (ZINJECT_IOTYPES);
273 }
274 
275 static const char *
iotype_to_str(zinject_iotype_t iotype)276 iotype_to_str(zinject_iotype_t iotype)
277 {
278 	if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
279 		return ("[unknown]");
280 	return (iotypestrtable[iotype]);
281 }
282 
/*
 * Print the usage message to stdout.
 *
 * The text is one string literal built from adjacent pieces; every piece
 * ends its own output line(s), so no piece may carry characters after its
 * final "\n" (they would become a stray prefix on the following line).
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only\n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or\n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), 'io-prefail' (unsuccessfully do\n"
	    "\t\tnothing) or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n"
	    "\t\t[-T iotype] [-t type object | -b bookmark pool]\n"
	    "\n"
	    "\t\tInject pipeline ready stage delays for the given object path\n"
	    "\t\t(data or dnode) or raw bookmark. The delay is specified in\n"
	    "\t\tmilliseconds.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject, specified as 'start[,end]'.  Numeric\n"
	    "\t\t\tsuffixes (K, M, G, T, P, E) are accepted.\n"
	    "\t\t\tWill be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
420 
421 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)422 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
423     void *data)
424 {
425 	zfs_cmd_t zc = {"\0"};
426 	int ret;
427 
428 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
429 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
430 		    &zc.zc_inject_record, data)) != 0)
431 			return (ret);
432 
433 	if (errno != ENOENT) {
434 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
435 		    strerror(errno));
436 		return (-1);
437 	}
438 
439 	return (0);
440 }
441 
442 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)443 print_data_handler(int id, const char *pool, zinject_record_t *record,
444     void *data)
445 {
446 	int *count = data;
447 
448 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
449 	    record->zi_duration != 0) {
450 		return (0);
451 	}
452 
453 	if (*count == 0) {
454 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
455 		    "%-15s  %-6s  %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
456 		    "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
457 		(void) printf("---  ---------------  ------  "
458 		    "------  --------  ---  ----  ---------------  "
459 		    "------  ------\n");
460 	}
461 
462 	*count += 1;
463 
464 	char rangebuf[32];
465 	if (record->zi_start == 0 && record->zi_end == -1ULL)
466 		snprintf(rangebuf, sizeof (rangebuf), "all");
467 	else
468 		snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
469 		    (u_longlong_t)record->zi_start,
470 		    (u_longlong_t)record->zi_end);
471 
472 
473 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  %-15s  "
474 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
475 	    (u_longlong_t)record->zi_objset,
476 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
477 	    record->zi_level, record->zi_dvas, rangebuf,
478 	    record->zi_match_count, record->zi_inject_count);
479 
480 	return (0);
481 }
482 
483 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)484 print_device_handler(int id, const char *pool, zinject_record_t *record,
485     void *data)
486 {
487 	int *count = data;
488 
489 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
490 		return (0);
491 
492 	if (record->zi_cmd == ZINJECT_DELAY_IO)
493 		return (0);
494 
495 	if (*count == 0) {
496 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s  "
497 		    "%-6s  %-6s\n",
498 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
499 		    "MATCH", "INJECT");
500 		(void) printf(
501 		    "---  ---------------  ----------------  "
502 		    "-----  ----------  ---------  "
503 		    "------  ------\n");
504 	}
505 
506 	*count += 1;
507 
508 	double freq = record->zi_freq == 0 ? 100.0f :
509 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
510 
511 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%  "
512 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
513 	    (u_longlong_t)record->zi_guid,
514 	    iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
515 	    freq, record->zi_match_count, record->zi_inject_count);
516 
517 	return (0);
518 }
519 
520 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)521 print_delay_handler(int id, const char *pool, zinject_record_t *record,
522     void *data)
523 {
524 	int *count = data;
525 
526 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
527 		return (0);
528 
529 	if (record->zi_cmd != ZINJECT_DELAY_IO)
530 		return (0);
531 
532 	if (*count == 0) {
533 		(void) printf("%3s  %-15s  %-16s  %-10s  %-5s  %-9s  "
534 		    "%-6s  %-6s\n",
535 		    "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
536 		    "MATCH", "INJECT");
537 		(void) printf("---  ---------------  ----------------  "
538 		    "----------  -----  ---------  "
539 		    "------  ------\n");
540 	}
541 
542 	*count += 1;
543 
544 	double freq = record->zi_freq == 0 ? 100.0f :
545 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
546 
547 	(void) printf("%3d  %-15s  %llx  %10llu  %5llu  %8.4f%%  "
548 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
549 	    (u_longlong_t)record->zi_guid,
550 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
551 	    (u_longlong_t)record->zi_nlanes,
552 	    freq, record->zi_match_count, record->zi_inject_count);
553 
554 	return (0);
555 }
556 
557 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)558 print_panic_handler(int id, const char *pool, zinject_record_t *record,
559     void *data)
560 {
561 	int *count = data;
562 
563 	if (record->zi_func[0] == '\0')
564 		return (0);
565 
566 	if (*count == 0) {
567 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
568 		(void) printf("---  ---------------  ----------------\n");
569 	}
570 
571 	*count += 1;
572 
573 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
574 
575 	return (0);
576 }
577 
578 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)579 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
580     void *data)
581 {
582 	int *count = data;
583 
584 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
585 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
586 		return (0);
587 	}
588 
589 	if (*count == 0) {
590 		(void) printf("%3s  %-19s  %-11s  %s\n",
591 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
592 		(void) printf("---  -------------------  -----------"
593 		    "  -------\n");
594 	}
595 
596 	*count += 1;
597 
598 	(void) printf("%3d  %-19s  %-11llu  %s\n",
599 	    id, pool, (u_longlong_t)record->zi_duration,
600 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
601 
602 	return (0);
603 }
604 
605 /*
606  * Print all registered error handlers.  Returns the number of handlers
607  * registered.
608  */
609 static int
print_all_handlers(void)610 print_all_handlers(void)
611 {
612 	int count = 0, total = 0;
613 
614 	(void) iter_handlers(print_device_handler, &count);
615 	if (count > 0) {
616 		total += count;
617 		(void) printf("\n");
618 		count = 0;
619 	}
620 
621 	(void) iter_handlers(print_delay_handler, &count);
622 	if (count > 0) {
623 		total += count;
624 		(void) printf("\n");
625 		count = 0;
626 	}
627 
628 	(void) iter_handlers(print_data_handler, &count);
629 	if (count > 0) {
630 		total += count;
631 		(void) printf("\n");
632 		count = 0;
633 	}
634 
635 	(void) iter_handlers(print_pool_delay_handler, &count);
636 	if (count > 0) {
637 		total += count;
638 		(void) printf("\n");
639 		count = 0;
640 	}
641 
642 	(void) iter_handlers(print_panic_handler, &count);
643 
644 	return (count + total);
645 }
646 
647 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)648 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
649     void *data)
650 {
651 	(void) pool, (void) record, (void) data;
652 	zfs_cmd_t zc = {"\0"};
653 
654 	zc.zc_guid = (uint64_t)id;
655 
656 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
657 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
658 		    id, strerror(errno));
659 		return (1);
660 	}
661 
662 	return (0);
663 }
664 
665 /*
666  * Remove all fault injection handlers.
667  */
668 static int
cancel_all_handlers(void)669 cancel_all_handlers(void)
670 {
671 	int ret = iter_handlers(cancel_one_handler, NULL);
672 
673 	if (ret == 0)
674 		(void) printf("removed all registered handlers\n");
675 
676 	return (ret);
677 }
678 
679 /*
680  * Remove a specific fault injection handler.
681  */
682 static int
cancel_handler(int id)683 cancel_handler(int id)
684 {
685 	zfs_cmd_t zc = {"\0"};
686 
687 	zc.zc_guid = (uint64_t)id;
688 
689 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
690 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
691 		    id, strerror(errno));
692 		return (1);
693 	}
694 
695 	(void) printf("removed handler %d\n", id);
696 
697 	return (0);
698 }
699 
700 /*
701  * Register a new fault injection handler.
702  */
703 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)704 register_handler(const char *pool, int flags, zinject_record_t *record,
705     int quiet)
706 {
707 	zfs_cmd_t zc = {"\0"};
708 
709 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
710 	zc.zc_inject_record = *record;
711 	zc.zc_guid = flags;
712 
713 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
714 		const char *errmsg = strerror(errno);
715 
716 		switch (errno) {
717 		case EDOM:
718 			errmsg = "block level exceeds max level of object";
719 			break;
720 		case EEXIST:
721 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
722 				errmsg = "pool already imported";
723 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
724 				errmsg = "a handler already exists";
725 			break;
726 		case ENOENT:
727 			/* import delay injector running on older zfs module */
728 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
729 				errmsg = "import delay injector not supported";
730 			break;
731 		default:
732 			break;
733 		}
734 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
735 		return (1);
736 	}
737 
738 	if (flags & ZINJECT_NULL)
739 		return (0);
740 
741 	if (quiet) {
742 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
743 	} else {
744 		boolean_t show_object = B_FALSE;
745 		boolean_t show_iotype = B_FALSE;
746 		(void) printf("Added handler %llu with the following "
747 		    "properties:\n", (u_longlong_t)zc.zc_guid);
748 		(void) printf("  pool: %s\n", pool);
749 		if (record->zi_guid) {
750 			(void) printf("  vdev: %llx\n",
751 			    (u_longlong_t)record->zi_guid);
752 			show_iotype = B_TRUE;
753 		} else if (record->zi_func[0] != '\0') {
754 			(void) printf("  panic function: %s\n",
755 			    record->zi_func);
756 		} else if (record->zi_duration > 0) {
757 			(void) printf(" time: %lld seconds\n",
758 			    (u_longlong_t)record->zi_duration);
759 		} else if (record->zi_duration < 0) {
760 			(void) printf(" txgs: %lld \n",
761 			    (u_longlong_t)-record->zi_duration);
762 		} else if (record->zi_timer > 0) {
763 			(void) printf(" timer: %lld ms\n",
764 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
765 			if (record->zi_cmd == ZINJECT_DELAY_READY) {
766 				show_object = B_TRUE;
767 				show_iotype = B_TRUE;
768 			}
769 		} else {
770 			show_object = B_TRUE;
771 		}
772 		if (show_iotype) {
773 			(void) printf("iotype: %s\n",
774 			    iotype_to_str(record->zi_iotype));
775 		}
776 		if (show_object) {
777 			(void) printf("objset: %llu\n",
778 			    (u_longlong_t)record->zi_objset);
779 			(void) printf("object: %llu\n",
780 			    (u_longlong_t)record->zi_object);
781 			(void) printf("  type: %llu\n",
782 			    (u_longlong_t)record->zi_type);
783 			(void) printf(" level: %d\n", record->zi_level);
784 			if (record->zi_start == 0 &&
785 			    record->zi_end == -1ULL)
786 				(void) printf(" range: all\n");
787 			else
788 				(void) printf(" range: [%llu, %llu)\n",
789 				    (u_longlong_t)record->zi_start,
790 				    (u_longlong_t)record->zi_end);
791 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
792 		}
793 	}
794 
795 	return (0);
796 }
797 
798 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)799 perform_action(const char *pool, zinject_record_t *record, int cmd)
800 {
801 	zfs_cmd_t zc = {"\0"};
802 
803 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
804 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
805 	zc.zc_guid = record->zi_guid;
806 	zc.zc_cookie = cmd;
807 
808 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
809 		return (0);
810 
811 	return (1);
812 }
813 
/*
 * Parse a "latency:lanes" specifier made of two unsigned decimal numbers.
 *
 * On success stores the latency, converted from milliseconds to nanoseconds
 * with MSEC2NSEC(), in '*delay' and the lane count in '*nlanes', then
 * returns 0.  Returns 1, leaving both outputs untouched, when either
 * number is missing or the latency is zero.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long msecs;
	unsigned long lanes;

	/*
	 * A zero latency is refused on purpose: translate_device() keys off
	 * a non-zero value to decide whether the fault is a
	 * ZINJECT_DELAY_IO fault.
	 */
	if (sscanf(str, "%lu:%lu", &msecs, &lanes) != 2 || msecs == 0)
		return (1);

	/* the CLI speaks milliseconds; the kernel expects nanoseconds */
	*delay = MSEC2NSEC(msecs);
	*nlanes = lanes;

	return (0);
}
842 
843 static int
parse_frequency(const char * str,uint32_t * percent)844 parse_frequency(const char *str, uint32_t *percent)
845 {
846 	double val;
847 	char *post;
848 
849 	val = strtod(str, &post);
850 	if (post == NULL || *post != '\0')
851 		return (EINVAL);
852 
853 	/* valid range is [0.0001, 100.0] */
854 	val /= 100.0f;
855 	if (val < 0.000001f || val > 1.0f)
856 		return (ERANGE);
857 
858 	/* convert to an integer for use by kernel */
859 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
860 
861 	return (0);
862 }
863 
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010  (0x2)
 *	"0,1"	-> 0b0011  (0x3)
 *	"0,1,2"	-> 0b0111  (0x7)
 *
 * Only the indices 0, 1 and 2 are accepted, each at most once.  Every comma
 * must sit between two indices, so leading, trailing and repeated commas
 * are rejected.
 *
 * Returns 0 and stores the mask in '*dvas_out' on success.  Returns EINVAL,
 * leaving '*dvas_out' untouched, for any malformed specifier.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	const size_t len = strlen(str);
	uint32_t mask = 0;
	int need_delim = 0;	/* set right after an index was consumed */

	/* max string length is 5 ("0,1,2") */
	if (len == 0 || len > 5)
		return (EINVAL);

	for (const char *c = str; *c != '\0'; c++) {
		switch (*c) {
		case '0':
		case '1':
		case '2': {
			const uint32_t bit = 1U << (*c - '0');

			/* two indices need a comma between them */
			if (need_delim)
				return (EINVAL);

			/* check if this DVA has been set already */
			if (mask & bit)
				return (EINVAL);

			mask |= bit;
			need_delim = 1;
			break;
		}
		case ',':
			/* a comma is only valid directly after an index */
			if (!need_delim)
				return (EINVAL);
			need_delim = 0;
			break;
		default:
			/* check for invalid character */
			return (EINVAL);
		}
	}

	/* check for dangling delimiter */
	if (!need_delim)
		return (EINVAL);

	*dvas_out = mask;
	return (0);
}
916 
917 int
main(int argc,char ** argv)918 main(int argc, char **argv)
919 {
920 	int c;
921 	char *range = NULL;
922 	char *cancel = NULL;
923 	char *end;
924 	char *raw = NULL;
925 	char *device = NULL;
926 	int level = 0;
927 	int quiet = 0;
928 	int error = 0;
929 	int domount = 0;
930 	int io_type = ZINJECT_IOTYPE_ALL;
931 	int action = VDEV_STATE_UNKNOWN;
932 	err_type_t type = TYPE_INVAL;
933 	err_type_t label = TYPE_INVAL;
934 	zinject_record_t record = { 0 };
935 	char pool[MAXNAMELEN] = "";
936 	char dataset[MAXNAMELEN] = "";
937 	zfs_handle_t *zhp = NULL;
938 	int nowrites = 0;
939 	int dur_txg = 0;
940 	int dur_secs = 0;
941 	int ret;
942 	int flags = 0;
943 	uint32_t dvas = 0;
944 	hrtime_t ready_delay = -1;
945 
946 	if ((g_zfs = libzfs_init()) == NULL) {
947 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
948 		return (1);
949 	}
950 
951 	libzfs_print_on_error(g_zfs, B_TRUE);
952 
953 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
954 		(void) fprintf(stderr, "failed to open ZFS device\n");
955 		libzfs_fini(g_zfs);
956 		return (1);
957 	}
958 
959 	if (argc == 1) {
960 		/*
961 		 * No arguments.  Print the available handlers.  If there are no
962 		 * available handlers, direct the user to '-h' for help
963 		 * information.
964 		 */
965 		if (print_all_handlers() == 0) {
966 			(void) printf("No handlers registered.\n");
967 			(void) printf("Run 'zinject -h' for usage "
968 			    "information.\n");
969 		}
970 		libzfs_fini(g_zfs);
971 		return (0);
972 	}
973 
974 	while ((c = getopt(argc, argv,
975 	    ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
976 		switch (c) {
977 		case 'a':
978 			flags |= ZINJECT_FLUSH_ARC;
979 			break;
980 		case 'A':
981 			if (strcasecmp(optarg, "degrade") == 0) {
982 				action = VDEV_STATE_DEGRADED;
983 			} else if (strcasecmp(optarg, "fault") == 0) {
984 				action = VDEV_STATE_FAULTED;
985 			} else {
986 				(void) fprintf(stderr, "invalid action '%s': "
987 				    "must be 'degrade' or 'fault'\n", optarg);
988 				usage();
989 				libzfs_fini(g_zfs);
990 				return (1);
991 			}
992 			break;
993 		case 'b':
994 			raw = optarg;
995 			break;
996 		case 'c':
997 			cancel = optarg;
998 			break;
999 		case 'C':
1000 			ret = parse_dvas(optarg, &dvas);
1001 			if (ret != 0) {
1002 				(void) fprintf(stderr, "invalid DVA list '%s': "
1003 				    "DVAs should be 0 indexed and separated by "
1004 				    "commas.\n", optarg);
1005 				usage();
1006 				libzfs_fini(g_zfs);
1007 				return (1);
1008 			}
1009 			break;
1010 		case 'd':
1011 			device = optarg;
1012 			break;
1013 		case 'D':
1014 			errno = 0;
1015 			ret = parse_delay(optarg, &record.zi_timer,
1016 			    &record.zi_nlanes);
1017 			if (ret != 0) {
1018 
1019 				(void) fprintf(stderr, "invalid i/o delay "
1020 				    "value: '%s'\n", optarg);
1021 				usage();
1022 				libzfs_fini(g_zfs);
1023 				return (1);
1024 			}
1025 			break;
1026 		case 'e':
1027 			error = str_to_err(optarg);
1028 			if (error < 0) {
1029 				(void) fprintf(stderr, "invalid error type "
1030 				    "'%s': must be one of: io decompress "
1031 				    "decrypt nxio dtl corrupt noop "
1032 				    "io-prefail\n",
1033 				    optarg);
1034 				usage();
1035 				libzfs_fini(g_zfs);
1036 				return (1);
1037 			}
1038 			break;
1039 		case 'f':
1040 			ret = parse_frequency(optarg, &record.zi_freq);
1041 			if (ret != 0) {
1042 				(void) fprintf(stderr, "%sfrequency value must "
1043 				    "be in the range [0.0001, 100.0]\n",
1044 				    ret == EINVAL ? "invalid value: " :
1045 				    ret == ERANGE ? "out of range: " : "");
1046 				libzfs_fini(g_zfs);
1047 				return (1);
1048 			}
1049 			break;
1050 		case 'F':
1051 			record.zi_failfast = B_TRUE;
1052 			break;
1053 		case 'g':
1054 			dur_txg = 1;
1055 			record.zi_duration = (int)strtol(optarg, &end, 10);
1056 			if (record.zi_duration <= 0 || *end != '\0') {
1057 				(void) fprintf(stderr, "invalid duration '%s': "
1058 				    "must be a positive integer\n", optarg);
1059 				usage();
1060 				libzfs_fini(g_zfs);
1061 				return (1);
1062 			}
1063 			/* store duration of txgs as its negative */
1064 			record.zi_duration *= -1;
1065 			break;
1066 		case 'h':
1067 			usage();
1068 			libzfs_fini(g_zfs);
1069 			return (0);
1070 		case 'I':
1071 			/* default duration, if one hasn't yet been defined */
1072 			nowrites = 1;
1073 			if (dur_secs == 0 && dur_txg == 0)
1074 				record.zi_duration = 30;
1075 			break;
1076 		case 'l':
1077 			level = (int)strtol(optarg, &end, 10);
1078 			if (*end != '\0') {
1079 				(void) fprintf(stderr, "invalid level '%s': "
1080 				    "must be an integer\n", optarg);
1081 				usage();
1082 				libzfs_fini(g_zfs);
1083 				return (1);
1084 			}
1085 			break;
1086 		case 'm':
1087 			domount = 1;
1088 			break;
1089 		case 'p':
1090 			(void) strlcpy(record.zi_func, optarg,
1091 			    sizeof (record.zi_func));
1092 			record.zi_cmd = ZINJECT_PANIC;
1093 			break;
1094 		case 'P':
1095 			if (strcasecmp(optarg, "import") == 0) {
1096 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
1097 			} else if (strcasecmp(optarg, "export") == 0) {
1098 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
1099 			} else {
1100 				(void) fprintf(stderr, "invalid command '%s': "
1101 				    "must be 'import' or 'export'\n", optarg);
1102 				usage();
1103 				libzfs_fini(g_zfs);
1104 				return (1);
1105 			}
1106 			break;
1107 		case 'q':
1108 			quiet = 1;
1109 			break;
1110 		case 'r':
1111 			range = optarg;
1112 			flags |= ZINJECT_CALC_RANGE;
1113 			break;
1114 		case 's':
1115 			dur_secs = 1;
1116 			record.zi_duration = (int)strtol(optarg, &end, 10);
1117 			if (record.zi_duration <= 0 || *end != '\0') {
1118 				(void) fprintf(stderr, "invalid duration '%s': "
1119 				    "must be a positive integer\n", optarg);
1120 				usage();
1121 				libzfs_fini(g_zfs);
1122 				return (1);
1123 			}
1124 			break;
1125 		case 'T':
1126 			io_type = str_to_iotype(optarg);
1127 			if (io_type == ZINJECT_IOTYPES) {
1128 				(void) fprintf(stderr, "invalid I/O type "
1129 				    "'%s': must be 'read', 'write', 'free', "
1130 				    "'claim', 'flush' or 'all'\n", optarg);
1131 				usage();
1132 				libzfs_fini(g_zfs);
1133 				return (1);
1134 			}
1135 			break;
1136 		case 't':
1137 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1138 			    !MOS_TYPE(type)) {
1139 				(void) fprintf(stderr, "invalid type '%s'\n",
1140 				    optarg);
1141 				usage();
1142 				libzfs_fini(g_zfs);
1143 				return (1);
1144 			}
1145 			break;
1146 		case 'u':
1147 			flags |= ZINJECT_UNLOAD_SPA;
1148 			break;
1149 		case 'E':
1150 			ready_delay = MSEC2NSEC(strtol(optarg, &end, 10));
1151 			if (ready_delay <= 0 || *end != '\0') {
1152 				(void) fprintf(stderr, "invalid delay '%s': "
1153 				    "must be a positive duration\n", optarg);
1154 				usage();
1155 				libzfs_fini(g_zfs);
1156 				return (1);
1157 			}
1158 			record.zi_cmd = ZINJECT_DELAY_READY;
1159 			record.zi_timer = ready_delay;
1160 			break;
1161 		case 'L':
1162 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1163 			    !LABEL_TYPE(type)) {
1164 				(void) fprintf(stderr, "invalid label type "
1165 				    "'%s'\n", optarg);
1166 				usage();
1167 				libzfs_fini(g_zfs);
1168 				return (1);
1169 			}
1170 			break;
1171 		case ':':
1172 			(void) fprintf(stderr, "option -%c requires an "
1173 			    "operand\n", optopt);
1174 			usage();
1175 			libzfs_fini(g_zfs);
1176 			return (1);
1177 		case '?':
1178 			(void) fprintf(stderr, "invalid option '%c'\n",
1179 			    optopt);
1180 			usage();
1181 			libzfs_fini(g_zfs);
1182 			return (2);
1183 		}
1184 	}
1185 
1186 	argc -= optind;
1187 	argv += optind;
1188 
1189 	if (record.zi_duration != 0 && record.zi_cmd == 0)
1190 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
1191 
1192 	if (cancel != NULL) {
1193 		/*
1194 		 * '-c' is invalid with any other options.
1195 		 */
1196 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1197 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1198 		    record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) {
1199 			(void) fprintf(stderr, "cancel (-c) incompatible with "
1200 			    "any other options\n");
1201 			usage();
1202 			libzfs_fini(g_zfs);
1203 			return (2);
1204 		}
1205 		if (argc != 0) {
1206 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
1207 			usage();
1208 			libzfs_fini(g_zfs);
1209 			return (2);
1210 		}
1211 
1212 		if (strcmp(cancel, "all") == 0) {
1213 			return (cancel_all_handlers());
1214 		} else {
1215 			int id = (int)strtol(cancel, &end, 10);
1216 			if (*end != '\0') {
1217 				(void) fprintf(stderr, "invalid handle id '%s':"
1218 				    " must be an integer or 'all'\n", cancel);
1219 				usage();
1220 				libzfs_fini(g_zfs);
1221 				return (1);
1222 			}
1223 			return (cancel_handler(id));
1224 		}
1225 	}
1226 
1227 	if (device != NULL) {
1228 		/*
1229 		 * Device (-d) injection uses a completely different mechanism
1230 		 * for doing injection, so handle it separately here.
1231 		 */
1232 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1233 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1234 		    dvas != 0 || ready_delay >= 0) {
1235 			(void) fprintf(stderr, "device (-d) incompatible with "
1236 			    "data error injection\n");
1237 			usage();
1238 			libzfs_fini(g_zfs);
1239 			return (2);
1240 		}
1241 
1242 		if (argc != 1) {
1243 			(void) fprintf(stderr, "device (-d) injection requires "
1244 			    "a single pool name\n");
1245 			usage();
1246 			libzfs_fini(g_zfs);
1247 			return (2);
1248 		}
1249 
1250 		(void) strlcpy(pool, argv[0], sizeof (pool));
1251 		dataset[0] = '\0';
1252 
1253 		if (error == ECKSUM) {
1254 			(void) fprintf(stderr, "device error type must be "
1255 			    "'io', 'nxio' or 'corrupt'\n");
1256 			libzfs_fini(g_zfs);
1257 			return (1);
1258 		}
1259 
1260 		if (error == EILSEQ &&
1261 		    (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1262 			(void) fprintf(stderr, "device corrupt errors require "
1263 			    "io type read and a frequency value\n");
1264 			libzfs_fini(g_zfs);
1265 			return (1);
1266 		}
1267 
1268 		record.zi_iotype = io_type;
1269 		if (translate_device(pool, device, label, &record) != 0) {
1270 			libzfs_fini(g_zfs);
1271 			return (1);
1272 		}
1273 
1274 		if (record.zi_nlanes) {
1275 			switch (io_type) {
1276 			case ZINJECT_IOTYPE_READ:
1277 			case ZINJECT_IOTYPE_WRITE:
1278 			case ZINJECT_IOTYPE_ALL:
1279 				break;
1280 			default:
1281 				(void) fprintf(stderr, "I/O type for a delay "
1282 				    "must be 'read' or 'write'\n");
1283 				usage();
1284 				libzfs_fini(g_zfs);
1285 				return (1);
1286 			}
1287 		}
1288 
1289 		if (!error)
1290 			error = ENXIO;
1291 
1292 		if (action != VDEV_STATE_UNKNOWN)
1293 			return (perform_action(pool, &record, action));
1294 
1295 	} else if (raw != NULL) {
1296 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1297 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1298 		    record.zi_freq > 0 || dvas != 0) {
1299 			(void) fprintf(stderr, "raw (-b) format with "
1300 			    "any other options\n");
1301 			usage();
1302 			libzfs_fini(g_zfs);
1303 			return (2);
1304 		}
1305 
1306 		if (argc != 1) {
1307 			(void) fprintf(stderr, "raw (-b) format expects a "
1308 			    "single pool name\n");
1309 			usage();
1310 			libzfs_fini(g_zfs);
1311 			return (2);
1312 		}
1313 
1314 		(void) strlcpy(pool, argv[0], sizeof (pool));
1315 		dataset[0] = '\0';
1316 
1317 		if (error == ENXIO) {
1318 			(void) fprintf(stderr, "data error type must be "
1319 			    "'checksum' or 'io'\n");
1320 			libzfs_fini(g_zfs);
1321 			return (1);
1322 		}
1323 
1324 		if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1325 			record.zi_cmd = ZINJECT_DATA_FAULT;
1326 			if (!error)
1327 				error = EIO;
1328 		} else if (error != 0) {
1329 			(void) fprintf(stderr, "error type -e incompatible "
1330 			    "with delay injection\n");
1331 			libzfs_fini(g_zfs);
1332 			return (1);
1333 		} else {
1334 			record.zi_iotype = io_type;
1335 		}
1336 
1337 		if (translate_raw(raw, &record) != 0) {
1338 			libzfs_fini(g_zfs);
1339 			return (1);
1340 		}
1341 	} else if (record.zi_cmd == ZINJECT_PANIC) {
1342 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1343 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1344 		    dvas != 0) {
1345 			(void) fprintf(stderr, "%s incompatible with other "
1346 			    "options\n", "import|export delay (-P)");
1347 			usage();
1348 			libzfs_fini(g_zfs);
1349 			return (2);
1350 		}
1351 
1352 		if (argc < 1 || argc > 2) {
1353 			(void) fprintf(stderr, "panic (-p) injection requires "
1354 			    "a single pool name and an optional id\n");
1355 			usage();
1356 			libzfs_fini(g_zfs);
1357 			return (2);
1358 		}
1359 
1360 		(void) strlcpy(pool, argv[0], sizeof (pool));
1361 		if (argv[1] != NULL)
1362 			record.zi_type = atoi(argv[1]);
1363 		dataset[0] = '\0';
1364 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1365 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1366 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1367 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1368 		    dvas != 0) {
1369 			(void) fprintf(stderr, "%s incompatible with other "
1370 			    "options\n", "import|export delay (-P)");
1371 			usage();
1372 			libzfs_fini(g_zfs);
1373 			return (2);
1374 		}
1375 
1376 		if (argc != 1 || record.zi_duration <= 0) {
1377 			(void) fprintf(stderr, "import|export delay (-P) "
1378 			    "injection requires a duration (-s) and a single "
1379 			    "pool name\n");
1380 			usage();
1381 			libzfs_fini(g_zfs);
1382 			return (2);
1383 		}
1384 
1385 		(void) strlcpy(pool, argv[0], sizeof (pool));
1386 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1387 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1388 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1389 			(void) fprintf(stderr, "hardware failure (-I) "
1390 			    "incompatible with other options\n");
1391 			usage();
1392 			libzfs_fini(g_zfs);
1393 			return (2);
1394 		}
1395 
1396 		if (nowrites == 0) {
1397 			(void) fprintf(stderr, "-s or -g meaningless "
1398 			    "without -I (ignore writes)\n");
1399 			usage();
1400 			libzfs_fini(g_zfs);
1401 			return (2);
1402 		} else if (dur_secs && dur_txg) {
1403 			(void) fprintf(stderr, "choose a duration either "
1404 			    "in seconds (-s) or a number of txgs (-g) "
1405 			    "but not both\n");
1406 			usage();
1407 			libzfs_fini(g_zfs);
1408 			return (2);
1409 		} else if (argc != 1) {
1410 			(void) fprintf(stderr, "ignore writes (-I) "
1411 			    "injection requires a single pool name\n");
1412 			usage();
1413 			libzfs_fini(g_zfs);
1414 			return (2);
1415 		}
1416 
1417 		(void) strlcpy(pool, argv[0], sizeof (pool));
1418 		dataset[0] = '\0';
1419 	} else if (type == TYPE_INVAL) {
1420 		if (flags == 0) {
1421 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1422 			    "'-t', '-a', '-p', '-I' or '-u' "
1423 			    "must be specified\n");
1424 			usage();
1425 			libzfs_fini(g_zfs);
1426 			return (2);
1427 		}
1428 
1429 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1430 			(void) strlcpy(pool, argv[0], sizeof (pool));
1431 			dataset[0] = '\0';
1432 		} else if (argc != 0) {
1433 			(void) fprintf(stderr, "extraneous argument for "
1434 			    "'-f'\n");
1435 			usage();
1436 			libzfs_fini(g_zfs);
1437 			return (2);
1438 		}
1439 
1440 		flags |= ZINJECT_NULL;
1441 	} else {
1442 		if (argc != 1) {
1443 			(void) fprintf(stderr, "missing object\n");
1444 			usage();
1445 			libzfs_fini(g_zfs);
1446 			return (2);
1447 		}
1448 
1449 		if (error == ENXIO || error == EILSEQ) {
1450 			(void) fprintf(stderr, "data error type must be "
1451 			    "'checksum' or 'io'\n");
1452 			libzfs_fini(g_zfs);
1453 			return (1);
1454 		}
1455 
1456 		if (dvas != 0) {
1457 			if (error == EACCES || error == EINVAL) {
1458 				(void) fprintf(stderr, "the '-C' option may "
1459 				    "not be used with logical data errors "
1460 				    "'decrypt' and 'decompress'\n");
1461 				libzfs_fini(g_zfs);
1462 				return (1);
1463 			}
1464 
1465 			record.zi_dvas = dvas;
1466 		}
1467 
1468 		if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) {
1469 			(void) fprintf(stderr, "error type -e incompatible "
1470 			    "with delay injection\n");
1471 			libzfs_fini(g_zfs);
1472 			return (1);
1473 		}
1474 
1475 		if (error == EACCES) {
1476 			if (type != TYPE_DATA) {
1477 				(void) fprintf(stderr, "decryption errors "
1478 				    "may only be injected for 'data' types\n");
1479 				libzfs_fini(g_zfs);
1480 				return (1);
1481 			}
1482 
1483 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1484 			/*
1485 			 * Internally, ZFS actually uses ECKSUM for decryption
1486 			 * errors since EACCES is used to indicate the key was
1487 			 * not found.
1488 			 */
1489 			error = ECKSUM;
1490 		} else if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1491 			record.zi_cmd = ZINJECT_DATA_FAULT;
1492 			if (!error)
1493 				error = EIO;
1494 		} else {
1495 			record.zi_iotype = io_type;
1496 		}
1497 
1498 		if (translate_record(type, argv[0], range, level, &record, pool,
1499 		    dataset) != 0) {
1500 			libzfs_fini(g_zfs);
1501 			return (1);
1502 		}
1503 	}
1504 
1505 	/*
1506 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1507 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1508 	 * time we access the pool.
1509 	 */
1510 	if (dataset[0] != '\0' && domount) {
1511 		if ((zhp = zfs_open(g_zfs, dataset,
1512 		    ZFS_TYPE_DATASET)) == NULL) {
1513 			libzfs_fini(g_zfs);
1514 			return (1);
1515 		}
1516 		if (zfs_unmount(zhp, NULL, 0) != 0) {
1517 			libzfs_fini(g_zfs);
1518 			return (1);
1519 		}
1520 	}
1521 
1522 	record.zi_error = error;
1523 
1524 	ret = register_handler(pool, flags, &record, quiet);
1525 
1526 	if (dataset[0] != '\0' && domount)
1527 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1528 
1529 	libzfs_fini(g_zfs);
1530 
1531 	return (ret);
1532 }
1533