xref: /freebsd/sys/contrib/openzfs/cmd/zinject/zinject.c (revision 36c970ed985ff3dd5443db4bf2aa58799028512c)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2017, Intel Corporation.
26  * Copyright (c) 2023-2025, Klara, Inc.
27  */
28 
29 /*
30  * ZFS Fault Injector
31  *
32  * This userland component takes a set of options and uses libzpool to translate
33  * from a user-visible object type and name to an internal representation.
34  * There are two basic types of faults: device faults and data faults.
35  *
36  *
37  * DEVICE FAULTS
38  *
39  * Errors can be injected into a particular vdev using the '-d' option.  This
40  * option takes a path or vdev GUID to uniquely identify the device within a
41  * pool.  There are four types of errors that can be injected: EIO, ENXIO,
42  * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
43  * default is ENXIO.  For EIO failures, any attempt to read data from the device
44  * will return EIO, but a subsequent attempt to reopen the device will succeed.
45  * For ENXIO failures, any attempt to read from the device will return EIO, and
46  * any attempt to reopen the device will return ENXIO.  The EILSEQ failures
47  * only apply to read operations (-T read) and will flip a bit after the device
48  * has read the original data.
49  *
50  * For label faults, the -L option must be specified. This allows faults
51  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
52  * of all the labels for the specified device.
53  *
54  * This form of the command looks like:
55  *
56  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
57  *
58  *
59  * DATA FAULTS
60  *
61  * We begin with a tuple of the form:
62  *
63  * 	<type,level,range,object>
64  *
65  * 	type	A string describing the type of data to target.  Each type
66  * 		implicitly describes how to interpret 'object'. Currently,
67  * 		the following values are supported:
68  *
69  * 		data		User data for a file
70  * 		dnode		Dnode for a file or directory
71  *
72  *		The following MOS objects are special.  Instead of injecting
73  *		errors on a particular object or blkid, we inject errors across
74  *		all objects of the given type.
75  *
76  * 		mos		Any data in the MOS
77  * 		mosdir		object directory
78  * 		config		pool configuration
79  * 		bpobj		blkptr list
80  * 		spacemap	spacemap
81  * 		metaslab	metaslab
82  * 		errlog		persistent error log
83  *
84  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
85  * 		a range is given, this corresponds to the indirect block
86  * 		corresponding to the specific range.
87  *
88  *	range	A numerical range [start,end) within the object.  Defaults to
89  *		the full size of the file.
90  *
91  * 	object	A string describing the logical location of the object.  For
92  * 		files and directories (currently the only supported types),
93  * 		this is the path of the object on disk.
94  *
95  * This is translated, via libzpool, into the following internal representation:
96  *
97  * 	<type,objset,object,level,range>
98  *
99  * These types should be self-explanatory.  This tuple is then passed to the
100  * kernel via a special ioctl() to initiate fault injection for the given
101  * object.  Note that 'type' is not strictly necessary for fault injection, but
102  * is used when translating existing faults into a human-readable string.
103  *
104  *
105  * The command itself takes one of the forms:
106  *
107  * 	zinject
108  * 	zinject <-a | -u pool>
109  * 	zinject -c <id|all>
110  * 	zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]
111  *	    [-T iotype] [-t type object | -b bookmark pool]
112  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
113  *	    [-r range] <object>
114  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
115  *
116  * With no arguments, the command prints all currently registered injection
117  * handlers, with their numeric identifiers.
118  *
119  * The '-c' option will clear the given handler, or all handlers if 'all' is
120  * specified.
121  *
122  * The '-e' option takes a string describing the errno to simulate.  This must
123  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
124  * will result in the same behavior, but RAID-Z will produce a different set of
125  * ereports for this situation.
126  *
127  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
128  * specified, then the ARC cache is flushed appropriately.  If '-u' is
129  * specified, then the underlying SPA is unloaded.  Either of these flags can be
130  * specified independently of any other handlers.  The '-m' flag automatically
131  * does an unmount and remount of the underlying dataset to aid in flushing the
132  * cache.
133  *
134  * The '-f' flag controls the frequency of errors injected, expressed as a
135  * real number percentage between 0.0001 and 100.  The default is 100.
136  *
137  * The <object> form is responsible for actually injecting the handler into the
138  * framework.  It takes the arguments described above, translates them to the
139  * internal tuple using libzpool, and then issues an ioctl() to register the
140  * handler.
141  *
142  * The '-b' option can target a specific bookmark, regardless of whether a
143  * human-readable interface has been designed.  It allows developers to specify
144  * a particular block by number.
145  *
146  * The '-E' option injects pipeline ready stage delays for the given object or
147  * bookmark. The delay is specified in milliseconds, and it supports I/O type
148  * and range filters.
149  */
150 
151 #include <errno.h>
152 #include <fcntl.h>
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <string.h>
156 #include <strings.h>
157 #include <unistd.h>
158 
159 #include <sys/fs/zfs.h>
160 #include <sys/mount.h>
161 
162 #include <libzfs.h>
163 
164 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
165 
166 #include "zinject.h"
167 
168 libzfs_handle_t *g_zfs;
169 int zfs_fd;
170 
171 static const char *const errtable[TYPE_INVAL] = {
172 	"data",
173 	"dnode",
174 	"mos",
175 	"mosdir",
176 	"metaslab",
177 	"config",
178 	"bpobj",
179 	"spacemap",
180 	"errlog",
181 	"uber",
182 	"nvlist",
183 	"pad1",
184 	"pad2"
185 };
186 
187 static err_type_t
name_to_type(const char * arg)188 name_to_type(const char *arg)
189 {
190 	int i;
191 	for (i = 0; i < TYPE_INVAL; i++)
192 		if (strcmp(errtable[i], arg) == 0)
193 			return (i);
194 
195 	return (TYPE_INVAL);
196 }
197 
198 static const char *
type_to_name(uint64_t type)199 type_to_name(uint64_t type)
200 {
201 	switch (type) {
202 	case DMU_OT_OBJECT_DIRECTORY:
203 		return ("mosdir");
204 	case DMU_OT_OBJECT_ARRAY:
205 		return ("metaslab");
206 	case DMU_OT_PACKED_NVLIST:
207 		return ("config");
208 	case DMU_OT_BPOBJ:
209 		return ("bpobj");
210 	case DMU_OT_SPACE_MAP:
211 		return ("spacemap");
212 	case DMU_OT_ERROR_LOG:
213 		return ("errlog");
214 	default:
215 		return ("-");
216 	}
217 }
218 
219 struct errstr {
220 	int		err;
221 	const char	*str;
222 };
223 static const struct errstr errstrtable[] = {
224 	{ EIO,		"io" },
225 	{ ECKSUM,	"checksum" },
226 	{ EINVAL,	"decompress" },
227 	{ EACCES,	"decrypt" },
228 	{ ENXIO,	"nxio" },
229 	{ ECHILD,	"dtl" },
230 	{ EILSEQ,	"corrupt" },
231 	{ ENOSYS,	"noop" },
232 	{ 0, NULL },
233 };
234 
235 static int
str_to_err(const char * str)236 str_to_err(const char *str)
237 {
238 	for (int i = 0; errstrtable[i].str != NULL; i++)
239 		if (strcasecmp(errstrtable[i].str, str) == 0)
240 			return (errstrtable[i].err);
241 	return (-1);
242 }
243 static const char *
err_to_str(int err)244 err_to_str(int err)
245 {
246 	for (int i = 0; errstrtable[i].str != NULL; i++)
247 		if (errstrtable[i].err == err)
248 			return (errstrtable[i].str);
249 	return ("[unknown]");
250 }
251 
252 static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
253 	[ZINJECT_IOTYPE_NULL]	= "null",
254 	[ZINJECT_IOTYPE_READ]	= "read",
255 	[ZINJECT_IOTYPE_WRITE]	= "write",
256 	[ZINJECT_IOTYPE_FREE]	= "free",
257 	[ZINJECT_IOTYPE_CLAIM]	= "claim",
258 	[ZINJECT_IOTYPE_FLUSH]	= "flush",
259 	[ZINJECT_IOTYPE_TRIM]	= "trim",
260 	[ZINJECT_IOTYPE_ALL]	= "all",
261 	[ZINJECT_IOTYPE_PROBE]	= "probe",
262 };
263 
264 static zinject_iotype_t
str_to_iotype(const char * arg)265 str_to_iotype(const char *arg)
266 {
267 	for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
268 		if (iotypestrtable[iotype] != NULL &&
269 		    strcasecmp(iotypestrtable[iotype], arg) == 0)
270 			return (iotype);
271 	return (ZINJECT_IOTYPES);
272 }
273 
274 static const char *
iotype_to_str(zinject_iotype_t iotype)275 iotype_to_str(zinject_iotype_t iotype)
276 {
277 	if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
278 		return ("[unknown]");
279 	return (iotypestrtable[iotype]);
280 }
281 
/*
 * Print the usage message to stdout.
 *
 * The whole text is a single string literal passed to printf(); it contains
 * no conversion specifications, so it is emitted verbatim.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    /* No space after this "\n": it would indent the next line. */
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n"
	    "\t\t[-T iotype] [-t type object | -b bookmark pool]\n"
	    "\n"
	    "\t\tInject pipeline ready stage delays for the given object path\n"
	    "\t\t(data or dnode) or raw bookmark. The delay is specified in\n"
	    "\t\tmilliseconds.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
416 
417 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)418 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
419     void *data)
420 {
421 	zfs_cmd_t zc = {"\0"};
422 	int ret;
423 
424 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
425 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
426 		    &zc.zc_inject_record, data)) != 0)
427 			return (ret);
428 
429 	if (errno != ENOENT) {
430 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
431 		    strerror(errno));
432 		return (-1);
433 	}
434 
435 	return (0);
436 }
437 
438 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)439 print_data_handler(int id, const char *pool, zinject_record_t *record,
440     void *data)
441 {
442 	int *count = data;
443 
444 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
445 	    record->zi_duration != 0) {
446 		return (0);
447 	}
448 
449 	if (*count == 0) {
450 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
451 		    "%-15s  %-6s  %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
452 		    "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
453 		(void) printf("---  ---------------  ------  "
454 		    "------  --------  ---  ----  ---------------  "
455 		    "------  ------\n");
456 	}
457 
458 	*count += 1;
459 
460 	char rangebuf[32];
461 	if (record->zi_start == 0 && record->zi_end == -1ULL)
462 		snprintf(rangebuf, sizeof (rangebuf), "all");
463 	else
464 		snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
465 		    (u_longlong_t)record->zi_start,
466 		    (u_longlong_t)record->zi_end);
467 
468 
469 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  %-15s  "
470 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
471 	    (u_longlong_t)record->zi_objset,
472 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
473 	    record->zi_level, record->zi_dvas, rangebuf,
474 	    record->zi_match_count, record->zi_inject_count);
475 
476 	return (0);
477 }
478 
479 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)480 print_device_handler(int id, const char *pool, zinject_record_t *record,
481     void *data)
482 {
483 	int *count = data;
484 
485 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
486 		return (0);
487 
488 	if (record->zi_cmd == ZINJECT_DELAY_IO)
489 		return (0);
490 
491 	if (*count == 0) {
492 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s  "
493 		    "%-6s  %-6s\n",
494 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
495 		    "MATCH", "INJECT");
496 		(void) printf(
497 		    "---  ---------------  ----------------  "
498 		    "-----  ----------  ---------  "
499 		    "------  ------\n");
500 	}
501 
502 	*count += 1;
503 
504 	double freq = record->zi_freq == 0 ? 100.0f :
505 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
506 
507 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%  "
508 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
509 	    (u_longlong_t)record->zi_guid,
510 	    iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
511 	    freq, record->zi_match_count, record->zi_inject_count);
512 
513 	return (0);
514 }
515 
516 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)517 print_delay_handler(int id, const char *pool, zinject_record_t *record,
518     void *data)
519 {
520 	int *count = data;
521 
522 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
523 		return (0);
524 
525 	if (record->zi_cmd != ZINJECT_DELAY_IO)
526 		return (0);
527 
528 	if (*count == 0) {
529 		(void) printf("%3s  %-15s  %-16s  %-10s  %-5s  %-9s  "
530 		    "%-6s  %-6s\n",
531 		    "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
532 		    "MATCH", "INJECT");
533 		(void) printf("---  ---------------  ----------------  "
534 		    "----------  -----  ---------  "
535 		    "------  ------\n");
536 	}
537 
538 	*count += 1;
539 
540 	double freq = record->zi_freq == 0 ? 100.0f :
541 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
542 
543 	(void) printf("%3d  %-15s  %llx  %10llu  %5llu  %8.4f%%  "
544 	    "%6" PRIu64 "  %6" PRIu64 "\n", id, pool,
545 	    (u_longlong_t)record->zi_guid,
546 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
547 	    (u_longlong_t)record->zi_nlanes,
548 	    freq, record->zi_match_count, record->zi_inject_count);
549 
550 	return (0);
551 }
552 
553 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)554 print_panic_handler(int id, const char *pool, zinject_record_t *record,
555     void *data)
556 {
557 	int *count = data;
558 
559 	if (record->zi_func[0] == '\0')
560 		return (0);
561 
562 	if (*count == 0) {
563 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
564 		(void) printf("---  ---------------  ----------------\n");
565 	}
566 
567 	*count += 1;
568 
569 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
570 
571 	return (0);
572 }
573 
574 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)575 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
576     void *data)
577 {
578 	int *count = data;
579 
580 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
581 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
582 		return (0);
583 	}
584 
585 	if (*count == 0) {
586 		(void) printf("%3s  %-19s  %-11s  %s\n",
587 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
588 		(void) printf("---  -------------------  -----------"
589 		    "  -------\n");
590 	}
591 
592 	*count += 1;
593 
594 	(void) printf("%3d  %-19s  %-11llu  %s\n",
595 	    id, pool, (u_longlong_t)record->zi_duration,
596 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
597 
598 	return (0);
599 }
600 
601 /*
602  * Print all registered error handlers.  Returns the number of handlers
603  * registered.
604  */
605 static int
print_all_handlers(void)606 print_all_handlers(void)
607 {
608 	int count = 0, total = 0;
609 
610 	(void) iter_handlers(print_device_handler, &count);
611 	if (count > 0) {
612 		total += count;
613 		(void) printf("\n");
614 		count = 0;
615 	}
616 
617 	(void) iter_handlers(print_delay_handler, &count);
618 	if (count > 0) {
619 		total += count;
620 		(void) printf("\n");
621 		count = 0;
622 	}
623 
624 	(void) iter_handlers(print_data_handler, &count);
625 	if (count > 0) {
626 		total += count;
627 		(void) printf("\n");
628 		count = 0;
629 	}
630 
631 	(void) iter_handlers(print_pool_delay_handler, &count);
632 	if (count > 0) {
633 		total += count;
634 		(void) printf("\n");
635 		count = 0;
636 	}
637 
638 	(void) iter_handlers(print_panic_handler, &count);
639 
640 	return (count + total);
641 }
642 
643 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)644 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
645     void *data)
646 {
647 	(void) pool, (void) record, (void) data;
648 	zfs_cmd_t zc = {"\0"};
649 
650 	zc.zc_guid = (uint64_t)id;
651 
652 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
653 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
654 		    id, strerror(errno));
655 		return (1);
656 	}
657 
658 	return (0);
659 }
660 
661 /*
662  * Remove all fault injection handlers.
663  */
664 static int
cancel_all_handlers(void)665 cancel_all_handlers(void)
666 {
667 	int ret = iter_handlers(cancel_one_handler, NULL);
668 
669 	if (ret == 0)
670 		(void) printf("removed all registered handlers\n");
671 
672 	return (ret);
673 }
674 
/*
 * Remove a specific fault injection handler.
 *
 * The removal itself (and the error message on failure) is shared with
 * cancel_one_handler(); this wrapper adds the confirmation message.
 * Returns 0 on success and 1 on failure.
 */
static int
cancel_handler(int id)
{
	if (cancel_one_handler(id, NULL, NULL, NULL) != 0)
		return (1);

	(void) printf("removed handler %d\n", id);

	return (0);
}
695 
696 /*
697  * Register a new fault injection handler.
698  */
699 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)700 register_handler(const char *pool, int flags, zinject_record_t *record,
701     int quiet)
702 {
703 	zfs_cmd_t zc = {"\0"};
704 
705 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
706 	zc.zc_inject_record = *record;
707 	zc.zc_guid = flags;
708 
709 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
710 		const char *errmsg = strerror(errno);
711 
712 		switch (errno) {
713 		case EDOM:
714 			errmsg = "block level exceeds max level of object";
715 			break;
716 		case EEXIST:
717 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
718 				errmsg = "pool already imported";
719 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
720 				errmsg = "a handler already exists";
721 			break;
722 		case ENOENT:
723 			/* import delay injector running on older zfs module */
724 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
725 				errmsg = "import delay injector not supported";
726 			break;
727 		default:
728 			break;
729 		}
730 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
731 		return (1);
732 	}
733 
734 	if (flags & ZINJECT_NULL)
735 		return (0);
736 
737 	if (quiet) {
738 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
739 	} else {
740 		boolean_t show_object = B_FALSE;
741 		boolean_t show_iotype = B_FALSE;
742 		(void) printf("Added handler %llu with the following "
743 		    "properties:\n", (u_longlong_t)zc.zc_guid);
744 		(void) printf("  pool: %s\n", pool);
745 		if (record->zi_guid) {
746 			(void) printf("  vdev: %llx\n",
747 			    (u_longlong_t)record->zi_guid);
748 			show_iotype = B_TRUE;
749 		} else if (record->zi_func[0] != '\0') {
750 			(void) printf("  panic function: %s\n",
751 			    record->zi_func);
752 		} else if (record->zi_duration > 0) {
753 			(void) printf(" time: %lld seconds\n",
754 			    (u_longlong_t)record->zi_duration);
755 		} else if (record->zi_duration < 0) {
756 			(void) printf(" txgs: %lld \n",
757 			    (u_longlong_t)-record->zi_duration);
758 		} else if (record->zi_timer > 0) {
759 			(void) printf(" timer: %lld ms\n",
760 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
761 			if (record->zi_cmd == ZINJECT_DELAY_READY) {
762 				show_object = B_TRUE;
763 				show_iotype = B_TRUE;
764 			}
765 		} else {
766 			show_object = B_TRUE;
767 		}
768 		if (show_iotype) {
769 			(void) printf("iotype: %s\n",
770 			    iotype_to_str(record->zi_iotype));
771 		}
772 		if (show_object) {
773 			(void) printf("objset: %llu\n",
774 			    (u_longlong_t)record->zi_objset);
775 			(void) printf("object: %llu\n",
776 			    (u_longlong_t)record->zi_object);
777 			(void) printf("  type: %llu\n",
778 			    (u_longlong_t)record->zi_type);
779 			(void) printf(" level: %d\n", record->zi_level);
780 			if (record->zi_start == 0 &&
781 			    record->zi_end == -1ULL)
782 				(void) printf(" range: all\n");
783 			else
784 				(void) printf(" range: [%llu, %llu)\n",
785 				    (u_longlong_t)record->zi_start,
786 				    (u_longlong_t)record->zi_end);
787 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
788 		}
789 	}
790 
791 	return (0);
792 }
793 
794 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)795 perform_action(const char *pool, zinject_record_t *record, int cmd)
796 {
797 	zfs_cmd_t zc = {"\0"};
798 
799 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
800 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
801 	zc.zc_guid = record->zi_guid;
802 	zc.zc_cookie = cmd;
803 
804 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
805 		return (0);
806 
807 	return (1);
808 }
809 
/*
 * Parse one unsigned decimal field of a delay specification.
 *
 * The field must start with a digit: strtoull() would otherwise accept a
 * leading '-' and silently negate the value.  On success stores the value in
 * '*out', sets '*endp' to the first unparsed character and returns 0.
 * Returns 1 if the field is missing, malformed or out of range.
 */
static int
parse_delay_field(const char *str, char **endp, uint64_t *out)
{
	unsigned long long val;

	if (*str < '0' || *str > '9')
		return (1);

	errno = 0;
	val = strtoull(str, endp, 10);
	if (errno != 0)
		return (1);

	*out = val;
	return (0);
}

/*
 * Parse a "latency:lanes" delay specification.
 *
 *	str	the string to parse; 'latency' is in milliseconds
 *	delay	receives the latency converted to nanoseconds
 *	nlanes	receives the number of lanes
 *
 * Both fields must be plain unsigned decimal numbers, and nothing may follow
 * the lanes field.  Returns 0 on success and 1 if the string is malformed,
 * the latency is zero, or the latency cannot be represented in nanoseconds.
 * The outputs are written only on success.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	uint64_t scan_delay;
	uint64_t scan_nlanes;
	char *end;

	if (parse_delay_field(str, &end, &scan_delay) != 0 || *end != ':')
		return (1);

	if (parse_delay_field(end + 1, &end, &scan_nlanes) != 0 ||
	    *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * Reject latencies whose nanosecond value would overflow a signed
	 * 64-bit quantity.  The limit is derived from MSEC2NSEC() itself so
	 * it cannot drift from the conversion used below.
	 */
	if (scan_delay > (uint64_t)INT64_MAX / (uint64_t)MSEC2NSEC(1))
		return (1);

	/*
	 * The unit of the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
838 
839 static int
parse_frequency(const char * str,uint32_t * percent)840 parse_frequency(const char *str, uint32_t *percent)
841 {
842 	double val;
843 	char *post;
844 
845 	val = strtod(str, &post);
846 	if (post == NULL || *post != '\0')
847 		return (EINVAL);
848 
849 	/* valid range is [0.0001, 100.0] */
850 	val /= 100.0f;
851 	if (val < 0.000001f || val > 1.0f)
852 		return (ERANGE);
853 
854 	/* convert to an integer for use by kernel */
855 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
856 
857 	return (0);
858 }
859 
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The DVAs provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010  (0x2)
 *	"0,1"	-> 0b0011  (0x3)
 *	"0,1,2"	-> 0b0111  (0x7)
 *
 * A valid specifier strictly alternates a DVA digit ('0', '1' or '2') with
 * a comma, starting and ending on a digit, and names each DVA at most once.
 * Leading, trailing or doubled commas are rejected.
 *
 * Returns 0 and stores the mask in '*dvas_out' on success; returns EINVAL
 * and leaves '*dvas_out' untouched otherwise.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	size_t len = strlen(str);
	uint32_t mask = 0;

	/*
	 * The longest valid string is 5 characters ("0,1,2").  Because digits
	 * and commas alternate and the string ends on a digit, a valid
	 * length is always odd.
	 */
	if (len == 0 || len > 5 || (len % 2) == 0)
		return (EINVAL);

	for (size_t i = 0; i < len; i++) {
		uint32_t bit;

		/* odd positions must hold the delimiter */
		if ((i % 2) == 1) {
			if (str[i] != ',')
				return (EINVAL);
			continue;
		}

		/* even positions must hold a DVA index */
		if (str[i] < '0' || str[i] > '2')
			return (EINVAL);

		/* check if this DVA has been set already */
		bit = 1U << (str[i] - '0');
		if (mask & bit)
			return (EINVAL);

		mask |= bit;
	}

	*dvas_out = mask;
	return (0);
}
912 
913 int
main(int argc,char ** argv)914 main(int argc, char **argv)
915 {
916 	int c;
917 	char *range = NULL;
918 	char *cancel = NULL;
919 	char *end;
920 	char *raw = NULL;
921 	char *device = NULL;
922 	int level = 0;
923 	int quiet = 0;
924 	int error = 0;
925 	int domount = 0;
926 	int io_type = ZINJECT_IOTYPE_ALL;
927 	int action = VDEV_STATE_UNKNOWN;
928 	err_type_t type = TYPE_INVAL;
929 	err_type_t label = TYPE_INVAL;
930 	zinject_record_t record = { 0 };
931 	char pool[MAXNAMELEN] = "";
932 	char dataset[MAXNAMELEN] = "";
933 	zfs_handle_t *zhp = NULL;
934 	int nowrites = 0;
935 	int dur_txg = 0;
936 	int dur_secs = 0;
937 	int ret;
938 	int flags = 0;
939 	uint32_t dvas = 0;
940 	hrtime_t ready_delay = -1;
941 
942 	if ((g_zfs = libzfs_init()) == NULL) {
943 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
944 		return (1);
945 	}
946 
947 	libzfs_print_on_error(g_zfs, B_TRUE);
948 
949 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
950 		(void) fprintf(stderr, "failed to open ZFS device\n");
951 		libzfs_fini(g_zfs);
952 		return (1);
953 	}
954 
955 	if (argc == 1) {
956 		/*
957 		 * No arguments.  Print the available handlers.  If there are no
958 		 * available handlers, direct the user to '-h' for help
959 		 * information.
960 		 */
961 		if (print_all_handlers() == 0) {
962 			(void) printf("No handlers registered.\n");
963 			(void) printf("Run 'zinject -h' for usage "
964 			    "information.\n");
965 		}
966 		libzfs_fini(g_zfs);
967 		return (0);
968 	}
969 
970 	while ((c = getopt(argc, argv,
971 	    ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
972 		switch (c) {
973 		case 'a':
974 			flags |= ZINJECT_FLUSH_ARC;
975 			break;
976 		case 'A':
977 			if (strcasecmp(optarg, "degrade") == 0) {
978 				action = VDEV_STATE_DEGRADED;
979 			} else if (strcasecmp(optarg, "fault") == 0) {
980 				action = VDEV_STATE_FAULTED;
981 			} else {
982 				(void) fprintf(stderr, "invalid action '%s': "
983 				    "must be 'degrade' or 'fault'\n", optarg);
984 				usage();
985 				libzfs_fini(g_zfs);
986 				return (1);
987 			}
988 			break;
989 		case 'b':
990 			raw = optarg;
991 			break;
992 		case 'c':
993 			cancel = optarg;
994 			break;
995 		case 'C':
996 			ret = parse_dvas(optarg, &dvas);
997 			if (ret != 0) {
998 				(void) fprintf(stderr, "invalid DVA list '%s': "
999 				    "DVAs should be 0 indexed and separated by "
1000 				    "commas.\n", optarg);
1001 				usage();
1002 				libzfs_fini(g_zfs);
1003 				return (1);
1004 			}
1005 			break;
1006 		case 'd':
1007 			device = optarg;
1008 			break;
1009 		case 'D':
1010 			errno = 0;
1011 			ret = parse_delay(optarg, &record.zi_timer,
1012 			    &record.zi_nlanes);
1013 			if (ret != 0) {
1014 
1015 				(void) fprintf(stderr, "invalid i/o delay "
1016 				    "value: '%s'\n", optarg);
1017 				usage();
1018 				libzfs_fini(g_zfs);
1019 				return (1);
1020 			}
1021 			break;
1022 		case 'e':
1023 			error = str_to_err(optarg);
1024 			if (error < 0) {
1025 				(void) fprintf(stderr, "invalid error type "
1026 				    "'%s': must be one of: io decompress "
1027 				    "decrypt nxio dtl corrupt noop\n",
1028 				    optarg);
1029 				usage();
1030 				libzfs_fini(g_zfs);
1031 				return (1);
1032 			}
1033 			break;
1034 		case 'f':
1035 			ret = parse_frequency(optarg, &record.zi_freq);
1036 			if (ret != 0) {
1037 				(void) fprintf(stderr, "%sfrequency value must "
1038 				    "be in the range [0.0001, 100.0]\n",
1039 				    ret == EINVAL ? "invalid value: " :
1040 				    ret == ERANGE ? "out of range: " : "");
1041 				libzfs_fini(g_zfs);
1042 				return (1);
1043 			}
1044 			break;
1045 		case 'F':
1046 			record.zi_failfast = B_TRUE;
1047 			break;
1048 		case 'g':
1049 			dur_txg = 1;
1050 			record.zi_duration = (int)strtol(optarg, &end, 10);
1051 			if (record.zi_duration <= 0 || *end != '\0') {
1052 				(void) fprintf(stderr, "invalid duration '%s': "
1053 				    "must be a positive integer\n", optarg);
1054 				usage();
1055 				libzfs_fini(g_zfs);
1056 				return (1);
1057 			}
1058 			/* store duration of txgs as its negative */
1059 			record.zi_duration *= -1;
1060 			break;
1061 		case 'h':
1062 			usage();
1063 			libzfs_fini(g_zfs);
1064 			return (0);
1065 		case 'I':
1066 			/* default duration, if one hasn't yet been defined */
1067 			nowrites = 1;
1068 			if (dur_secs == 0 && dur_txg == 0)
1069 				record.zi_duration = 30;
1070 			break;
1071 		case 'l':
1072 			level = (int)strtol(optarg, &end, 10);
1073 			if (*end != '\0') {
1074 				(void) fprintf(stderr, "invalid level '%s': "
1075 				    "must be an integer\n", optarg);
1076 				usage();
1077 				libzfs_fini(g_zfs);
1078 				return (1);
1079 			}
1080 			break;
1081 		case 'm':
1082 			domount = 1;
1083 			break;
1084 		case 'p':
1085 			(void) strlcpy(record.zi_func, optarg,
1086 			    sizeof (record.zi_func));
1087 			record.zi_cmd = ZINJECT_PANIC;
1088 			break;
1089 		case 'P':
1090 			if (strcasecmp(optarg, "import") == 0) {
1091 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
1092 			} else if (strcasecmp(optarg, "export") == 0) {
1093 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
1094 			} else {
1095 				(void) fprintf(stderr, "invalid command '%s': "
1096 				    "must be 'import' or 'export'\n", optarg);
1097 				usage();
1098 				libzfs_fini(g_zfs);
1099 				return (1);
1100 			}
1101 			break;
1102 		case 'q':
1103 			quiet = 1;
1104 			break;
1105 		case 'r':
1106 			range = optarg;
1107 			flags |= ZINJECT_CALC_RANGE;
1108 			break;
1109 		case 's':
1110 			dur_secs = 1;
1111 			record.zi_duration = (int)strtol(optarg, &end, 10);
1112 			if (record.zi_duration <= 0 || *end != '\0') {
1113 				(void) fprintf(stderr, "invalid duration '%s': "
1114 				    "must be a positive integer\n", optarg);
1115 				usage();
1116 				libzfs_fini(g_zfs);
1117 				return (1);
1118 			}
1119 			break;
1120 		case 'T':
1121 			io_type = str_to_iotype(optarg);
1122 			if (io_type == ZINJECT_IOTYPES) {
1123 				(void) fprintf(stderr, "invalid I/O type "
1124 				    "'%s': must be 'read', 'write', 'free', "
1125 				    "'claim', 'flush' or 'all'\n", optarg);
1126 				usage();
1127 				libzfs_fini(g_zfs);
1128 				return (1);
1129 			}
1130 			break;
1131 		case 't':
1132 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1133 			    !MOS_TYPE(type)) {
1134 				(void) fprintf(stderr, "invalid type '%s'\n",
1135 				    optarg);
1136 				usage();
1137 				libzfs_fini(g_zfs);
1138 				return (1);
1139 			}
1140 			break;
1141 		case 'u':
1142 			flags |= ZINJECT_UNLOAD_SPA;
1143 			break;
1144 		case 'E':
1145 			ready_delay = MSEC2NSEC(strtol(optarg, &end, 10));
1146 			if (ready_delay <= 0 || *end != '\0') {
1147 				(void) fprintf(stderr, "invalid delay '%s': "
1148 				    "must be a positive duration\n", optarg);
1149 				usage();
1150 				libzfs_fini(g_zfs);
1151 				return (1);
1152 			}
1153 			record.zi_cmd = ZINJECT_DELAY_READY;
1154 			record.zi_timer = ready_delay;
1155 			break;
1156 		case 'L':
1157 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1158 			    !LABEL_TYPE(type)) {
1159 				(void) fprintf(stderr, "invalid label type "
1160 				    "'%s'\n", optarg);
1161 				usage();
1162 				libzfs_fini(g_zfs);
1163 				return (1);
1164 			}
1165 			break;
1166 		case ':':
1167 			(void) fprintf(stderr, "option -%c requires an "
1168 			    "operand\n", optopt);
1169 			usage();
1170 			libzfs_fini(g_zfs);
1171 			return (1);
1172 		case '?':
1173 			(void) fprintf(stderr, "invalid option '%c'\n",
1174 			    optopt);
1175 			usage();
1176 			libzfs_fini(g_zfs);
1177 			return (2);
1178 		}
1179 	}
1180 
1181 	argc -= optind;
1182 	argv += optind;
1183 
1184 	if (record.zi_duration != 0 && record.zi_cmd == 0)
1185 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
1186 
1187 	if (cancel != NULL) {
1188 		/*
1189 		 * '-c' is invalid with any other options.
1190 		 */
1191 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1192 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1193 		    record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) {
1194 			(void) fprintf(stderr, "cancel (-c) incompatible with "
1195 			    "any other options\n");
1196 			usage();
1197 			libzfs_fini(g_zfs);
1198 			return (2);
1199 		}
1200 		if (argc != 0) {
1201 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
1202 			usage();
1203 			libzfs_fini(g_zfs);
1204 			return (2);
1205 		}
1206 
1207 		if (strcmp(cancel, "all") == 0) {
1208 			return (cancel_all_handlers());
1209 		} else {
1210 			int id = (int)strtol(cancel, &end, 10);
1211 			if (*end != '\0') {
1212 				(void) fprintf(stderr, "invalid handle id '%s':"
1213 				    " must be an integer or 'all'\n", cancel);
1214 				usage();
1215 				libzfs_fini(g_zfs);
1216 				return (1);
1217 			}
1218 			return (cancel_handler(id));
1219 		}
1220 	}
1221 
1222 	if (device != NULL) {
1223 		/*
1224 		 * Device (-d) injection uses a completely different mechanism
1225 		 * for doing injection, so handle it separately here.
1226 		 */
1227 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1228 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1229 		    dvas != 0 || ready_delay >= 0) {
1230 			(void) fprintf(stderr, "device (-d) incompatible with "
1231 			    "data error injection\n");
1232 			usage();
1233 			libzfs_fini(g_zfs);
1234 			return (2);
1235 		}
1236 
1237 		if (argc != 1) {
1238 			(void) fprintf(stderr, "device (-d) injection requires "
1239 			    "a single pool name\n");
1240 			usage();
1241 			libzfs_fini(g_zfs);
1242 			return (2);
1243 		}
1244 
1245 		(void) strlcpy(pool, argv[0], sizeof (pool));
1246 		dataset[0] = '\0';
1247 
1248 		if (error == ECKSUM) {
1249 			(void) fprintf(stderr, "device error type must be "
1250 			    "'io', 'nxio' or 'corrupt'\n");
1251 			libzfs_fini(g_zfs);
1252 			return (1);
1253 		}
1254 
1255 		if (error == EILSEQ &&
1256 		    (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1257 			(void) fprintf(stderr, "device corrupt errors require "
1258 			    "io type read and a frequency value\n");
1259 			libzfs_fini(g_zfs);
1260 			return (1);
1261 		}
1262 
1263 		record.zi_iotype = io_type;
1264 		if (translate_device(pool, device, label, &record) != 0) {
1265 			libzfs_fini(g_zfs);
1266 			return (1);
1267 		}
1268 
1269 		if (record.zi_nlanes) {
1270 			switch (io_type) {
1271 			case ZINJECT_IOTYPE_READ:
1272 			case ZINJECT_IOTYPE_WRITE:
1273 			case ZINJECT_IOTYPE_ALL:
1274 				break;
1275 			default:
1276 				(void) fprintf(stderr, "I/O type for a delay "
1277 				    "must be 'read' or 'write'\n");
1278 				usage();
1279 				libzfs_fini(g_zfs);
1280 				return (1);
1281 			}
1282 		}
1283 
1284 		if (!error)
1285 			error = ENXIO;
1286 
1287 		if (action != VDEV_STATE_UNKNOWN)
1288 			return (perform_action(pool, &record, action));
1289 
1290 	} else if (raw != NULL) {
1291 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1292 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1293 		    record.zi_freq > 0 || dvas != 0) {
1294 			(void) fprintf(stderr, "raw (-b) format with "
1295 			    "any other options\n");
1296 			usage();
1297 			libzfs_fini(g_zfs);
1298 			return (2);
1299 		}
1300 
1301 		if (argc != 1) {
1302 			(void) fprintf(stderr, "raw (-b) format expects a "
1303 			    "single pool name\n");
1304 			usage();
1305 			libzfs_fini(g_zfs);
1306 			return (2);
1307 		}
1308 
1309 		(void) strlcpy(pool, argv[0], sizeof (pool));
1310 		dataset[0] = '\0';
1311 
1312 		if (error == ENXIO) {
1313 			(void) fprintf(stderr, "data error type must be "
1314 			    "'checksum' or 'io'\n");
1315 			libzfs_fini(g_zfs);
1316 			return (1);
1317 		}
1318 
1319 		if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1320 			record.zi_cmd = ZINJECT_DATA_FAULT;
1321 			if (!error)
1322 				error = EIO;
1323 		} else if (error != 0) {
1324 			(void) fprintf(stderr, "error type -e incompatible "
1325 			    "with delay injection\n");
1326 			libzfs_fini(g_zfs);
1327 			return (1);
1328 		} else {
1329 			record.zi_iotype = io_type;
1330 		}
1331 
1332 		if (translate_raw(raw, &record) != 0) {
1333 			libzfs_fini(g_zfs);
1334 			return (1);
1335 		}
1336 	} else if (record.zi_cmd == ZINJECT_PANIC) {
1337 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1338 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1339 		    dvas != 0) {
1340 			(void) fprintf(stderr, "%s incompatible with other "
1341 			    "options\n", "import|export delay (-P)");
1342 			usage();
1343 			libzfs_fini(g_zfs);
1344 			return (2);
1345 		}
1346 
1347 		if (argc < 1 || argc > 2) {
1348 			(void) fprintf(stderr, "panic (-p) injection requires "
1349 			    "a single pool name and an optional id\n");
1350 			usage();
1351 			libzfs_fini(g_zfs);
1352 			return (2);
1353 		}
1354 
1355 		(void) strlcpy(pool, argv[0], sizeof (pool));
1356 		if (argv[1] != NULL)
1357 			record.zi_type = atoi(argv[1]);
1358 		dataset[0] = '\0';
1359 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1360 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1361 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1362 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1363 		    dvas != 0) {
1364 			(void) fprintf(stderr, "%s incompatible with other "
1365 			    "options\n", "import|export delay (-P)");
1366 			usage();
1367 			libzfs_fini(g_zfs);
1368 			return (2);
1369 		}
1370 
1371 		if (argc != 1 || record.zi_duration <= 0) {
1372 			(void) fprintf(stderr, "import|export delay (-P) "
1373 			    "injection requires a duration (-s) and a single "
1374 			    "pool name\n");
1375 			usage();
1376 			libzfs_fini(g_zfs);
1377 			return (2);
1378 		}
1379 
1380 		(void) strlcpy(pool, argv[0], sizeof (pool));
1381 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1382 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1383 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1384 			(void) fprintf(stderr, "hardware failure (-I) "
1385 			    "incompatible with other options\n");
1386 			usage();
1387 			libzfs_fini(g_zfs);
1388 			return (2);
1389 		}
1390 
1391 		if (nowrites == 0) {
1392 			(void) fprintf(stderr, "-s or -g meaningless "
1393 			    "without -I (ignore writes)\n");
1394 			usage();
1395 			libzfs_fini(g_zfs);
1396 			return (2);
1397 		} else if (dur_secs && dur_txg) {
1398 			(void) fprintf(stderr, "choose a duration either "
1399 			    "in seconds (-s) or a number of txgs (-g) "
1400 			    "but not both\n");
1401 			usage();
1402 			libzfs_fini(g_zfs);
1403 			return (2);
1404 		} else if (argc != 1) {
1405 			(void) fprintf(stderr, "ignore writes (-I) "
1406 			    "injection requires a single pool name\n");
1407 			usage();
1408 			libzfs_fini(g_zfs);
1409 			return (2);
1410 		}
1411 
1412 		(void) strlcpy(pool, argv[0], sizeof (pool));
1413 		dataset[0] = '\0';
1414 	} else if (type == TYPE_INVAL) {
1415 		if (flags == 0) {
1416 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1417 			    "'-t', '-a', '-p', '-I' or '-u' "
1418 			    "must be specified\n");
1419 			usage();
1420 			libzfs_fini(g_zfs);
1421 			return (2);
1422 		}
1423 
1424 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1425 			(void) strlcpy(pool, argv[0], sizeof (pool));
1426 			dataset[0] = '\0';
1427 		} else if (argc != 0) {
1428 			(void) fprintf(stderr, "extraneous argument for "
1429 			    "'-f'\n");
1430 			usage();
1431 			libzfs_fini(g_zfs);
1432 			return (2);
1433 		}
1434 
1435 		flags |= ZINJECT_NULL;
1436 	} else {
1437 		if (argc != 1) {
1438 			(void) fprintf(stderr, "missing object\n");
1439 			usage();
1440 			libzfs_fini(g_zfs);
1441 			return (2);
1442 		}
1443 
1444 		if (error == ENXIO || error == EILSEQ) {
1445 			(void) fprintf(stderr, "data error type must be "
1446 			    "'checksum' or 'io'\n");
1447 			libzfs_fini(g_zfs);
1448 			return (1);
1449 		}
1450 
1451 		if (dvas != 0) {
1452 			if (error == EACCES || error == EINVAL) {
1453 				(void) fprintf(stderr, "the '-C' option may "
1454 				    "not be used with logical data errors "
1455 				    "'decrypt' and 'decompress'\n");
1456 				libzfs_fini(g_zfs);
1457 				return (1);
1458 			}
1459 
1460 			record.zi_dvas = dvas;
1461 		}
1462 
1463 		if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) {
1464 			(void) fprintf(stderr, "error type -e incompatible "
1465 			    "with delay injection\n");
1466 			libzfs_fini(g_zfs);
1467 			return (1);
1468 		}
1469 
1470 		if (error == EACCES) {
1471 			if (type != TYPE_DATA) {
1472 				(void) fprintf(stderr, "decryption errors "
1473 				    "may only be injected for 'data' types\n");
1474 				libzfs_fini(g_zfs);
1475 				return (1);
1476 			}
1477 
1478 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1479 			/*
1480 			 * Internally, ZFS actually uses ECKSUM for decryption
1481 			 * errors since EACCES is used to indicate the key was
1482 			 * not found.
1483 			 */
1484 			error = ECKSUM;
1485 		} else if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
1486 			record.zi_cmd = ZINJECT_DATA_FAULT;
1487 			if (!error)
1488 				error = EIO;
1489 		} else {
1490 			record.zi_iotype = io_type;
1491 		}
1492 
1493 		if (translate_record(type, argv[0], range, level, &record, pool,
1494 		    dataset) != 0) {
1495 			libzfs_fini(g_zfs);
1496 			return (1);
1497 		}
1498 	}
1499 
1500 	/*
1501 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1502 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1503 	 * time we access the pool.
1504 	 */
1505 	if (dataset[0] != '\0' && domount) {
1506 		if ((zhp = zfs_open(g_zfs, dataset,
1507 		    ZFS_TYPE_DATASET)) == NULL) {
1508 			libzfs_fini(g_zfs);
1509 			return (1);
1510 		}
1511 		if (zfs_unmount(zhp, NULL, 0) != 0) {
1512 			libzfs_fini(g_zfs);
1513 			return (1);
1514 		}
1515 	}
1516 
1517 	record.zi_error = error;
1518 
1519 	ret = register_handler(pool, flags, &record, quiet);
1520 
1521 	if (dataset[0] != '\0' && domount)
1522 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1523 
1524 	libzfs_fini(g_zfs);
1525 
1526 	return (ret);
1527 }
1528