xref: /freebsd/sys/contrib/openzfs/cmd/zinject/zinject.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2017, Intel Corporation.
26  * Copyright (c) 2023-2025, Klara, Inc.
27  */
28 
29 /*
30  * ZFS Fault Injector
31  *
32  * This userland component takes a set of options and uses libzpool to translate
33  * from a user-visible object type and name to an internal representation.
34  * There are two basic types of faults: device faults and data faults.
35  *
36  *
37  * DEVICE FAULTS
38  *
39  * Errors can be injected into a particular vdev using the '-d' option.  This
40  * option takes a path or vdev GUID to uniquely identify the device within a
41  * pool.  There are four types of errors that can be injected: EIO, ENXIO,
42  * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
43  * default is ENXIO.  For EIO failures, any attempt to read data from the device
44  * will return EIO, but a subsequent attempt to reopen the device will succeed.
45  * For ENXIO failures, any attempt to read from the device will return EIO, but
46  * any attempt to reopen the device will also return ENXIO.  The EILSEQ failures
47  * only apply to read operations (-T read) and will flip a bit after the device
48  * has read the original data.
49  *
50  * For label faults, the -L option must be specified. This allows faults
51  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
52  * of all the labels for the specified device.
53  *
54  * This form of the command looks like:
55  *
56  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
57  *
58  *
59  * DATA FAULTS
60  *
61  * We begin with a tuple of the form:
62  *
63  * 	<type,level,range,object>
64  *
65  * 	type	A string describing the type of data to target.  Each type
66  * 		implicitly describes how to interpret 'object'. Currently,
67  * 		the following values are supported:
68  *
69  * 		data		User data for a file
70  * 		dnode		Dnode for a file or directory
71  *
72  *		The following MOS objects are special.  Instead of injecting
73  *		errors on a particular object or blkid, we inject errors across
74  *		all objects of the given type.
75  *
76  * 		mos		Any data in the MOS
77  * 		mosdir		object directory
78  * 		config		pool configuration
79  * 		bpobj		blkptr list
80  * 		spacemap	spacemap
81  * 		metaslab	metaslab
82  * 		errlog		persistent error log
83  *
84  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
85  * 		a range is given, this corresponds to the indirect block
86  * 		corresponding to the specific range.
87  *
88  *	range	A numerical range [start,end) within the object.  Defaults to
89  *		the full size of the file.
90  *
91  * 	object	A string describing the logical location of the object.  For
92  * 		files and directories (currently the only supported types),
93  * 		this is the path of the object on disk.
94  *
95  * This is translated, via libzpool, into the following internal representation:
96  *
97  * 	<type,objset,object,level,range>
98  *
99  * These types should be self-explanatory.  This tuple is then passed to the
100  * kernel via a special ioctl() to initiate fault injection for the given
101  * object.  Note that 'type' is not strictly necessary for fault injection, but
102  * is used when translating existing faults into a human-readable string.
103  *
104  *
105  * The command itself takes one of the forms:
106  *
107  * 	zinject
108  * 	zinject <-a | -u pool>
109  * 	zinject -c <id|all>
110  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
111  *	    [-r range] <object>
112  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
113  *
114  * With no arguments, the command prints all currently registered injection
115  * handlers, with their numeric identifiers.
116  *
117  * The '-c' option will clear the given handler, or all handlers if 'all' is
118  * specified.
119  *
120  * The '-e' option takes a string describing the errno to simulate.  This must
121  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
122  * will result in the same behavior, but RAID-Z will produce a different set of
123  * ereports for this situation.
124  *
125  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
126  * specified, then the ARC cache is flushed appropriately.  If '-u' is
127  * specified, then the underlying SPA is unloaded.  Either of these flags can be
128  * specified independently of any other handlers.  The '-m' flag automatically
129  * does an unmount and remount of the underlying dataset to aid in flushing the
130  * cache.
131  *
132  * The '-f' flag controls the frequency of errors injected, expressed as a
133  * real number percentage between 0.0001 and 100.  The default is 100.
134  *
135  * The '-t' form is responsible for actually injecting the handler into the
136  * framework.  It takes the arguments described above, translates them to the
137  * internal tuple using libzpool, and then issues an ioctl() to register the
138  * handler.
139  *
140  * The final form can target a specific bookmark, regardless of whether a
141  * human-readable interface has been designed.  It allows developers to specify
142  * a particular block by number.
143  */
144 
145 #include <errno.h>
146 #include <fcntl.h>
147 #include <stdio.h>
148 #include <stdlib.h>
149 #include <string.h>
150 #include <strings.h>
151 #include <unistd.h>
152 
153 #include <sys/fs/zfs.h>
154 #include <sys/mount.h>
155 
156 #include <libzfs.h>
157 
158 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
159 
160 #include "zinject.h"
161 
/* libzfs handle; set up by libzfs_init() in main() and passed to zfs_ioctl(). */
libzfs_handle_t *g_zfs;
/* Descriptor for ZFS_DEV, opened read/write in main(). */
int zfs_fd;
164 
/*
 * Names accepted for a fault type.  name_to_type() returns the index of the
 * matching entry as the err_type_t value, so the position of each string in
 * this table is significant: it has to line up with the err_type_t
 * enumerators (presumably declared in zinject.h -- confirm before adding or
 * reordering entries).
 */
static const char *const errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
180 
181 static err_type_t
name_to_type(const char * arg)182 name_to_type(const char *arg)
183 {
184 	int i;
185 	for (i = 0; i < TYPE_INVAL; i++)
186 		if (strcmp(errtable[i], arg) == 0)
187 			return (i);
188 
189 	return (TYPE_INVAL);
190 }
191 
192 static const char *
type_to_name(uint64_t type)193 type_to_name(uint64_t type)
194 {
195 	switch (type) {
196 	case DMU_OT_OBJECT_DIRECTORY:
197 		return ("mosdir");
198 	case DMU_OT_OBJECT_ARRAY:
199 		return ("metaslab");
200 	case DMU_OT_PACKED_NVLIST:
201 		return ("config");
202 	case DMU_OT_BPOBJ:
203 		return ("bpobj");
204 	case DMU_OT_SPACE_MAP:
205 		return ("spacemap");
206 	case DMU_OT_ERROR_LOG:
207 		return ("errlog");
208 	default:
209 		return ("-");
210 	}
211 }
212 
213 struct errstr {
214 	int		err;
215 	const char	*str;
216 };
217 static const struct errstr errstrtable[] = {
218 	{ EIO,		"io" },
219 	{ ECKSUM,	"checksum" },
220 	{ EINVAL,	"decompress" },
221 	{ EACCES,	"decrypt" },
222 	{ ENXIO,	"nxio" },
223 	{ ECHILD,	"dtl" },
224 	{ EILSEQ,	"corrupt" },
225 	{ ENOSYS,	"noop" },
226 	{ 0, NULL },
227 };
228 
229 static int
str_to_err(const char * str)230 str_to_err(const char *str)
231 {
232 	for (int i = 0; errstrtable[i].str != NULL; i++)
233 		if (strcasecmp(errstrtable[i].str, str) == 0)
234 			return (errstrtable[i].err);
235 	return (-1);
236 }
237 static const char *
err_to_str(int err)238 err_to_str(int err)
239 {
240 	for (int i = 0; errstrtable[i].str != NULL; i++)
241 		if (errstrtable[i].err == err)
242 			return (errstrtable[i].str);
243 	return ("[unknown]");
244 }
245 
/*
 * Names of the I/O types, indexed by zinject_iotype_t value.  A slot that
 * has no initializer here is NULL; str_to_iotype() and iotype_to_str() both
 * check for that before using an entry.
 */
static const char *const iotypestrtable[ZINJECT_IOTYPES] = {
	[ZINJECT_IOTYPE_NULL]	= "null",
	[ZINJECT_IOTYPE_READ]	= "read",
	[ZINJECT_IOTYPE_WRITE]	= "write",
	[ZINJECT_IOTYPE_FREE]	= "free",
	[ZINJECT_IOTYPE_CLAIM]	= "claim",
	[ZINJECT_IOTYPE_FLUSH]	= "flush",
	[ZINJECT_IOTYPE_TRIM]	= "trim",
	[ZINJECT_IOTYPE_ALL]	= "all",
	[ZINJECT_IOTYPE_PROBE]	= "probe",
};
257 
258 static zinject_iotype_t
str_to_iotype(const char * arg)259 str_to_iotype(const char *arg)
260 {
261 	for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++)
262 		if (iotypestrtable[iotype] != NULL &&
263 		    strcasecmp(iotypestrtable[iotype], arg) == 0)
264 			return (iotype);
265 	return (ZINJECT_IOTYPES);
266 }
267 
268 static const char *
iotype_to_str(zinject_iotype_t iotype)269 iotype_to_str(zinject_iotype_t iotype)
270 {
271 	if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL)
272 		return ("[unknown]");
273 	return (iotypestrtable[iotype]);
274 }
275 
/*
 * Print the usage message, covering every invocation form of the command,
 * to stdout.
 *
 * The text is a single concatenated string literal; every line of output
 * starts with a tab (or is empty) and carries no trailing blanks.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only\n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or\n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
403 
404 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)405 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
406     void *data)
407 {
408 	zfs_cmd_t zc = {"\0"};
409 	int ret;
410 
411 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
412 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
413 		    &zc.zc_inject_record, data)) != 0)
414 			return (ret);
415 
416 	if (errno != ENOENT) {
417 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
418 		    strerror(errno));
419 		return (-1);
420 	}
421 
422 	return (0);
423 }
424 
425 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)426 print_data_handler(int id, const char *pool, zinject_record_t *record,
427     void *data)
428 {
429 	int *count = data;
430 
431 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
432 	    record->zi_duration != 0) {
433 		return (0);
434 	}
435 
436 	if (*count == 0) {
437 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
438 		    "%-15s  %-6s  %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
439 		    "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
440 		(void) printf("---  ---------------  ------  "
441 		    "------  --------  ---  ----  ---------------  "
442 		    "------  ------\n");
443 	}
444 
445 	*count += 1;
446 
447 	char rangebuf[32];
448 	if (record->zi_start == 0 && record->zi_end == -1ULL)
449 		snprintf(rangebuf, sizeof (rangebuf), "all");
450 	else
451 		snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
452 		    (u_longlong_t)record->zi_start,
453 		    (u_longlong_t)record->zi_end);
454 
455 
456 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  %-15s  "
457 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_objset,
458 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
459 	    record->zi_level, record->zi_dvas, rangebuf,
460 	    (u_longlong_t)record->zi_match_count,
461 	    (u_longlong_t)record->zi_inject_count);
462 
463 	return (0);
464 }
465 
466 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)467 print_device_handler(int id, const char *pool, zinject_record_t *record,
468     void *data)
469 {
470 	int *count = data;
471 
472 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
473 		return (0);
474 
475 	if (record->zi_cmd == ZINJECT_DELAY_IO)
476 		return (0);
477 
478 	if (*count == 0) {
479 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s  "
480 		    "%-6s  %-6s\n",
481 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
482 		    "MATCH", "INJECT");
483 		(void) printf(
484 		    "---  ---------------  ----------------  "
485 		    "-----  ----------  ---------  "
486 		    "------  ------\n");
487 	}
488 
489 	*count += 1;
490 
491 	double freq = record->zi_freq == 0 ? 100.0f :
492 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
493 
494 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%  "
495 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_guid,
496 	    iotype_to_str(record->zi_iotype), err_to_str(record->zi_error),
497 	    freq, (u_longlong_t)record->zi_match_count,
498 	    (u_longlong_t)record->zi_inject_count);
499 
500 	return (0);
501 }
502 
503 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)504 print_delay_handler(int id, const char *pool, zinject_record_t *record,
505     void *data)
506 {
507 	int *count = data;
508 
509 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
510 		return (0);
511 
512 	if (record->zi_cmd != ZINJECT_DELAY_IO)
513 		return (0);
514 
515 	if (*count == 0) {
516 		(void) printf("%3s  %-15s  %-16s  %-10s  %-5s  %-9s  "
517 		    "%-6s  %-6s\n",
518 		    "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
519 		    "MATCH", "INJECT");
520 		(void) printf("---  ---------------  ----------------  "
521 		    "----------  -----  ---------  "
522 		    "------  ------\n");
523 	}
524 
525 	*count += 1;
526 
527 	double freq = record->zi_freq == 0 ? 100.0f :
528 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
529 
530 	(void) printf("%3d  %-15s  %llx  %10llu  %5llu  %8.4f%%  "
531 	    "%6llu  %6llu\n", id, pool, (u_longlong_t)record->zi_guid,
532 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
533 	    (u_longlong_t)record->zi_nlanes, freq,
534 	    (u_longlong_t)record->zi_match_count,
535 	    (u_longlong_t)record->zi_inject_count);
536 
537 	return (0);
538 }
539 
540 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)541 print_panic_handler(int id, const char *pool, zinject_record_t *record,
542     void *data)
543 {
544 	int *count = data;
545 
546 	if (record->zi_func[0] == '\0')
547 		return (0);
548 
549 	if (*count == 0) {
550 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
551 		(void) printf("---  ---------------  ----------------\n");
552 	}
553 
554 	*count += 1;
555 
556 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
557 
558 	return (0);
559 }
560 
561 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)562 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
563     void *data)
564 {
565 	int *count = data;
566 
567 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
568 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
569 		return (0);
570 	}
571 
572 	if (*count == 0) {
573 		(void) printf("%3s  %-19s  %-11s  %s\n",
574 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
575 		(void) printf("---  -------------------  -----------"
576 		    "  -------\n");
577 	}
578 
579 	*count += 1;
580 
581 	(void) printf("%3d  %-19s  %-11llu  %s\n",
582 	    id, pool, (u_longlong_t)record->zi_duration,
583 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
584 
585 	return (0);
586 }
587 
588 /*
589  * Print all registered error handlers.  Returns the number of handlers
590  * registered.
591  */
592 static int
print_all_handlers(void)593 print_all_handlers(void)
594 {
595 	int count = 0, total = 0;
596 
597 	(void) iter_handlers(print_device_handler, &count);
598 	if (count > 0) {
599 		total += count;
600 		(void) printf("\n");
601 		count = 0;
602 	}
603 
604 	(void) iter_handlers(print_delay_handler, &count);
605 	if (count > 0) {
606 		total += count;
607 		(void) printf("\n");
608 		count = 0;
609 	}
610 
611 	(void) iter_handlers(print_data_handler, &count);
612 	if (count > 0) {
613 		total += count;
614 		(void) printf("\n");
615 		count = 0;
616 	}
617 
618 	(void) iter_handlers(print_pool_delay_handler, &count);
619 	if (count > 0) {
620 		total += count;
621 		(void) printf("\n");
622 		count = 0;
623 	}
624 
625 	(void) iter_handlers(print_panic_handler, &count);
626 
627 	return (count + total);
628 }
629 
630 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)631 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
632     void *data)
633 {
634 	(void) pool, (void) record, (void) data;
635 	zfs_cmd_t zc = {"\0"};
636 
637 	zc.zc_guid = (uint64_t)id;
638 
639 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
640 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
641 		    id, strerror(errno));
642 		return (1);
643 	}
644 
645 	return (0);
646 }
647 
648 /*
649  * Remove all fault injection handlers.
650  */
651 static int
cancel_all_handlers(void)652 cancel_all_handlers(void)
653 {
654 	int ret = iter_handlers(cancel_one_handler, NULL);
655 
656 	if (ret == 0)
657 		(void) printf("removed all registered handlers\n");
658 
659 	return (ret);
660 }
661 
/*
 * Remove the fault injection handler with the given id and print a
 * confirmation.  Returns 0 on success and 1 on failure; the failure
 * message is printed by cancel_one_handler(), which issues the ioctl.
 */
static int
cancel_handler(int id)
{
	if (cancel_one_handler(id, NULL, NULL, NULL) != 0)
		return (1);

	(void) printf("removed handler %d\n", id);

	return (0);
}
682 
683 /*
684  * Register a new fault injection handler.
685  */
686 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)687 register_handler(const char *pool, int flags, zinject_record_t *record,
688     int quiet)
689 {
690 	zfs_cmd_t zc = {"\0"};
691 
692 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
693 	zc.zc_inject_record = *record;
694 	zc.zc_guid = flags;
695 
696 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
697 		const char *errmsg = strerror(errno);
698 
699 		switch (errno) {
700 		case EDOM:
701 			errmsg = "block level exceeds max level of object";
702 			break;
703 		case EEXIST:
704 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
705 				errmsg = "pool already imported";
706 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
707 				errmsg = "a handler already exists";
708 			break;
709 		case ENOENT:
710 			/* import delay injector running on older zfs module */
711 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
712 				errmsg = "import delay injector not supported";
713 			break;
714 		default:
715 			break;
716 		}
717 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
718 		return (1);
719 	}
720 
721 	if (flags & ZINJECT_NULL)
722 		return (0);
723 
724 	if (quiet) {
725 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
726 	} else {
727 		(void) printf("Added handler %llu with the following "
728 		    "properties:\n", (u_longlong_t)zc.zc_guid);
729 		(void) printf("  pool: %s\n", pool);
730 		if (record->zi_guid) {
731 			(void) printf("  vdev: %llx\n",
732 			    (u_longlong_t)record->zi_guid);
733 		} else if (record->zi_func[0] != '\0') {
734 			(void) printf("  panic function: %s\n",
735 			    record->zi_func);
736 		} else if (record->zi_duration > 0) {
737 			(void) printf(" time: %lld seconds\n",
738 			    (u_longlong_t)record->zi_duration);
739 		} else if (record->zi_duration < 0) {
740 			(void) printf(" txgs: %lld \n",
741 			    (u_longlong_t)-record->zi_duration);
742 		} else if (record->zi_timer > 0) {
743 			(void) printf(" timer: %lld ms\n",
744 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
745 		} else {
746 			(void) printf("objset: %llu\n",
747 			    (u_longlong_t)record->zi_objset);
748 			(void) printf("object: %llu\n",
749 			    (u_longlong_t)record->zi_object);
750 			(void) printf("  type: %llu\n",
751 			    (u_longlong_t)record->zi_type);
752 			(void) printf(" level: %d\n", record->zi_level);
753 			if (record->zi_start == 0 &&
754 			    record->zi_end == -1ULL)
755 				(void) printf(" range: all\n");
756 			else
757 				(void) printf(" range: [%llu, %llu)\n",
758 				    (u_longlong_t)record->zi_start,
759 				    (u_longlong_t)record->zi_end);
760 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
761 		}
762 	}
763 
764 	return (0);
765 }
766 
767 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)768 perform_action(const char *pool, zinject_record_t *record, int cmd)
769 {
770 	zfs_cmd_t zc = {"\0"};
771 
772 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
773 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
774 	zc.zc_guid = record->zi_guid;
775 	zc.zc_cookie = cmd;
776 
777 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
778 		return (0);
779 
780 	return (1);
781 }
782 
/*
 * Parse a delay specification of the form "<delay>:<lanes>", where both
 * fields are unsigned decimal integers and <delay> is in milliseconds.
 *
 * On success the delay, converted to nanoseconds, is stored in '*delay',
 * the lane count is stored in '*nlanes' and 0 is returned.  Returns 1,
 * leaving both outputs untouched, if the string is malformed (missing
 * field, sign, whitespace or trailing characters), if either number is out
 * of range, or if the delay is zero.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long long scan_delay;
	unsigned long long scan_nlanes;
	char *end;

	/*
	 * strtoull() skips leading whitespace and silently negates a value
	 * written with a '-' sign, so insist that each field starts with a
	 * digit before converting it.
	 */
	if (str[0] < '0' || str[0] > '9')
		return (1);

	errno = 0;
	scan_delay = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	end++;
	if (*end < '0' || *end > '9')
		return (1);

	scan_nlanes = strtoull(end, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * Refuse delays whose nanosecond value would not fit in a signed
	 * 64-bit integer, so that the conversion below cannot overflow.
	 */
	if (scan_delay > INT64_MAX / MSEC2NSEC(1))
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
811 
812 static int
parse_frequency(const char * str,uint32_t * percent)813 parse_frequency(const char *str, uint32_t *percent)
814 {
815 	double val;
816 	char *post;
817 
818 	val = strtod(str, &post);
819 	if (post == NULL || *post != '\0')
820 		return (EINVAL);
821 
822 	/* valid range is [0.0001, 100.0] */
823 	val /= 100.0f;
824 	if (val < 0.000001f || val > 1.0f)
825 		return (ERANGE);
826 
827 	/* convert to an integer for use by kernel */
828 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
829 
830 	return (0);
831 }
832 
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010  (0x2)
 *	"0,1"	-> 0b0011  (0x3)
 *	"0,1,2"	-> 0b0111  (0x7)
 *
 * The accepted form is exactly one digit ('0', '1' or '2'), optionally
 * followed by more digits each preceded by a single comma, with no digit
 * repeated.  Returns 0 and stores the mask in '*dvas_out' on success.
 * Returns EINVAL, leaving '*dvas_out' untouched, for anything else,
 * including an empty string and leading, trailing or doubled commas.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	const size_t len = strlen(str);
	uint32_t mask = 0;

	/*
	 * max string length is 5 ("0,1,2"); a well-formed list always has
	 * an odd length because digits and commas strictly alternate
	 */
	if (len == 0 || len > 5 || len % 2 == 0)
		return (EINVAL);

	for (size_t i = 0; i < len; i++) {
		uint32_t bit;

		/* odd positions hold the comma between two DVAs */
		if (i % 2 == 1) {
			if (str[i] != ',')
				return (EINVAL);
			continue;
		}

		/* even positions hold a DVA index */
		if (str[i] < '0' || str[i] > '2')
			return (EINVAL);

		/* check if this DVA has been set already */
		bit = 1U << (str[i] - '0');
		if (mask & bit)
			return (EINVAL);

		mask |= bit;
	}

	*dvas_out = mask;
	return (0);
}
885 
886 int
main(int argc,char ** argv)887 main(int argc, char **argv)
888 {
889 	int c;
890 	char *range = NULL;
891 	char *cancel = NULL;
892 	char *end;
893 	char *raw = NULL;
894 	char *device = NULL;
895 	int level = 0;
896 	int quiet = 0;
897 	int error = 0;
898 	int domount = 0;
899 	int io_type = ZINJECT_IOTYPE_ALL;
900 	int action = VDEV_STATE_UNKNOWN;
901 	err_type_t type = TYPE_INVAL;
902 	err_type_t label = TYPE_INVAL;
903 	zinject_record_t record = { 0 };
904 	char pool[MAXNAMELEN] = "";
905 	char dataset[MAXNAMELEN] = "";
906 	zfs_handle_t *zhp = NULL;
907 	int nowrites = 0;
908 	int dur_txg = 0;
909 	int dur_secs = 0;
910 	int ret;
911 	int flags = 0;
912 	uint32_t dvas = 0;
913 
914 	if ((g_zfs = libzfs_init()) == NULL) {
915 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
916 		return (1);
917 	}
918 
919 	libzfs_print_on_error(g_zfs, B_TRUE);
920 
921 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
922 		(void) fprintf(stderr, "failed to open ZFS device\n");
923 		libzfs_fini(g_zfs);
924 		return (1);
925 	}
926 
927 	if (argc == 1) {
928 		/*
929 		 * No arguments.  Print the available handlers.  If there are no
930 		 * available handlers, direct the user to '-h' for help
931 		 * information.
932 		 */
933 		if (print_all_handlers() == 0) {
934 			(void) printf("No handlers registered.\n");
935 			(void) printf("Run 'zinject -h' for usage "
936 			    "information.\n");
937 		}
938 		libzfs_fini(g_zfs);
939 		return (0);
940 	}
941 
942 	while ((c = getopt(argc, argv,
943 	    ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
944 		switch (c) {
945 		case 'a':
946 			flags |= ZINJECT_FLUSH_ARC;
947 			break;
948 		case 'A':
949 			if (strcasecmp(optarg, "degrade") == 0) {
950 				action = VDEV_STATE_DEGRADED;
951 			} else if (strcasecmp(optarg, "fault") == 0) {
952 				action = VDEV_STATE_FAULTED;
953 			} else {
954 				(void) fprintf(stderr, "invalid action '%s': "
955 				    "must be 'degrade' or 'fault'\n", optarg);
956 				usage();
957 				libzfs_fini(g_zfs);
958 				return (1);
959 			}
960 			break;
961 		case 'b':
962 			raw = optarg;
963 			break;
964 		case 'c':
965 			cancel = optarg;
966 			break;
967 		case 'C':
968 			ret = parse_dvas(optarg, &dvas);
969 			if (ret != 0) {
970 				(void) fprintf(stderr, "invalid DVA list '%s': "
971 				    "DVAs should be 0 indexed and separated by "
972 				    "commas.\n", optarg);
973 				usage();
974 				libzfs_fini(g_zfs);
975 				return (1);
976 			}
977 			break;
978 		case 'd':
979 			device = optarg;
980 			break;
981 		case 'D':
982 			errno = 0;
983 			ret = parse_delay(optarg, &record.zi_timer,
984 			    &record.zi_nlanes);
985 			if (ret != 0) {
986 
987 				(void) fprintf(stderr, "invalid i/o delay "
988 				    "value: '%s'\n", optarg);
989 				usage();
990 				libzfs_fini(g_zfs);
991 				return (1);
992 			}
993 			break;
994 		case 'e':
995 			error = str_to_err(optarg);
996 			if (error < 0) {
997 				(void) fprintf(stderr, "invalid error type "
998 				    "'%s': must be one of: io decompress "
999 				    "decrypt nxio dtl corrupt noop\n",
1000 				    optarg);
1001 				usage();
1002 				libzfs_fini(g_zfs);
1003 				return (1);
1004 			}
1005 			break;
1006 		case 'f':
1007 			ret = parse_frequency(optarg, &record.zi_freq);
1008 			if (ret != 0) {
1009 				(void) fprintf(stderr, "%sfrequency value must "
1010 				    "be in the range [0.0001, 100.0]\n",
1011 				    ret == EINVAL ? "invalid value: " :
1012 				    ret == ERANGE ? "out of range: " : "");
1013 				libzfs_fini(g_zfs);
1014 				return (1);
1015 			}
1016 			break;
1017 		case 'F':
1018 			record.zi_failfast = B_TRUE;
1019 			break;
1020 		case 'g':
1021 			dur_txg = 1;
1022 			record.zi_duration = (int)strtol(optarg, &end, 10);
1023 			if (record.zi_duration <= 0 || *end != '\0') {
1024 				(void) fprintf(stderr, "invalid duration '%s': "
1025 				    "must be a positive integer\n", optarg);
1026 				usage();
1027 				libzfs_fini(g_zfs);
1028 				return (1);
1029 			}
1030 			/* store duration of txgs as its negative */
1031 			record.zi_duration *= -1;
1032 			break;
1033 		case 'h':
1034 			usage();
1035 			libzfs_fini(g_zfs);
1036 			return (0);
1037 		case 'I':
1038 			/* default duration, if one hasn't yet been defined */
1039 			nowrites = 1;
1040 			if (dur_secs == 0 && dur_txg == 0)
1041 				record.zi_duration = 30;
1042 			break;
1043 		case 'l':
1044 			level = (int)strtol(optarg, &end, 10);
1045 			if (*end != '\0') {
1046 				(void) fprintf(stderr, "invalid level '%s': "
1047 				    "must be an integer\n", optarg);
1048 				usage();
1049 				libzfs_fini(g_zfs);
1050 				return (1);
1051 			}
1052 			break;
1053 		case 'm':
1054 			domount = 1;
1055 			break;
1056 		case 'p':
1057 			(void) strlcpy(record.zi_func, optarg,
1058 			    sizeof (record.zi_func));
1059 			record.zi_cmd = ZINJECT_PANIC;
1060 			break;
1061 		case 'P':
1062 			if (strcasecmp(optarg, "import") == 0) {
1063 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
1064 			} else if (strcasecmp(optarg, "export") == 0) {
1065 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
1066 			} else {
1067 				(void) fprintf(stderr, "invalid command '%s': "
1068 				    "must be 'import' or 'export'\n", optarg);
1069 				usage();
1070 				libzfs_fini(g_zfs);
1071 				return (1);
1072 			}
1073 			break;
1074 		case 'q':
1075 			quiet = 1;
1076 			break;
1077 		case 'r':
1078 			range = optarg;
1079 			flags |= ZINJECT_CALC_RANGE;
1080 			break;
1081 		case 's':
1082 			dur_secs = 1;
1083 			record.zi_duration = (int)strtol(optarg, &end, 10);
1084 			if (record.zi_duration <= 0 || *end != '\0') {
1085 				(void) fprintf(stderr, "invalid duration '%s': "
1086 				    "must be a positive integer\n", optarg);
1087 				usage();
1088 				libzfs_fini(g_zfs);
1089 				return (1);
1090 			}
1091 			break;
1092 		case 'T':
1093 			io_type = str_to_iotype(optarg);
1094 			if (io_type == ZINJECT_IOTYPES) {
1095 				(void) fprintf(stderr, "invalid I/O type "
1096 				    "'%s': must be 'read', 'write', 'free', "
1097 				    "'claim', 'flush' or 'all'\n", optarg);
1098 				usage();
1099 				libzfs_fini(g_zfs);
1100 				return (1);
1101 			}
1102 			break;
1103 		case 't':
1104 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1105 			    !MOS_TYPE(type)) {
1106 				(void) fprintf(stderr, "invalid type '%s'\n",
1107 				    optarg);
1108 				usage();
1109 				libzfs_fini(g_zfs);
1110 				return (1);
1111 			}
1112 			break;
1113 		case 'u':
1114 			flags |= ZINJECT_UNLOAD_SPA;
1115 			break;
1116 		case 'L':
1117 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1118 			    !LABEL_TYPE(type)) {
1119 				(void) fprintf(stderr, "invalid label type "
1120 				    "'%s'\n", optarg);
1121 				usage();
1122 				libzfs_fini(g_zfs);
1123 				return (1);
1124 			}
1125 			break;
1126 		case ':':
1127 			(void) fprintf(stderr, "option -%c requires an "
1128 			    "operand\n", optopt);
1129 			usage();
1130 			libzfs_fini(g_zfs);
1131 			return (1);
1132 		case '?':
1133 			(void) fprintf(stderr, "invalid option '%c'\n",
1134 			    optopt);
1135 			usage();
1136 			libzfs_fini(g_zfs);
1137 			return (2);
1138 		}
1139 	}
1140 
1141 	argc -= optind;
1142 	argv += optind;
1143 
1144 	if (record.zi_duration != 0 && record.zi_cmd == 0)
1145 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
1146 
1147 	if (cancel != NULL) {
1148 		/*
1149 		 * '-c' is invalid with any other options.
1150 		 */
1151 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1152 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1153 		    record.zi_freq > 0 || dvas != 0) {
1154 			(void) fprintf(stderr, "cancel (-c) incompatible with "
1155 			    "any other options\n");
1156 			usage();
1157 			libzfs_fini(g_zfs);
1158 			return (2);
1159 		}
1160 		if (argc != 0) {
1161 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
1162 			usage();
1163 			libzfs_fini(g_zfs);
1164 			return (2);
1165 		}
1166 
1167 		if (strcmp(cancel, "all") == 0) {
1168 			return (cancel_all_handlers());
1169 		} else {
1170 			int id = (int)strtol(cancel, &end, 10);
1171 			if (*end != '\0') {
1172 				(void) fprintf(stderr, "invalid handle id '%s':"
1173 				    " must be an integer or 'all'\n", cancel);
1174 				usage();
1175 				libzfs_fini(g_zfs);
1176 				return (1);
1177 			}
1178 			return (cancel_handler(id));
1179 		}
1180 	}
1181 
1182 	if (device != NULL) {
1183 		/*
1184 		 * Device (-d) injection uses a completely different mechanism
1185 		 * for doing injection, so handle it separately here.
1186 		 */
1187 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1188 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1189 		    dvas != 0) {
1190 			(void) fprintf(stderr, "device (-d) incompatible with "
1191 			    "data error injection\n");
1192 			usage();
1193 			libzfs_fini(g_zfs);
1194 			return (2);
1195 		}
1196 
1197 		if (argc != 1) {
1198 			(void) fprintf(stderr, "device (-d) injection requires "
1199 			    "a single pool name\n");
1200 			usage();
1201 			libzfs_fini(g_zfs);
1202 			return (2);
1203 		}
1204 
1205 		(void) strlcpy(pool, argv[0], sizeof (pool));
1206 		dataset[0] = '\0';
1207 
1208 		if (error == ECKSUM) {
1209 			(void) fprintf(stderr, "device error type must be "
1210 			    "'io', 'nxio' or 'corrupt'\n");
1211 			libzfs_fini(g_zfs);
1212 			return (1);
1213 		}
1214 
1215 		if (error == EILSEQ &&
1216 		    (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) {
1217 			(void) fprintf(stderr, "device corrupt errors require "
1218 			    "io type read and a frequency value\n");
1219 			libzfs_fini(g_zfs);
1220 			return (1);
1221 		}
1222 
1223 		record.zi_iotype = io_type;
1224 		if (translate_device(pool, device, label, &record) != 0) {
1225 			libzfs_fini(g_zfs);
1226 			return (1);
1227 		}
1228 
1229 		if (record.zi_nlanes) {
1230 			switch (io_type) {
1231 			case ZINJECT_IOTYPE_READ:
1232 			case ZINJECT_IOTYPE_WRITE:
1233 			case ZINJECT_IOTYPE_ALL:
1234 				break;
1235 			default:
1236 				(void) fprintf(stderr, "I/O type for a delay "
1237 				    "must be 'read' or 'write'\n");
1238 				usage();
1239 				libzfs_fini(g_zfs);
1240 				return (1);
1241 			}
1242 		}
1243 
1244 		if (!error)
1245 			error = ENXIO;
1246 
1247 		if (action != VDEV_STATE_UNKNOWN)
1248 			return (perform_action(pool, &record, action));
1249 
1250 	} else if (raw != NULL) {
1251 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1252 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1253 		    record.zi_freq > 0 || dvas != 0) {
1254 			(void) fprintf(stderr, "raw (-b) format with "
1255 			    "any other options\n");
1256 			usage();
1257 			libzfs_fini(g_zfs);
1258 			return (2);
1259 		}
1260 
1261 		if (argc != 1) {
1262 			(void) fprintf(stderr, "raw (-b) format expects a "
1263 			    "single pool name\n");
1264 			usage();
1265 			libzfs_fini(g_zfs);
1266 			return (2);
1267 		}
1268 
1269 		(void) strlcpy(pool, argv[0], sizeof (pool));
1270 		dataset[0] = '\0';
1271 
1272 		if (error == ENXIO) {
1273 			(void) fprintf(stderr, "data error type must be "
1274 			    "'checksum' or 'io'\n");
1275 			libzfs_fini(g_zfs);
1276 			return (1);
1277 		}
1278 
1279 		record.zi_cmd = ZINJECT_DATA_FAULT;
1280 		if (translate_raw(raw, &record) != 0) {
1281 			libzfs_fini(g_zfs);
1282 			return (1);
1283 		}
1284 		if (!error)
1285 			error = EIO;
1286 	} else if (record.zi_cmd == ZINJECT_PANIC) {
1287 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1288 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1289 		    dvas != 0) {
1290 			(void) fprintf(stderr, "%s incompatible with other "
1291 			    "options\n", "import|export delay (-P)");
1292 			usage();
1293 			libzfs_fini(g_zfs);
1294 			return (2);
1295 		}
1296 
1297 		if (argc < 1 || argc > 2) {
1298 			(void) fprintf(stderr, "panic (-p) injection requires "
1299 			    "a single pool name and an optional id\n");
1300 			usage();
1301 			libzfs_fini(g_zfs);
1302 			return (2);
1303 		}
1304 
1305 		(void) strlcpy(pool, argv[0], sizeof (pool));
1306 		if (argv[1] != NULL)
1307 			record.zi_type = atoi(argv[1]);
1308 		dataset[0] = '\0';
1309 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1310 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1311 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1312 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1313 		    dvas != 0) {
1314 			(void) fprintf(stderr, "%s incompatible with other "
1315 			    "options\n", "import|export delay (-P)");
1316 			usage();
1317 			libzfs_fini(g_zfs);
1318 			return (2);
1319 		}
1320 
1321 		if (argc != 1 || record.zi_duration <= 0) {
1322 			(void) fprintf(stderr, "import|export delay (-P) "
1323 			    "injection requires a duration (-s) and a single "
1324 			    "pool name\n");
1325 			usage();
1326 			libzfs_fini(g_zfs);
1327 			return (2);
1328 		}
1329 
1330 		(void) strlcpy(pool, argv[0], sizeof (pool));
1331 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1332 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1333 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1334 			(void) fprintf(stderr, "hardware failure (-I) "
1335 			    "incompatible with other options\n");
1336 			usage();
1337 			libzfs_fini(g_zfs);
1338 			return (2);
1339 		}
1340 
1341 		if (nowrites == 0) {
1342 			(void) fprintf(stderr, "-s or -g meaningless "
1343 			    "without -I (ignore writes)\n");
1344 			usage();
1345 			libzfs_fini(g_zfs);
1346 			return (2);
1347 		} else if (dur_secs && dur_txg) {
1348 			(void) fprintf(stderr, "choose a duration either "
1349 			    "in seconds (-s) or a number of txgs (-g) "
1350 			    "but not both\n");
1351 			usage();
1352 			libzfs_fini(g_zfs);
1353 			return (2);
1354 		} else if (argc != 1) {
1355 			(void) fprintf(stderr, "ignore writes (-I) "
1356 			    "injection requires a single pool name\n");
1357 			usage();
1358 			libzfs_fini(g_zfs);
1359 			return (2);
1360 		}
1361 
1362 		(void) strlcpy(pool, argv[0], sizeof (pool));
1363 		dataset[0] = '\0';
1364 	} else if (type == TYPE_INVAL) {
1365 		if (flags == 0) {
1366 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1367 			    "'-t', '-a', '-p', '-I' or '-u' "
1368 			    "must be specified\n");
1369 			usage();
1370 			libzfs_fini(g_zfs);
1371 			return (2);
1372 		}
1373 
1374 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1375 			(void) strlcpy(pool, argv[0], sizeof (pool));
1376 			dataset[0] = '\0';
1377 		} else if (argc != 0) {
1378 			(void) fprintf(stderr, "extraneous argument for "
1379 			    "'-f'\n");
1380 			usage();
1381 			libzfs_fini(g_zfs);
1382 			return (2);
1383 		}
1384 
1385 		flags |= ZINJECT_NULL;
1386 	} else {
1387 		if (argc != 1) {
1388 			(void) fprintf(stderr, "missing object\n");
1389 			usage();
1390 			libzfs_fini(g_zfs);
1391 			return (2);
1392 		}
1393 
1394 		if (error == ENXIO || error == EILSEQ) {
1395 			(void) fprintf(stderr, "data error type must be "
1396 			    "'checksum' or 'io'\n");
1397 			libzfs_fini(g_zfs);
1398 			return (1);
1399 		}
1400 
1401 		if (dvas != 0) {
1402 			if (error == EACCES || error == EINVAL) {
1403 				(void) fprintf(stderr, "the '-C' option may "
1404 				    "not be used with logical data errors "
1405 				    "'decrypt' and 'decompress'\n");
1406 				libzfs_fini(g_zfs);
1407 				return (1);
1408 			}
1409 
1410 			record.zi_dvas = dvas;
1411 		}
1412 
1413 		if (error == EACCES) {
1414 			if (type != TYPE_DATA) {
1415 				(void) fprintf(stderr, "decryption errors "
1416 				    "may only be injected for 'data' types\n");
1417 				libzfs_fini(g_zfs);
1418 				return (1);
1419 			}
1420 
1421 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1422 			/*
1423 			 * Internally, ZFS actually uses ECKSUM for decryption
1424 			 * errors since EACCES is used to indicate the key was
1425 			 * not found.
1426 			 */
1427 			error = ECKSUM;
1428 		} else {
1429 			record.zi_cmd = ZINJECT_DATA_FAULT;
1430 		}
1431 
1432 		if (translate_record(type, argv[0], range, level, &record, pool,
1433 		    dataset) != 0) {
1434 			libzfs_fini(g_zfs);
1435 			return (1);
1436 		}
1437 		if (!error)
1438 			error = EIO;
1439 	}
1440 
1441 	/*
1442 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1443 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1444 	 * time we access the pool.
1445 	 */
1446 	if (dataset[0] != '\0' && domount) {
1447 		if ((zhp = zfs_open(g_zfs, dataset,
1448 		    ZFS_TYPE_DATASET)) == NULL) {
1449 			libzfs_fini(g_zfs);
1450 			return (1);
1451 		}
1452 		if (zfs_unmount(zhp, NULL, 0) != 0) {
1453 			libzfs_fini(g_zfs);
1454 			return (1);
1455 		}
1456 	}
1457 
1458 	record.zi_error = error;
1459 
1460 	ret = register_handler(pool, flags, &record, quiet);
1461 
1462 	if (dataset[0] != '\0' && domount)
1463 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1464 
1465 	libzfs_fini(g_zfs);
1466 
1467 	return (ret);
1468 }
1469