xref: /titanic_44/usr/src/cmd/zinject/zinject.c (revision 16ba0fac26f672b18447f2e17a2f91f14ed3ce40)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * ZFS Fault Injector
28  *
29  * This userland component takes a set of options and uses libzpool to translate
30  * from a user-visible object type and name to an internal representation.
31  * There are two basic types of faults: device faults and data faults.
32  *
33  *
34  * DEVICE FAULTS
35  *
36  * Errors can be injected into a particular vdev using the '-d' option.  This
37  * option takes a path or vdev GUID to uniquely identify the device within a
38  * pool.  There are two types of errors that can be injected, EIO and ENXIO,
39  * that can be controlled through the '-e' option.  The default is ENXIO.  For
40  * EIO failures, any attempt to read data from the device will return EIO, but
 * subsequent attempts to reopen the device will succeed.  For ENXIO failures,
42  * any attempt to read from the device will return EIO, but any attempt to
43  * reopen the device will also return ENXIO.
44  * For label faults, the -L option must be specified. This allows faults
45  * to be injected into either the nvlist or uberblock region of all the labels
46  * for the specified device.
47  *
48  * This form of the command looks like:
49  *
50  * 	zinject -d device [-e errno] [-L <uber | nvlist>] pool
51  *
52  *
53  * DATA FAULTS
54  *
55  * We begin with a tuple of the form:
56  *
57  * 	<type,level,range,object>
58  *
59  * 	type	A string describing the type of data to target.  Each type
60  * 		implicitly describes how to interpret 'object'. Currently,
61  * 		the following values are supported:
62  *
63  * 		data		User data for a file
64  * 		dnode		Dnode for a file or directory
65  *
66  *		The following MOS objects are special.  Instead of injecting
67  *		errors on a particular object or blkid, we inject errors across
68  *		all objects of the given type.
69  *
70  * 		mos		Any data in the MOS
71  * 		mosdir		object directory
72  * 		config		pool configuration
73  * 		bplist		blkptr list
74  * 		spacemap	spacemap
75  * 		metaslab	metaslab
76  * 		errlog		persistent error log
77  *
78  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
79  * 		a range is given, this corresponds to the indirect block
80  * 		corresponding to the specific range.
81  *
82  *	range	A numerical range [start,end) within the object.  Defaults to
83  *		the full size of the file.
84  *
85  * 	object	A string describing the logical location of the object.  For
86  * 		files and directories (currently the only supported types),
87  * 		this is the path of the object on disk.
88  *
89  * This is translated, via libzpool, into the following internal representation:
90  *
91  * 	<type,objset,object,level,range>
92  *
93  * These types should be self-explanatory.  This tuple is then passed to the
94  * kernel via a special ioctl() to initiate fault injection for the given
95  * object.  Note that 'type' is not strictly necessary for fault injection, but
96  * is used when translating existing faults into a human-readable string.
97  *
98  *
99  * The command itself takes one of the forms:
100  *
101  * 	zinject
102  * 	zinject <-a | -u pool>
103  * 	zinject -c <id|all>
104  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105  *	    [-r range] <object>
106  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107  *
108  * With no arguments, the command prints all currently registered injection
109  * handlers, with their numeric identifiers.
110  *
111  * The '-c' option will clear the given handler, or all handlers if 'all' is
112  * specified.
113  *
114  * The '-e' option takes a string describing the errno to simulate.  This must
115  * be either 'io' or 'checksum'.  In most cases this will result in the same
116  * behavior, but RAID-Z will produce a different set of ereports for this
117  * situation.
118  *
119  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
120  * specified, then the ARC cache is flushed appropriately.  If '-u' is
121  * specified, then the underlying SPA is unloaded.  Either of these flags can be
122  * specified independently of any other handlers.  The '-m' flag automatically
123  * does an unmount and remount of the underlying dataset to aid in flushing the
124  * cache.
125  *
 * The '-f' flag controls the frequency of errors injected, expressed as an
127  * integer percentage between 1 and 100.  The default is 100.
128  *
 * This form is responsible for actually injecting the handler into the
130  * framework.  It takes the arguments described above, translates them to the
131  * internal tuple using libzpool, and then issues an ioctl() to register the
132  * handler.
133  *
134  * The final form can target a specific bookmark, regardless of whether a
135  * human-readable interface has been designed.  It allows developers to specify
136  * a particular block by number.
137  */
138 
139 #include <errno.h>
140 #include <fcntl.h>
141 #include <stdio.h>
142 #include <stdlib.h>
143 #include <strings.h>
144 #include <unistd.h>
145 
146 #include <sys/fs/zfs.h>
147 #include <sys/mount.h>
148 
149 #include <libzfs.h>
150 
151 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
152 
153 #include "zinject.h"
154 
/* libzfs library handle; assigned from libzfs_init() in main(). */
libzfs_handle_t *g_zfs;
/* Descriptor for ZFS_DEV, opened in main(); used by the ioctl() calls here. */
int zfs_fd;

/* Errno value that represents the 'checksum' error type accepted by '-e'. */
#define	ECKSUM	EBADE

/*
 * Names accepted by the '-t' and '-L' options.  name_to_type() returns the
 * index of the matching entry as an err_type_t, so the order of this table
 * has to agree with the err_type_t enumeration.
 * NOTE(review): err_type_t is not declared in this file; confirm the ordering
 * against its declaration.
 */
static const char *errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bplist",
	"spacemap",
	"errlog",
	"uber",
	"nvlist"
};
173 
174 static err_type_t
175 name_to_type(const char *arg)
176 {
177 	int i;
178 	for (i = 0; i < TYPE_INVAL; i++)
179 		if (strcmp(errtable[i], arg) == 0)
180 			return (i);
181 
182 	return (TYPE_INVAL);
183 }
184 
185 static const char *
186 type_to_name(uint64_t type)
187 {
188 	switch (type) {
189 	case DMU_OT_OBJECT_DIRECTORY:
190 		return ("mosdir");
191 	case DMU_OT_OBJECT_ARRAY:
192 		return ("metaslab");
193 	case DMU_OT_PACKED_NVLIST:
194 		return ("config");
195 	case DMU_OT_BPLIST:
196 		return ("bplist");
197 	case DMU_OT_SPACE_MAP:
198 		return ("spacemap");
199 	case DMU_OT_ERROR_LOG:
200 		return ("errlog");
201 	default:
202 		return ("-");
203 	}
204 }
205 
206 
/*
 * Print the usage message to standard output.
 *
 * The text lists every supported form of the command together with a short
 * description of each option.  Nothing is returned; callers decide on the
 * exit status themselves.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F]\n"
	    "\t    [-T <read|write|free|claim|all>] pool\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
	    "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> pool\n"
	    "\t\tPerform a specific action on a particular device\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
	    "\t\t\t'checksum'.  Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bplist,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
290 
291 static int
292 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
293     void *data)
294 {
295 	zfs_cmd_t zc;
296 	int ret;
297 
298 	zc.zc_guid = 0;
299 
300 	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
301 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
302 		    &zc.zc_inject_record, data)) != 0)
303 			return (ret);
304 
305 	return (0);
306 }
307 
308 static int
309 print_data_handler(int id, const char *pool, zinject_record_t *record,
310     void *data)
311 {
312 	int *count = data;
313 
314 	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
315 		return (0);
316 
317 	if (*count == 0) {
318 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
319 		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
320 		(void) printf("---  ---------------  ------  "
321 		    "------  --------  ---  ---------------\n");
322 	}
323 
324 	*count += 1;
325 
326 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
327 	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
328 	    type_to_name(record->zi_type), record->zi_level);
329 
330 	if (record->zi_start == 0 &&
331 	    record->zi_end == -1ULL)
332 		(void) printf("all\n");
333 	else
334 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
335 		    (u_longlong_t)record->zi_end);
336 
337 	return (0);
338 }
339 
340 static int
341 print_device_handler(int id, const char *pool, zinject_record_t *record,
342     void *data)
343 {
344 	int *count = data;
345 
346 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
347 		return (0);
348 
349 	if (*count == 0) {
350 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
351 		(void) printf("---  ---------------  ----------------\n");
352 	}
353 
354 	*count += 1;
355 
356 	(void) printf("%3d  %-15s  %llx\n", id, pool,
357 	    (u_longlong_t)record->zi_guid);
358 
359 	return (0);
360 }
361 
362 static int
363 print_panic_handler(int id, const char *pool, zinject_record_t *record,
364     void *data)
365 {
366 	int *count = data;
367 
368 	if (record->zi_func[0] == '\0')
369 		return (0);
370 
371 	if (*count == 0) {
372 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
373 		(void) printf("---  ---------------  ----------------\n");
374 	}
375 
376 	*count += 1;
377 
378 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
379 
380 	return (0);
381 }
382 
383 /*
384  * Print all registered error handlers.  Returns the number of handlers
385  * registered.
386  */
387 static int
388 print_all_handlers(void)
389 {
390 	int count = 0;
391 
392 	(void) iter_handlers(print_device_handler, &count);
393 	(void) printf("\n");
394 	count = 0;
395 	(void) iter_handlers(print_data_handler, &count);
396 	(void) printf("\n");
397 	count = 0;
398 	(void) iter_handlers(print_panic_handler, &count);
399 
400 	return (count);
401 }
402 
403 /* ARGSUSED */
404 static int
405 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
406     void *data)
407 {
408 	zfs_cmd_t zc;
409 
410 	zc.zc_guid = (uint64_t)id;
411 
412 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
413 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
414 		    id, strerror(errno));
415 		return (1);
416 	}
417 
418 	return (0);
419 }
420 
421 /*
422  * Remove all fault injection handlers.
423  */
424 static int
425 cancel_all_handlers(void)
426 {
427 	int ret = iter_handlers(cancel_one_handler, NULL);
428 
429 	(void) printf("removed all registered handlers\n");
430 
431 	return (ret);
432 }
433 
434 /*
435  * Remove a specific fault injection handler.
436  */
437 static int
438 cancel_handler(int id)
439 {
440 	zfs_cmd_t zc;
441 
442 	zc.zc_guid = (uint64_t)id;
443 
444 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
445 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
446 		    id, strerror(errno));
447 		return (1);
448 	}
449 
450 	(void) printf("removed handler %d\n", id);
451 
452 	return (0);
453 }
454 
455 /*
456  * Register a new fault injection handler.
457  */
458 static int
459 register_handler(const char *pool, int flags, zinject_record_t *record,
460     int quiet)
461 {
462 	zfs_cmd_t zc;
463 
464 	(void) strcpy(zc.zc_name, pool);
465 	zc.zc_inject_record = *record;
466 	zc.zc_guid = flags;
467 
468 	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
469 		(void) fprintf(stderr, "failed to add handler: %s\n",
470 		    strerror(errno));
471 		return (1);
472 	}
473 
474 	if (flags & ZINJECT_NULL)
475 		return (0);
476 
477 	if (quiet) {
478 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
479 	} else {
480 		(void) printf("Added handler %llu with the following "
481 		    "properties:\n", (u_longlong_t)zc.zc_guid);
482 		(void) printf("  pool: %s\n", pool);
483 		if (record->zi_guid) {
484 			(void) printf("  vdev: %llx\n",
485 			    (u_longlong_t)record->zi_guid);
486 		} else if (record->zi_func[0] != '\0') {
487 			(void) printf("  panic function: %s\n",
488 			    record->zi_func);
489 		} else if (record->zi_duration > 0) {
490 			(void) printf(" time: %lld seconds\n",
491 			    (u_longlong_t)record->zi_duration);
492 		} else if (record->zi_duration < 0) {
493 			(void) printf(" txgs: %lld \n",
494 			    (u_longlong_t)-record->zi_duration);
495 		} else {
496 			(void) printf("objset: %llu\n",
497 			    (u_longlong_t)record->zi_objset);
498 			(void) printf("object: %llu\n",
499 			    (u_longlong_t)record->zi_object);
500 			(void) printf("  type: %llu\n",
501 			    (u_longlong_t)record->zi_type);
502 			(void) printf(" level: %d\n", record->zi_level);
503 			if (record->zi_start == 0 &&
504 			    record->zi_end == -1ULL)
505 				(void) printf(" range: all\n");
506 			else
507 				(void) printf(" range: [%llu, %llu)\n",
508 				    (u_longlong_t)record->zi_start,
509 				    (u_longlong_t)record->zi_end);
510 		}
511 	}
512 
513 	return (0);
514 }
515 
516 int
517 perform_action(const char *pool, zinject_record_t *record, int cmd)
518 {
519 	zfs_cmd_t zc;
520 
521 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
522 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
523 	zc.zc_guid = record->zi_guid;
524 	zc.zc_cookie = cmd;
525 
526 	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
527 		return (0);
528 
529 	return (1);
530 }
531 
532 int
533 main(int argc, char **argv)
534 {
535 	int c;
536 	char *range = NULL;
537 	char *cancel = NULL;
538 	char *end;
539 	char *raw = NULL;
540 	char *device = NULL;
541 	int level = 0;
542 	int quiet = 0;
543 	int error = 0;
544 	int domount = 0;
545 	int io_type = ZIO_TYPES;
546 	int action = VDEV_STATE_UNKNOWN;
547 	err_type_t type = TYPE_INVAL;
548 	err_type_t label = TYPE_INVAL;
549 	zinject_record_t record = { 0 };
550 	char pool[MAXNAMELEN];
551 	char dataset[MAXNAMELEN];
552 	zfs_handle_t *zhp;
553 	int nowrites = 0;
554 	int dur_txg = 0;
555 	int dur_secs = 0;
556 	int ret;
557 	int flags = 0;
558 
559 	if ((g_zfs = libzfs_init()) == NULL) {
560 		(void) fprintf(stderr, "internal error: failed to "
561 		    "initialize ZFS library\n");
562 		return (1);
563 	}
564 
565 	libzfs_print_on_error(g_zfs, B_TRUE);
566 
567 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
568 		(void) fprintf(stderr, "failed to open ZFS device\n");
569 		return (1);
570 	}
571 
572 	if (argc == 1) {
573 		/*
574 		 * No arguments.  Print the available handlers.  If there are no
575 		 * available handlers, direct the user to '-h' for help
576 		 * information.
577 		 */
578 		if (print_all_handlers() == 0) {
579 			(void) printf("No handlers registered.\n");
580 			(void) printf("Run 'zinject -h' for usage "
581 			    "information.\n");
582 		}
583 
584 		return (0);
585 	}
586 
587 	while ((c = getopt(argc, argv,
588 	    ":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
589 		switch (c) {
590 		case 'a':
591 			flags |= ZINJECT_FLUSH_ARC;
592 			break;
593 		case 'A':
594 			if (strcasecmp(optarg, "degrade") == 0) {
595 				action = VDEV_STATE_DEGRADED;
596 			} else if (strcasecmp(optarg, "fault") == 0) {
597 				action = VDEV_STATE_FAULTED;
598 			} else {
599 				(void) fprintf(stderr, "invalid action '%s': "
600 				    "must be 'degrade' or 'fault'\n", optarg);
601 				usage();
602 				return (1);
603 			}
604 			break;
605 		case 'b':
606 			raw = optarg;
607 			break;
608 		case 'c':
609 			cancel = optarg;
610 			break;
611 		case 'd':
612 			device = optarg;
613 			break;
614 		case 'e':
615 			if (strcasecmp(optarg, "io") == 0) {
616 				error = EIO;
617 			} else if (strcasecmp(optarg, "checksum") == 0) {
618 				error = ECKSUM;
619 			} else if (strcasecmp(optarg, "nxio") == 0) {
620 				error = ENXIO;
621 			} else {
622 				(void) fprintf(stderr, "invalid error type "
623 				    "'%s': must be 'io', 'checksum' or "
624 				    "'nxio'\n", optarg);
625 				usage();
626 				return (1);
627 			}
628 			break;
629 		case 'f':
630 			record.zi_freq = atoi(optarg);
631 			if (record.zi_freq < 1 || record.zi_freq > 100) {
632 				(void) fprintf(stderr, "frequency range must "
633 				    "be in the range (0, 100]\n");
634 				return (1);
635 			}
636 			break;
637 		case 'F':
638 			record.zi_failfast = B_TRUE;
639 			break;
640 		case 'g':
641 			dur_txg = 1;
642 			record.zi_duration = (int)strtol(optarg, &end, 10);
643 			if (record.zi_duration <= 0 || *end != '\0') {
644 				(void) fprintf(stderr, "invalid duration '%s': "
645 				    "must be a positive integer\n", optarg);
646 				usage();
647 				return (1);
648 			}
649 			/* store duration of txgs as its negative */
650 			record.zi_duration *= -1;
651 			break;
652 		case 'h':
653 			usage();
654 			return (0);
655 		case 'I':
656 			/* default duration, if one hasn't yet been defined */
657 			nowrites = 1;
658 			if (dur_secs == 0 && dur_txg == 0)
659 				record.zi_duration = 30;
660 			break;
661 		case 'l':
662 			level = (int)strtol(optarg, &end, 10);
663 			if (*end != '\0') {
664 				(void) fprintf(stderr, "invalid level '%s': "
665 				    "must be an integer\n", optarg);
666 				usage();
667 				return (1);
668 			}
669 			break;
670 		case 'm':
671 			domount = 1;
672 			break;
673 		case 'p':
674 			(void) strlcpy(record.zi_func, optarg,
675 			    sizeof (record.zi_func));
676 			break;
677 		case 'q':
678 			quiet = 1;
679 			break;
680 		case 'r':
681 			range = optarg;
682 			break;
683 		case 's':
684 			dur_secs = 1;
685 			record.zi_duration = (int)strtol(optarg, &end, 10);
686 			if (record.zi_duration <= 0 || *end != '\0') {
687 				(void) fprintf(stderr, "invalid duration '%s': "
688 				    "must be a positive integer\n", optarg);
689 				usage();
690 				return (1);
691 			}
692 			break;
693 		case 'T':
694 			if (strcasecmp(optarg, "read") == 0) {
695 				io_type = ZIO_TYPE_READ;
696 			} else if (strcasecmp(optarg, "write") == 0) {
697 				io_type = ZIO_TYPE_WRITE;
698 			} else if (strcasecmp(optarg, "free") == 0) {
699 				io_type = ZIO_TYPE_FREE;
700 			} else if (strcasecmp(optarg, "claim") == 0) {
701 				io_type = ZIO_TYPE_CLAIM;
702 			} else if (strcasecmp(optarg, "all") == 0) {
703 				io_type = ZIO_TYPES;
704 			} else {
705 				(void) fprintf(stderr, "invalid I/O type "
706 				    "'%s': must be 'read', 'write', 'free', "
707 				    "'claim' or 'all'\n", optarg);
708 				usage();
709 				return (1);
710 			}
711 			break;
712 		case 't':
713 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
714 			    !MOS_TYPE(type)) {
715 				(void) fprintf(stderr, "invalid type '%s'\n",
716 				    optarg);
717 				usage();
718 				return (1);
719 			}
720 			break;
721 		case 'u':
722 			flags |= ZINJECT_UNLOAD_SPA;
723 			break;
724 		case 'L':
725 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
726 			    !LABEL_TYPE(type)) {
727 				(void) fprintf(stderr, "invalid label type "
728 				    "'%s'\n", optarg);
729 				usage();
730 				return (1);
731 			}
732 			break;
733 		case ':':
734 			(void) fprintf(stderr, "option -%c requires an "
735 			    "operand\n", optopt);
736 			usage();
737 			return (1);
738 		case '?':
739 			(void) fprintf(stderr, "invalid option '%c'\n",
740 			    optopt);
741 			usage();
742 			return (2);
743 		}
744 	}
745 
746 	argc -= optind;
747 	argv += optind;
748 
749 	if (cancel != NULL) {
750 		/*
751 		 * '-c' is invalid with any other options.
752 		 */
753 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
754 		    level != 0 || record.zi_func[0] != '\0' ||
755 		    record.zi_duration != 0) {
756 			(void) fprintf(stderr, "cancel (-c) incompatible with "
757 			    "any other options\n");
758 			usage();
759 			return (2);
760 		}
761 		if (argc != 0) {
762 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
763 			usage();
764 			return (2);
765 		}
766 
767 		if (strcmp(cancel, "all") == 0) {
768 			return (cancel_all_handlers());
769 		} else {
770 			int id = (int)strtol(cancel, &end, 10);
771 			if (*end != '\0') {
772 				(void) fprintf(stderr, "invalid handle id '%s':"
773 				    " must be an integer or 'all'\n", cancel);
774 				usage();
775 				return (1);
776 			}
777 			return (cancel_handler(id));
778 		}
779 	}
780 
781 	if (device != NULL) {
782 		/*
783 		 * Device (-d) injection uses a completely different mechanism
784 		 * for doing injection, so handle it separately here.
785 		 */
786 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
787 		    level != 0 || record.zi_func[0] != '\0' ||
788 		    record.zi_duration != 0) {
789 			(void) fprintf(stderr, "device (-d) incompatible with "
790 			    "data error injection\n");
791 			usage();
792 			return (2);
793 		}
794 
795 		if (argc != 1) {
796 			(void) fprintf(stderr, "device (-d) injection requires "
797 			    "a single pool name\n");
798 			usage();
799 			return (2);
800 		}
801 
802 		(void) strcpy(pool, argv[0]);
803 		dataset[0] = '\0';
804 
805 		if (error == ECKSUM) {
806 			(void) fprintf(stderr, "device error type must be "
807 			    "'io' or 'nxio'\n");
808 			return (1);
809 		}
810 
811 		record.zi_iotype = io_type;
812 		if (translate_device(pool, device, label, &record) != 0)
813 			return (1);
814 		if (!error)
815 			error = ENXIO;
816 
817 		if (action != VDEV_STATE_UNKNOWN)
818 			return (perform_action(pool, &record, action));
819 
820 	} else if (raw != NULL) {
821 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
822 		    record.zi_func[0] != '\0' || record.zi_duration != 0) {
823 			(void) fprintf(stderr, "raw (-b) format with "
824 			    "any other options\n");
825 			usage();
826 			return (2);
827 		}
828 
829 		if (argc != 1) {
830 			(void) fprintf(stderr, "raw (-b) format expects a "
831 			    "single pool name\n");
832 			usage();
833 			return (2);
834 		}
835 
836 		(void) strcpy(pool, argv[0]);
837 		dataset[0] = '\0';
838 
839 		if (error == ENXIO) {
840 			(void) fprintf(stderr, "data error type must be "
841 			    "'checksum' or 'io'\n");
842 			return (1);
843 		}
844 
845 		if (translate_raw(raw, &record) != 0)
846 			return (1);
847 		if (!error)
848 			error = EIO;
849 	} else if (record.zi_func[0] != '\0') {
850 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
851 		    level != 0 || device != NULL || record.zi_duration != 0) {
852 			(void) fprintf(stderr, "panic (-p) incompatible with "
853 			    "other options\n");
854 			usage();
855 			return (2);
856 		}
857 
858 		if (argc < 1 || argc > 2) {
859 			(void) fprintf(stderr, "panic (-p) injection requires "
860 			    "a single pool name and an optional id\n");
861 			usage();
862 			return (2);
863 		}
864 
865 		(void) strcpy(pool, argv[0]);
866 		if (argv[1] != NULL)
867 			record.zi_type = atoi(argv[1]);
868 		dataset[0] = '\0';
869 	} else if (record.zi_duration != 0) {
870 		if (nowrites == 0) {
871 			(void) fprintf(stderr, "-s or -g meaningless "
872 			    "without -I (ignore writes)\n");
873 			usage();
874 			return (2);
875 		} else if (dur_secs && dur_txg) {
876 			(void) fprintf(stderr, "choose a duration either "
877 			    "in seconds (-s) or a number of txgs (-g) "
878 			    "but not both\n");
879 			usage();
880 			return (2);
881 		} else if (argc != 1) {
882 			(void) fprintf(stderr, "ignore writes (-I) "
883 			    "injection requires a single pool name\n");
884 			usage();
885 			return (2);
886 		}
887 
888 		(void) strcpy(pool, argv[0]);
889 		dataset[0] = '\0';
890 	} else if (type == TYPE_INVAL) {
891 		if (flags == 0) {
892 			(void) fprintf(stderr, "at least one of '-b', '-d', "
893 			    "'-t', '-a', '-p', '-I' or '-u' "
894 			    "must be specified\n");
895 			usage();
896 			return (2);
897 		}
898 
899 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
900 			(void) strcpy(pool, argv[0]);
901 			dataset[0] = '\0';
902 		} else if (argc != 0) {
903 			(void) fprintf(stderr, "extraneous argument for "
904 			    "'-f'\n");
905 			usage();
906 			return (2);
907 		}
908 
909 		flags |= ZINJECT_NULL;
910 	} else {
911 		if (argc != 1) {
912 			(void) fprintf(stderr, "missing object\n");
913 			usage();
914 			return (2);
915 		}
916 
917 		if (error == ENXIO) {
918 			(void) fprintf(stderr, "data error type must be "
919 			    "'checksum' or 'io'\n");
920 			return (1);
921 		}
922 
923 		if (translate_record(type, argv[0], range, level, &record, pool,
924 		    dataset) != 0)
925 			return (1);
926 		if (!error)
927 			error = EIO;
928 	}
929 
930 	/*
931 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
932 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
933 	 * time we access the pool.
934 	 */
935 	if (dataset[0] != '\0' && domount) {
936 		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
937 			return (1);
938 
939 		if (zfs_unmount(zhp, NULL, 0) != 0)
940 			return (1);
941 	}
942 
943 	record.zi_error = error;
944 
945 	ret = register_handler(pool, flags, &record, quiet);
946 
947 	if (dataset[0] != '\0' && domount)
948 		ret = (zfs_mount(zhp, NULL, 0) != 0);
949 
950 	libzfs_fini(g_zfs);
951 
952 	return (ret);
953 }
954