xref: /titanic_51/usr/src/cmd/zinject/zinject.c (revision 7c6d7024e51780d3aacf9063d2133c1e957d7eea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012 by Delphix. All rights reserved.
24  */
25 
26 /*
27  * ZFS Fault Injector
28  *
29  * This userland component takes a set of options and uses libzpool to translate
30  * from a user-visible object type and name to an internal representation.
31  * There are two basic types of faults: device faults and data faults.
32  *
33  *
34  * DEVICE FAULTS
35  *
36  * Errors can be injected into a particular vdev using the '-d' option.  This
37  * option takes a path or vdev GUID to uniquely identify the device within a
38  * pool.  There are two types of errors that can be injected, EIO and ENXIO,
39  * that can be controlled through the '-e' option.  The default is ENXIO.  For
 * EIO failures, any attempt to read data from the device will return EIO, but
 * a subsequent attempt to reopen the device will succeed.  For ENXIO failures,
 * any attempt to read from the device will return EIO, and any attempt to
 * reopen the device will return ENXIO.
44  * For label faults, the -L option must be specified. This allows faults
45  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
46  * of all the labels for the specified device.
47  *
48  * This form of the command looks like:
49  *
50  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
51  *
52  *
53  * DATA FAULTS
54  *
55  * We begin with a tuple of the form:
56  *
57  * 	<type,level,range,object>
58  *
59  * 	type	A string describing the type of data to target.  Each type
60  * 		implicitly describes how to interpret 'object'. Currently,
61  * 		the following values are supported:
62  *
63  * 		data		User data for a file
64  * 		dnode		Dnode for a file or directory
65  *
66  *		The following MOS objects are special.  Instead of injecting
67  *		errors on a particular object or blkid, we inject errors across
68  *		all objects of the given type.
69  *
70  * 		mos		Any data in the MOS
71  * 		mosdir		object directory
72  * 		config		pool configuration
73  * 		bpobj		blkptr list
74  * 		spacemap	spacemap
75  * 		metaslab	metaslab
76  * 		errlog		persistent error log
77  *
78  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
79  * 		a range is given, this corresponds to the indirect block
80  * 		corresponding to the specific range.
81  *
82  *	range	A numerical range [start,end) within the object.  Defaults to
83  *		the full size of the file.
84  *
85  * 	object	A string describing the logical location of the object.  For
86  * 		files and directories (currently the only supported types),
87  * 		this is the path of the object on disk.
88  *
89  * This is translated, via libzpool, into the following internal representation:
90  *
91  * 	<type,objset,object,level,range>
92  *
93  * These types should be self-explanatory.  This tuple is then passed to the
94  * kernel via a special ioctl() to initiate fault injection for the given
95  * object.  Note that 'type' is not strictly necessary for fault injection, but
96  * is used when translating existing faults into a human-readable string.
97  *
98  *
99  * The command itself takes one of the forms:
100  *
101  * 	zinject
102  * 	zinject <-a | -u pool>
103  * 	zinject -c <id|all>
104  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105  *	    [-r range] <object>
106  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107  *
108  * With no arguments, the command prints all currently registered injection
109  * handlers, with their numeric identifiers.
110  *
111  * The '-c' option will clear the given handler, or all handlers if 'all' is
112  * specified.
113  *
114  * The '-e' option takes a string describing the errno to simulate.  This must
115  * be either 'io' or 'checksum'.  In most cases this will result in the same
116  * behavior, but RAID-Z will produce a different set of ereports for this
117  * situation.
118  *
119  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
120  * specified, then the ARC cache is flushed appropriately.  If '-u' is
121  * specified, then the underlying SPA is unloaded.  Either of these flags can be
122  * specified independently of any other handlers.  The '-m' flag automatically
123  * does an unmount and remount of the underlying dataset to aid in flushing the
124  * cache.
125  *
 * The '-f' flag controls the frequency of errors injected, expressed as an
127  * integer percentage between 1 and 100.  The default is 100.
128  *
 * This form is responsible for actually injecting the handler into the
130  * framework.  It takes the arguments described above, translates them to the
131  * internal tuple using libzpool, and then issues an ioctl() to register the
132  * handler.
133  *
134  * The final form can target a specific bookmark, regardless of whether a
135  * human-readable interface has been designed.  It allows developers to specify
136  * a particular block by number.
137  */
138 
139 #include <errno.h>
140 #include <fcntl.h>
141 #include <stdio.h>
142 #include <stdlib.h>
143 #include <strings.h>
144 #include <unistd.h>
145 
146 #include <sys/fs/zfs.h>
147 #include <sys/mount.h>
148 
149 #include <libzfs.h>
150 
151 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
152 
153 #include "zinject.h"
154 
/* libzfs library handle; main() initializes it with libzfs_init(). */
libzfs_handle_t *g_zfs;
/* Descriptor for ZFS_DEV; main() opens it and every ioctl() here uses it. */
int zfs_fd;

/* Error number main() stores when the user asks for '-e checksum'. */
#define	ECKSUM	EBADE
159 
/*
 * User-visible names of the injection types.  name_to_type() returns the
 * index of the matching entry as an err_type_t, so entry 'i' must be the name
 * of the type whose numeric value is 'i'.
 * NOTE(review): the order presumably mirrors the err_type_t enumeration in
 * zinject.h -- confirm before adding or reordering entries.
 */
static const char *errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
175 
176 static err_type_t
177 name_to_type(const char *arg)
178 {
179 	int i;
180 	for (i = 0; i < TYPE_INVAL; i++)
181 		if (strcmp(errtable[i], arg) == 0)
182 			return (i);
183 
184 	return (TYPE_INVAL);
185 }
186 
187 static const char *
188 type_to_name(uint64_t type)
189 {
190 	switch (type) {
191 	case DMU_OT_OBJECT_DIRECTORY:
192 		return ("mosdir");
193 	case DMU_OT_OBJECT_ARRAY:
194 		return ("metaslab");
195 	case DMU_OT_PACKED_NVLIST:
196 		return ("config");
197 	case DMU_OT_BPOBJ:
198 		return ("bpobj");
199 	case DMU_OT_SPACE_MAP:
200 		return ("spacemap");
201 	case DMU_OT_ERROR_LOG:
202 		return ("errlog");
203 	default:
204 		return ("-");
205 	}
206 }
207 
208 
/*
 * Print the usage message to standard output.
 *
 * The text is a single printf() of adjacent string literals; every literal
 * ends its own output line(s) with an explicit "\n".
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t    [-T <read|write|free|claim|all>] pool\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> pool\n"
	    "\t\tPerform a specific action on a particular device\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
	    "\t\t\t'checksum'.  Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
293 
294 static int
295 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
296     void *data)
297 {
298 	zfs_cmd_t zc = { 0 };
299 	int ret;
300 
301 	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
302 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
303 		    &zc.zc_inject_record, data)) != 0)
304 			return (ret);
305 
306 	if (errno != ENOENT) {
307 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
308 		    strerror(errno));
309 		return (-1);
310 	}
311 
312 	return (0);
313 }
314 
315 static int
316 print_data_handler(int id, const char *pool, zinject_record_t *record,
317     void *data)
318 {
319 	int *count = data;
320 
321 	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
322 		return (0);
323 
324 	if (*count == 0) {
325 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
326 		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
327 		(void) printf("---  ---------------  ------  "
328 		    "------  --------  ---  ---------------\n");
329 	}
330 
331 	*count += 1;
332 
333 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
334 	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
335 	    type_to_name(record->zi_type), record->zi_level);
336 
337 	if (record->zi_start == 0 &&
338 	    record->zi_end == -1ULL)
339 		(void) printf("all\n");
340 	else
341 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
342 		    (u_longlong_t)record->zi_end);
343 
344 	return (0);
345 }
346 
347 static int
348 print_device_handler(int id, const char *pool, zinject_record_t *record,
349     void *data)
350 {
351 	int *count = data;
352 
353 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
354 		return (0);
355 
356 	if (*count == 0) {
357 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
358 		(void) printf("---  ---------------  ----------------\n");
359 	}
360 
361 	*count += 1;
362 
363 	(void) printf("%3d  %-15s  %llx\n", id, pool,
364 	    (u_longlong_t)record->zi_guid);
365 
366 	return (0);
367 }
368 
369 static int
370 print_panic_handler(int id, const char *pool, zinject_record_t *record,
371     void *data)
372 {
373 	int *count = data;
374 
375 	if (record->zi_func[0] == '\0')
376 		return (0);
377 
378 	if (*count == 0) {
379 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
380 		(void) printf("---  ---------------  ----------------\n");
381 	}
382 
383 	*count += 1;
384 
385 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
386 
387 	return (0);
388 }
389 
390 /*
391  * Print all registered error handlers.  Returns the number of handlers
392  * registered.
393  */
394 static int
395 print_all_handlers(void)
396 {
397 	int count = 0, total = 0;
398 
399 	(void) iter_handlers(print_device_handler, &count);
400 	if (count > 0) {
401 		total += count;
402 		(void) printf("\n");
403 		count = 0;
404 	}
405 
406 	(void) iter_handlers(print_data_handler, &count);
407 	if (count > 0) {
408 		total += count;
409 		(void) printf("\n");
410 		count = 0;
411 	}
412 
413 	(void) iter_handlers(print_panic_handler, &count);
414 
415 	return (count + total);
416 }
417 
418 /* ARGSUSED */
419 static int
420 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
421     void *data)
422 {
423 	zfs_cmd_t zc = { 0 };
424 
425 	zc.zc_guid = (uint64_t)id;
426 
427 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
428 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
429 		    id, strerror(errno));
430 		return (1);
431 	}
432 
433 	return (0);
434 }
435 
436 /*
437  * Remove all fault injection handlers.
438  */
439 static int
440 cancel_all_handlers(void)
441 {
442 	int ret = iter_handlers(cancel_one_handler, NULL);
443 
444 	if (ret == 0)
445 		(void) printf("removed all registered handlers\n");
446 
447 	return (ret);
448 }
449 
450 /*
451  * Remove a specific fault injection handler.
452  */
453 static int
454 cancel_handler(int id)
455 {
456 	zfs_cmd_t zc = { 0 };
457 
458 	zc.zc_guid = (uint64_t)id;
459 
460 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
461 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
462 		    id, strerror(errno));
463 		return (1);
464 	}
465 
466 	(void) printf("removed handler %d\n", id);
467 
468 	return (0);
469 }
470 
471 /*
472  * Register a new fault injection handler.
473  */
474 static int
475 register_handler(const char *pool, int flags, zinject_record_t *record,
476     int quiet)
477 {
478 	zfs_cmd_t zc = { 0 };
479 
480 	(void) strcpy(zc.zc_name, pool);
481 	zc.zc_inject_record = *record;
482 	zc.zc_guid = flags;
483 
484 	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
485 		(void) fprintf(stderr, "failed to add handler: %s\n",
486 		    strerror(errno));
487 		return (1);
488 	}
489 
490 	if (flags & ZINJECT_NULL)
491 		return (0);
492 
493 	if (quiet) {
494 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
495 	} else {
496 		(void) printf("Added handler %llu with the following "
497 		    "properties:\n", (u_longlong_t)zc.zc_guid);
498 		(void) printf("  pool: %s\n", pool);
499 		if (record->zi_guid) {
500 			(void) printf("  vdev: %llx\n",
501 			    (u_longlong_t)record->zi_guid);
502 		} else if (record->zi_func[0] != '\0') {
503 			(void) printf("  panic function: %s\n",
504 			    record->zi_func);
505 		} else if (record->zi_duration > 0) {
506 			(void) printf(" time: %lld seconds\n",
507 			    (u_longlong_t)record->zi_duration);
508 		} else if (record->zi_duration < 0) {
509 			(void) printf(" txgs: %lld \n",
510 			    (u_longlong_t)-record->zi_duration);
511 		} else {
512 			(void) printf("objset: %llu\n",
513 			    (u_longlong_t)record->zi_objset);
514 			(void) printf("object: %llu\n",
515 			    (u_longlong_t)record->zi_object);
516 			(void) printf("  type: %llu\n",
517 			    (u_longlong_t)record->zi_type);
518 			(void) printf(" level: %d\n", record->zi_level);
519 			if (record->zi_start == 0 &&
520 			    record->zi_end == -1ULL)
521 				(void) printf(" range: all\n");
522 			else
523 				(void) printf(" range: [%llu, %llu)\n",
524 				    (u_longlong_t)record->zi_start,
525 				    (u_longlong_t)record->zi_end);
526 		}
527 	}
528 
529 	return (0);
530 }
531 
532 int
533 perform_action(const char *pool, zinject_record_t *record, int cmd)
534 {
535 	zfs_cmd_t zc = { 0 };
536 
537 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
538 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
539 	zc.zc_guid = record->zi_guid;
540 	zc.zc_cookie = cmd;
541 
542 	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
543 		return (0);
544 
545 	return (1);
546 }
547 
548 int
549 main(int argc, char **argv)
550 {
551 	int c;
552 	char *range = NULL;
553 	char *cancel = NULL;
554 	char *end;
555 	char *raw = NULL;
556 	char *device = NULL;
557 	int level = 0;
558 	int quiet = 0;
559 	int error = 0;
560 	int domount = 0;
561 	int io_type = ZIO_TYPES;
562 	int action = VDEV_STATE_UNKNOWN;
563 	err_type_t type = TYPE_INVAL;
564 	err_type_t label = TYPE_INVAL;
565 	zinject_record_t record = { 0 };
566 	char pool[MAXNAMELEN];
567 	char dataset[MAXNAMELEN];
568 	zfs_handle_t *zhp;
569 	int nowrites = 0;
570 	int dur_txg = 0;
571 	int dur_secs = 0;
572 	int ret;
573 	int flags = 0;
574 
575 	if ((g_zfs = libzfs_init()) == NULL) {
576 		(void) fprintf(stderr, "internal error: failed to "
577 		    "initialize ZFS library\n");
578 		return (1);
579 	}
580 
581 	libzfs_print_on_error(g_zfs, B_TRUE);
582 
583 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
584 		(void) fprintf(stderr, "failed to open ZFS device\n");
585 		return (1);
586 	}
587 
588 	if (argc == 1) {
589 		/*
590 		 * No arguments.  Print the available handlers.  If there are no
591 		 * available handlers, direct the user to '-h' for help
592 		 * information.
593 		 */
594 		if (print_all_handlers() == 0) {
595 			(void) printf("No handlers registered.\n");
596 			(void) printf("Run 'zinject -h' for usage "
597 			    "information.\n");
598 		}
599 
600 		return (0);
601 	}
602 
603 	while ((c = getopt(argc, argv,
604 	    ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
605 		switch (c) {
606 		case 'a':
607 			flags |= ZINJECT_FLUSH_ARC;
608 			break;
609 		case 'A':
610 			if (strcasecmp(optarg, "degrade") == 0) {
611 				action = VDEV_STATE_DEGRADED;
612 			} else if (strcasecmp(optarg, "fault") == 0) {
613 				action = VDEV_STATE_FAULTED;
614 			} else {
615 				(void) fprintf(stderr, "invalid action '%s': "
616 				    "must be 'degrade' or 'fault'\n", optarg);
617 				usage();
618 				return (1);
619 			}
620 			break;
621 		case 'b':
622 			raw = optarg;
623 			break;
624 		case 'c':
625 			cancel = optarg;
626 			break;
627 		case 'd':
628 			device = optarg;
629 			break;
630 		case 'D':
631 			record.zi_timer = strtoull(optarg, &end, 10);
632 			if (errno != 0 || *end != '\0') {
633 				(void) fprintf(stderr, "invalid i/o delay "
634 				    "value: '%s'\n", optarg);
635 				usage();
636 				return (1);
637 			}
638 			break;
639 		case 'e':
640 			if (strcasecmp(optarg, "io") == 0) {
641 				error = EIO;
642 			} else if (strcasecmp(optarg, "checksum") == 0) {
643 				error = ECKSUM;
644 			} else if (strcasecmp(optarg, "nxio") == 0) {
645 				error = ENXIO;
646 			} else if (strcasecmp(optarg, "dtl") == 0) {
647 				error = ECHILD;
648 			} else {
649 				(void) fprintf(stderr, "invalid error type "
650 				    "'%s': must be 'io', 'checksum' or "
651 				    "'nxio'\n", optarg);
652 				usage();
653 				return (1);
654 			}
655 			break;
656 		case 'f':
657 			record.zi_freq = atoi(optarg);
658 			if (record.zi_freq < 1 || record.zi_freq > 100) {
659 				(void) fprintf(stderr, "frequency range must "
660 				    "be in the range (0, 100]\n");
661 				return (1);
662 			}
663 			break;
664 		case 'F':
665 			record.zi_failfast = B_TRUE;
666 			break;
667 		case 'g':
668 			dur_txg = 1;
669 			record.zi_duration = (int)strtol(optarg, &end, 10);
670 			if (record.zi_duration <= 0 || *end != '\0') {
671 				(void) fprintf(stderr, "invalid duration '%s': "
672 				    "must be a positive integer\n", optarg);
673 				usage();
674 				return (1);
675 			}
676 			/* store duration of txgs as its negative */
677 			record.zi_duration *= -1;
678 			break;
679 		case 'h':
680 			usage();
681 			return (0);
682 		case 'I':
683 			/* default duration, if one hasn't yet been defined */
684 			nowrites = 1;
685 			if (dur_secs == 0 && dur_txg == 0)
686 				record.zi_duration = 30;
687 			break;
688 		case 'l':
689 			level = (int)strtol(optarg, &end, 10);
690 			if (*end != '\0') {
691 				(void) fprintf(stderr, "invalid level '%s': "
692 				    "must be an integer\n", optarg);
693 				usage();
694 				return (1);
695 			}
696 			break;
697 		case 'm':
698 			domount = 1;
699 			break;
700 		case 'p':
701 			(void) strlcpy(record.zi_func, optarg,
702 			    sizeof (record.zi_func));
703 			record.zi_cmd = ZINJECT_PANIC;
704 			break;
705 		case 'q':
706 			quiet = 1;
707 			break;
708 		case 'r':
709 			range = optarg;
710 			break;
711 		case 's':
712 			dur_secs = 1;
713 			record.zi_duration = (int)strtol(optarg, &end, 10);
714 			if (record.zi_duration <= 0 || *end != '\0') {
715 				(void) fprintf(stderr, "invalid duration '%s': "
716 				    "must be a positive integer\n", optarg);
717 				usage();
718 				return (1);
719 			}
720 			break;
721 		case 'T':
722 			if (strcasecmp(optarg, "read") == 0) {
723 				io_type = ZIO_TYPE_READ;
724 			} else if (strcasecmp(optarg, "write") == 0) {
725 				io_type = ZIO_TYPE_WRITE;
726 			} else if (strcasecmp(optarg, "free") == 0) {
727 				io_type = ZIO_TYPE_FREE;
728 			} else if (strcasecmp(optarg, "claim") == 0) {
729 				io_type = ZIO_TYPE_CLAIM;
730 			} else if (strcasecmp(optarg, "all") == 0) {
731 				io_type = ZIO_TYPES;
732 			} else {
733 				(void) fprintf(stderr, "invalid I/O type "
734 				    "'%s': must be 'read', 'write', 'free', "
735 				    "'claim' or 'all'\n", optarg);
736 				usage();
737 				return (1);
738 			}
739 			break;
740 		case 't':
741 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
742 			    !MOS_TYPE(type)) {
743 				(void) fprintf(stderr, "invalid type '%s'\n",
744 				    optarg);
745 				usage();
746 				return (1);
747 			}
748 			break;
749 		case 'u':
750 			flags |= ZINJECT_UNLOAD_SPA;
751 			break;
752 		case 'L':
753 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
754 			    !LABEL_TYPE(type)) {
755 				(void) fprintf(stderr, "invalid label type "
756 				    "'%s'\n", optarg);
757 				usage();
758 				return (1);
759 			}
760 			break;
761 		case ':':
762 			(void) fprintf(stderr, "option -%c requires an "
763 			    "operand\n", optopt);
764 			usage();
765 			return (1);
766 		case '?':
767 			(void) fprintf(stderr, "invalid option '%c'\n",
768 			    optopt);
769 			usage();
770 			return (2);
771 		}
772 	}
773 
774 	argc -= optind;
775 	argv += optind;
776 
777 	if (record.zi_duration != 0)
778 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
779 
780 	if (cancel != NULL) {
781 		/*
782 		 * '-c' is invalid with any other options.
783 		 */
784 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
785 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
786 			(void) fprintf(stderr, "cancel (-c) incompatible with "
787 			    "any other options\n");
788 			usage();
789 			return (2);
790 		}
791 		if (argc != 0) {
792 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
793 			usage();
794 			return (2);
795 		}
796 
797 		if (strcmp(cancel, "all") == 0) {
798 			return (cancel_all_handlers());
799 		} else {
800 			int id = (int)strtol(cancel, &end, 10);
801 			if (*end != '\0') {
802 				(void) fprintf(stderr, "invalid handle id '%s':"
803 				    " must be an integer or 'all'\n", cancel);
804 				usage();
805 				return (1);
806 			}
807 			return (cancel_handler(id));
808 		}
809 	}
810 
811 	if (device != NULL) {
812 		/*
813 		 * Device (-d) injection uses a completely different mechanism
814 		 * for doing injection, so handle it separately here.
815 		 */
816 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
817 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
818 			(void) fprintf(stderr, "device (-d) incompatible with "
819 			    "data error injection\n");
820 			usage();
821 			return (2);
822 		}
823 
824 		if (argc != 1) {
825 			(void) fprintf(stderr, "device (-d) injection requires "
826 			    "a single pool name\n");
827 			usage();
828 			return (2);
829 		}
830 
831 		(void) strcpy(pool, argv[0]);
832 		dataset[0] = '\0';
833 
834 		if (error == ECKSUM) {
835 			(void) fprintf(stderr, "device error type must be "
836 			    "'io' or 'nxio'\n");
837 			return (1);
838 		}
839 
840 		record.zi_iotype = io_type;
841 		if (translate_device(pool, device, label, &record) != 0)
842 			return (1);
843 		if (!error)
844 			error = ENXIO;
845 
846 		if (action != VDEV_STATE_UNKNOWN)
847 			return (perform_action(pool, &record, action));
848 
849 	} else if (raw != NULL) {
850 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
851 		    record.zi_cmd != ZINJECT_UNINITIALIZED) {
852 			(void) fprintf(stderr, "raw (-b) format with "
853 			    "any other options\n");
854 			usage();
855 			return (2);
856 		}
857 
858 		if (argc != 1) {
859 			(void) fprintf(stderr, "raw (-b) format expects a "
860 			    "single pool name\n");
861 			usage();
862 			return (2);
863 		}
864 
865 		(void) strcpy(pool, argv[0]);
866 		dataset[0] = '\0';
867 
868 		if (error == ENXIO) {
869 			(void) fprintf(stderr, "data error type must be "
870 			    "'checksum' or 'io'\n");
871 			return (1);
872 		}
873 
874 		record.zi_cmd = ZINJECT_DATA_FAULT;
875 		if (translate_raw(raw, &record) != 0)
876 			return (1);
877 		if (!error)
878 			error = EIO;
879 	} else if (record.zi_cmd == ZINJECT_PANIC) {
880 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
881 		    level != 0 || device != NULL) {
882 			(void) fprintf(stderr, "panic (-p) incompatible with "
883 			    "other options\n");
884 			usage();
885 			return (2);
886 		}
887 
888 		if (argc < 1 || argc > 2) {
889 			(void) fprintf(stderr, "panic (-p) injection requires "
890 			    "a single pool name and an optional id\n");
891 			usage();
892 			return (2);
893 		}
894 
895 		(void) strcpy(pool, argv[0]);
896 		if (argv[1] != NULL)
897 			record.zi_type = atoi(argv[1]);
898 		dataset[0] = '\0';
899 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
900 		if (nowrites == 0) {
901 			(void) fprintf(stderr, "-s or -g meaningless "
902 			    "without -I (ignore writes)\n");
903 			usage();
904 			return (2);
905 		} else if (dur_secs && dur_txg) {
906 			(void) fprintf(stderr, "choose a duration either "
907 			    "in seconds (-s) or a number of txgs (-g) "
908 			    "but not both\n");
909 			usage();
910 			return (2);
911 		} else if (argc != 1) {
912 			(void) fprintf(stderr, "ignore writes (-I) "
913 			    "injection requires a single pool name\n");
914 			usage();
915 			return (2);
916 		}
917 
918 		(void) strcpy(pool, argv[0]);
919 		dataset[0] = '\0';
920 	} else if (type == TYPE_INVAL) {
921 		if (flags == 0) {
922 			(void) fprintf(stderr, "at least one of '-b', '-d', "
923 			    "'-t', '-a', '-p', '-I' or '-u' "
924 			    "must be specified\n");
925 			usage();
926 			return (2);
927 		}
928 
929 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
930 			(void) strcpy(pool, argv[0]);
931 			dataset[0] = '\0';
932 		} else if (argc != 0) {
933 			(void) fprintf(stderr, "extraneous argument for "
934 			    "'-f'\n");
935 			usage();
936 			return (2);
937 		}
938 
939 		flags |= ZINJECT_NULL;
940 	} else {
941 		if (argc != 1) {
942 			(void) fprintf(stderr, "missing object\n");
943 			usage();
944 			return (2);
945 		}
946 
947 		if (error == ENXIO) {
948 			(void) fprintf(stderr, "data error type must be "
949 			    "'checksum' or 'io'\n");
950 			return (1);
951 		}
952 
953 		record.zi_cmd = ZINJECT_DATA_FAULT;
954 		if (translate_record(type, argv[0], range, level, &record, pool,
955 		    dataset) != 0)
956 			return (1);
957 		if (!error)
958 			error = EIO;
959 	}
960 
961 	/*
962 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
963 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
964 	 * time we access the pool.
965 	 */
966 	if (dataset[0] != '\0' && domount) {
967 		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
968 			return (1);
969 
970 		if (zfs_unmount(zhp, NULL, 0) != 0)
971 			return (1);
972 	}
973 
974 	record.zi_error = error;
975 
976 	ret = register_handler(pool, flags, &record, quiet);
977 
978 	if (dataset[0] != '\0' && domount)
979 		ret = (zfs_mount(zhp, NULL, 0) != 0);
980 
981 	libzfs_fini(g_zfs);
982 
983 	return (ret);
984 }
985