xref: /titanic_52/usr/src/cmd/zinject/zinject.c (revision 990b4856d0eaada6f8140335733a1b1771ed2746)
1ea8dc4b6Seschrock /*
2ea8dc4b6Seschrock  * CDDL HEADER START
3ea8dc4b6Seschrock  *
4ea8dc4b6Seschrock  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7ea8dc4b6Seschrock  *
8ea8dc4b6Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ea8dc4b6Seschrock  * or http://www.opensolaris.org/os/licensing.
10ea8dc4b6Seschrock  * See the License for the specific language governing permissions
11ea8dc4b6Seschrock  * and limitations under the License.
12ea8dc4b6Seschrock  *
13ea8dc4b6Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
14ea8dc4b6Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ea8dc4b6Seschrock  * If applicable, add the following below this CDDL HEADER, with the
16ea8dc4b6Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
17ea8dc4b6Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
18ea8dc4b6Seschrock  *
19ea8dc4b6Seschrock  * CDDL HEADER END
20ea8dc4b6Seschrock  */
21ea8dc4b6Seschrock /*
2255434c77Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23ea8dc4b6Seschrock  * Use is subject to license terms.
24ea8dc4b6Seschrock  */
25ea8dc4b6Seschrock 
26ea8dc4b6Seschrock #pragma ident	"%Z%%M%	%I%	%E% SMI"
27ea8dc4b6Seschrock 
28ea8dc4b6Seschrock /*
29ea8dc4b6Seschrock  * ZFS Fault Injector
30ea8dc4b6Seschrock  *
31ea8dc4b6Seschrock  * This userland component takes a set of options and uses libzpool to translate
32ea8dc4b6Seschrock  * from a user-visible object type and name to an internal representation.
33ea8dc4b6Seschrock  * There are two basic types of faults: device faults and data faults.
34ea8dc4b6Seschrock  *
35ea8dc4b6Seschrock  *
36ea8dc4b6Seschrock  * DEVICE FAULTS
37ea8dc4b6Seschrock  *
38ea8dc4b6Seschrock  * Errors can be injected into a particular vdev using the '-d' option.  This
39ea8dc4b6Seschrock  * option takes a path or vdev GUID to uniquely identify the device within a
40ea8dc4b6Seschrock  * pool.  There are two types of errors that can be injected, EIO and ENXIO,
41ea8dc4b6Seschrock  * that can be controlled through the '-e' option.  The default is ENXIO.  For
42ea8dc4b6Seschrock  * EIO failures, any attempt to read data from the device will return EIO, but
43ea8dc4b6Seschrock  * a subsequent attempt to reopen the device will succeed.  For ENXIO failures,
44ea8dc4b6Seschrock  * any attempt to read from the device will return EIO, and any attempt to
45ea8dc4b6Seschrock  * reopen the device will return ENXIO.
46ea8dc4b6Seschrock  *
47ea8dc4b6Seschrock  * This form of the command looks like:
48ea8dc4b6Seschrock  *
49ea8dc4b6Seschrock  * 	zinject -d device [-e errno] pool
50ea8dc4b6Seschrock  *
51ea8dc4b6Seschrock  *
52ea8dc4b6Seschrock  * DATA FAULTS
53ea8dc4b6Seschrock  *
54ea8dc4b6Seschrock  * We begin with a tuple of the form:
55ea8dc4b6Seschrock  *
56ea8dc4b6Seschrock  * 	<type,level,range,object>
57ea8dc4b6Seschrock  *
58ea8dc4b6Seschrock  * 	type	A string describing the type of data to target.  Each type
59ea8dc4b6Seschrock  * 		implicitly describes how to interpret 'object'. Currently,
60ea8dc4b6Seschrock  * 		the following values are supported:
61ea8dc4b6Seschrock  *
62ea8dc4b6Seschrock  * 		data		User data for a file
63ea8dc4b6Seschrock  * 		dnode		Dnode for a file or directory
64ea8dc4b6Seschrock  *
65ea8dc4b6Seschrock  *		The following MOS objects are special.  Instead of injecting
66ea8dc4b6Seschrock  *		errors on a particular object or blkid, we inject errors across
67ea8dc4b6Seschrock  *		all objects of the given type.
68ea8dc4b6Seschrock  *
69ea8dc4b6Seschrock  * 		mos		Any data in the MOS
70ea8dc4b6Seschrock  * 		mosdir		object directory
71ea8dc4b6Seschrock  * 		config		pool configuration
72ea8dc4b6Seschrock  * 		bplist		blkptr list
73ea8dc4b6Seschrock  * 		spacemap	spacemap
74ea8dc4b6Seschrock  * 		metaslab	metaslab
75ea8dc4b6Seschrock  * 		errlog		persistent error log
76ea8dc4b6Seschrock  *
77ea8dc4b6Seschrock  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
78ea8dc4b6Seschrock  * 		a range is given, this corresponds to the indirect block
79ea8dc4b6Seschrock  * 		corresponding to the specific range.
80ea8dc4b6Seschrock  *
81ea8dc4b6Seschrock  *	range	A numerical range [start,end) within the object.  Defaults to
82ea8dc4b6Seschrock  *		the full size of the file.
83ea8dc4b6Seschrock  *
84ea8dc4b6Seschrock  * 	object	A string describing the logical location of the object.  For
85ea8dc4b6Seschrock  * 		files and directories (currently the only supported types),
86ea8dc4b6Seschrock  * 		this is the path of the object on disk.
87ea8dc4b6Seschrock  *
88ea8dc4b6Seschrock  * This is translated, via libzpool, into the following internal representation:
89ea8dc4b6Seschrock  *
90ea8dc4b6Seschrock  * 	<type,objset,object,level,range>
91ea8dc4b6Seschrock  *
92ea8dc4b6Seschrock  * These types should be self-explanatory.  This tuple is then passed to the
93ea8dc4b6Seschrock  * kernel via a special ioctl() to initiate fault injection for the given
94ea8dc4b6Seschrock  * object.  Note that 'type' is not strictly necessary for fault injection, but
95ea8dc4b6Seschrock  * is used when translating existing faults into a human-readable string.
96ea8dc4b6Seschrock  *
97ea8dc4b6Seschrock  *
98ea8dc4b6Seschrock  * The command itself takes one of the forms:
99ea8dc4b6Seschrock  *
100ea8dc4b6Seschrock  * 	zinject
101ea8dc4b6Seschrock  * 	zinject <-a | -u pool>
102ea8dc4b6Seschrock  * 	zinject -c <id|all>
103ea8dc4b6Seschrock  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
104ea8dc4b6Seschrock  *	    [-r range] <object>
105ea8dc4b6Seschrock  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
106ea8dc4b6Seschrock  *
107ea8dc4b6Seschrock  * With no arguments, the command prints all currently registered injection
108ea8dc4b6Seschrock  * handlers, with their numeric identifiers.
109ea8dc4b6Seschrock  *
110ea8dc4b6Seschrock  * The '-c' option will clear the given handler, or all handlers if 'all' is
111ea8dc4b6Seschrock  * specified.
112ea8dc4b6Seschrock  *
113ea8dc4b6Seschrock  * The '-e' option takes a string describing the errno to simulate.  This must
114ea8dc4b6Seschrock  * be either 'io' or 'checksum'.  In most cases this will result in the same
115ea8dc4b6Seschrock  * behavior, but RAID-Z will produce a different set of ereports for this
116ea8dc4b6Seschrock  * situation.
117ea8dc4b6Seschrock  *
118ea8dc4b6Seschrock  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
119ea8dc4b6Seschrock  * specified, then the ARC cache is flushed appropriately.  If '-u' is
120ea8dc4b6Seschrock  * specified, then the underlying SPA is unloaded.  Either of these flags can be
121ea8dc4b6Seschrock  * specified independently of any other handlers.  The '-m' flag automatically
122ea8dc4b6Seschrock  * does an unmount and remount of the underlying dataset to aid in flushing the
123ea8dc4b6Seschrock  * cache.
124ea8dc4b6Seschrock  *
125ea8dc4b6Seschrock  * The '-f' flag controls the frequency of errors injected, expressed as an
126ea8dc4b6Seschrock  * integer percentage between 1 and 100.  The default is 100.
127ea8dc4b6Seschrock  *
128ea8dc4b6Seschrock  * The '-t' form is responsible for actually injecting the handler into the
129ea8dc4b6Seschrock  * framework.  It takes the arguments described above, translates them to the
130ea8dc4b6Seschrock  * internal tuple using libzpool, and then issues an ioctl() to register the
131ea8dc4b6Seschrock  * handler.
132ea8dc4b6Seschrock  *
133ea8dc4b6Seschrock  * The final form can target a specific bookmark, regardless of whether a
134ea8dc4b6Seschrock  * human-readable interface has been designed.  It allows developers to specify
135ea8dc4b6Seschrock  * a particular block by number.
136ea8dc4b6Seschrock  */
137ea8dc4b6Seschrock 
138ea8dc4b6Seschrock #include <errno.h>
139ea8dc4b6Seschrock #include <fcntl.h>
140ea8dc4b6Seschrock #include <stdio.h>
141ea8dc4b6Seschrock #include <stdlib.h>
142ea8dc4b6Seschrock #include <strings.h>
143ea8dc4b6Seschrock #include <unistd.h>
144ea8dc4b6Seschrock 
145ea8dc4b6Seschrock #include <sys/fs/zfs.h>
146ea8dc4b6Seschrock #include <sys/mount.h>
147ea8dc4b6Seschrock 
148ea8dc4b6Seschrock #include <libzfs.h>
149ea8dc4b6Seschrock 
150ea8dc4b6Seschrock #undef verify	/* both libzfs.h and zfs_context.h want to define this */
151ea8dc4b6Seschrock 
152ea8dc4b6Seschrock #include "zinject.h"
153ea8dc4b6Seschrock 
/*
 * Process-wide handles.  Both are set up at the top of main(): g_zfs from
 * libzfs_init() and zfs_fd by opening ZFS_DEV read/write.  zfs_fd is the
 * descriptor on which every ZFS_IOC_* ioctl() in this file is issued.
 * NOTE(review): neither is static, presumably because other zinject source
 * files use them -- confirm against zinject.h.
 */
15499653d4eSeschrock libzfs_handle_t *g_zfs;
155ea8dc4b6Seschrock int zfs_fd;
156ea8dc4b6Seschrock 
/* Error number stored in the record when '-e checksum' is requested. */
157ea8dc4b6Seschrock #define	ECKSUM	EBADE
158ea8dc4b6Seschrock 
159ea8dc4b6Seschrock static const char *errtable[TYPE_INVAL] = {
160ea8dc4b6Seschrock 	"data",
161ea8dc4b6Seschrock 	"dnode",
162ea8dc4b6Seschrock 	"mos",
163ea8dc4b6Seschrock 	"mosdir",
164ea8dc4b6Seschrock 	"metaslab",
165ea8dc4b6Seschrock 	"config",
166ea8dc4b6Seschrock 	"bplist",
167ea8dc4b6Seschrock 	"spacemap",
168ea8dc4b6Seschrock 	"errlog"
169ea8dc4b6Seschrock };
170ea8dc4b6Seschrock 
171ea8dc4b6Seschrock static err_type_t
172ea8dc4b6Seschrock name_to_type(const char *arg)
173ea8dc4b6Seschrock {
174ea8dc4b6Seschrock 	int i;
175ea8dc4b6Seschrock 	for (i = 0; i < TYPE_INVAL; i++)
176ea8dc4b6Seschrock 		if (strcmp(errtable[i], arg) == 0)
177ea8dc4b6Seschrock 			return (i);
178ea8dc4b6Seschrock 
179ea8dc4b6Seschrock 	return (TYPE_INVAL);
180ea8dc4b6Seschrock }
181ea8dc4b6Seschrock 
182ea8dc4b6Seschrock static const char *
183ea8dc4b6Seschrock type_to_name(uint64_t type)
184ea8dc4b6Seschrock {
185ea8dc4b6Seschrock 	switch (type) {
186ea8dc4b6Seschrock 	case DMU_OT_OBJECT_DIRECTORY:
187ea8dc4b6Seschrock 		return ("mosdir");
188ea8dc4b6Seschrock 	case DMU_OT_OBJECT_ARRAY:
189ea8dc4b6Seschrock 		return ("metaslab");
190ea8dc4b6Seschrock 	case DMU_OT_PACKED_NVLIST:
191ea8dc4b6Seschrock 		return ("config");
192ea8dc4b6Seschrock 	case DMU_OT_BPLIST:
193ea8dc4b6Seschrock 		return ("bplist");
194ea8dc4b6Seschrock 	case DMU_OT_SPACE_MAP:
195ea8dc4b6Seschrock 		return ("spacemap");
196ea8dc4b6Seschrock 	case DMU_OT_ERROR_LOG:
197ea8dc4b6Seschrock 		return ("errlog");
198ea8dc4b6Seschrock 	default:
199ea8dc4b6Seschrock 		return ("-");
200ea8dc4b6Seschrock 	}
201ea8dc4b6Seschrock }
202ea8dc4b6Seschrock 
203ea8dc4b6Seschrock 
/*
 * Write the full command synopsis and option descriptions to stdout.
 */
void
usage(void)
{
	/* The text contains no conversion specifications; emit it verbatim. */
	static const char text[] =
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specificed.\n"
	    "\n"
	    "\tzinject -d device [-e errno] pool\n"
	    "\t\tInject a fault into a particular device.  'errno' can either\n"
	    "\t\tbe 'nxio' (the default) or 'io'.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexidecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterperted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
	    "\t\t\t'checksum'.  Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bplist,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n";

	(void) fputs(text, stdout);
}
270ea8dc4b6Seschrock 
271ea8dc4b6Seschrock static int
272ea8dc4b6Seschrock iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
273ea8dc4b6Seschrock     void *data)
274ea8dc4b6Seschrock {
275ea8dc4b6Seschrock 	zfs_cmd_t zc;
276ea8dc4b6Seschrock 	int ret;
277ea8dc4b6Seschrock 
278ea8dc4b6Seschrock 	zc.zc_guid = 0;
279ea8dc4b6Seschrock 
280ea8dc4b6Seschrock 	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
281ea8dc4b6Seschrock 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
282ea8dc4b6Seschrock 		    &zc.zc_inject_record, data)) != 0)
283ea8dc4b6Seschrock 			return (ret);
284ea8dc4b6Seschrock 
285ea8dc4b6Seschrock 	return (0);
286ea8dc4b6Seschrock }
287ea8dc4b6Seschrock 
288ea8dc4b6Seschrock static int
289ea8dc4b6Seschrock print_data_handler(int id, const char *pool, zinject_record_t *record,
290ea8dc4b6Seschrock     void *data)
291ea8dc4b6Seschrock {
292ea8dc4b6Seschrock 	int *count = data;
293ea8dc4b6Seschrock 
294ea8dc4b6Seschrock 	if (record->zi_guid != 0)
295ea8dc4b6Seschrock 		return (0);
296ea8dc4b6Seschrock 
297ea8dc4b6Seschrock 	if (*count == 0) {
298ea8dc4b6Seschrock 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
299ea8dc4b6Seschrock 		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
300ea8dc4b6Seschrock 		(void) printf("---  ---------------  ------  "
301ea8dc4b6Seschrock 		    "------  --------  ---  ---------------\n");
302ea8dc4b6Seschrock 	}
303ea8dc4b6Seschrock 
304ea8dc4b6Seschrock 	*count += 1;
305ea8dc4b6Seschrock 
306ea8dc4b6Seschrock 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
307ea8dc4b6Seschrock 	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
308ea8dc4b6Seschrock 	    type_to_name(record->zi_type), record->zi_level);
309ea8dc4b6Seschrock 
310ea8dc4b6Seschrock 	if (record->zi_start == 0 &&
311ea8dc4b6Seschrock 	    record->zi_end == -1ULL)
312ea8dc4b6Seschrock 		(void) printf("all\n");
313ea8dc4b6Seschrock 	else
314ea8dc4b6Seschrock 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
315ea8dc4b6Seschrock 		    (u_longlong_t)record->zi_end);
316ea8dc4b6Seschrock 
317ea8dc4b6Seschrock 	return (0);
318ea8dc4b6Seschrock }
319ea8dc4b6Seschrock 
320ea8dc4b6Seschrock static int
321ea8dc4b6Seschrock print_device_handler(int id, const char *pool, zinject_record_t *record,
322ea8dc4b6Seschrock     void *data)
323ea8dc4b6Seschrock {
324ea8dc4b6Seschrock 	int *count = data;
325ea8dc4b6Seschrock 
326ea8dc4b6Seschrock 	if (record->zi_guid == 0)
327ea8dc4b6Seschrock 		return (0);
328ea8dc4b6Seschrock 
329ea8dc4b6Seschrock 	if (*count == 0) {
330ea8dc4b6Seschrock 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
331ea8dc4b6Seschrock 		(void) printf("---  ---------------  ----------------\n");
332ea8dc4b6Seschrock 	}
333ea8dc4b6Seschrock 
334ea8dc4b6Seschrock 	*count += 1;
335ea8dc4b6Seschrock 
336ea8dc4b6Seschrock 	(void) printf("%3d  %-15s  %llx\n", id, pool,
337ea8dc4b6Seschrock 	    (u_longlong_t)record->zi_guid);
338ea8dc4b6Seschrock 
339ea8dc4b6Seschrock 	return (0);
340ea8dc4b6Seschrock }
341ea8dc4b6Seschrock 
342ea8dc4b6Seschrock /*
343ea8dc4b6Seschrock  * Print all registered error handlers.  Returns the number of handlers
344ea8dc4b6Seschrock  * registered.
345ea8dc4b6Seschrock  */
346ea8dc4b6Seschrock static int
347ea8dc4b6Seschrock print_all_handlers(void)
348ea8dc4b6Seschrock {
349ea8dc4b6Seschrock 	int count = 0;
350ea8dc4b6Seschrock 
351ea8dc4b6Seschrock 	(void) iter_handlers(print_device_handler, &count);
352ea8dc4b6Seschrock 	(void) printf("\n");
353ea8dc4b6Seschrock 	count = 0;
354ea8dc4b6Seschrock 	(void) iter_handlers(print_data_handler, &count);
355ea8dc4b6Seschrock 
356ea8dc4b6Seschrock 	return (count);
357ea8dc4b6Seschrock }
358ea8dc4b6Seschrock 
359ea8dc4b6Seschrock /* ARGSUSED */
360ea8dc4b6Seschrock static int
361ea8dc4b6Seschrock cancel_one_handler(int id, const char *pool, zinject_record_t *record,
362ea8dc4b6Seschrock     void *data)
363ea8dc4b6Seschrock {
364ea8dc4b6Seschrock 	zfs_cmd_t zc;
365ea8dc4b6Seschrock 
366ea8dc4b6Seschrock 	zc.zc_guid = (uint64_t)id;
367ea8dc4b6Seschrock 
368ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
369ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
370ea8dc4b6Seschrock 		    id, strerror(errno));
371ea8dc4b6Seschrock 		return (1);
372ea8dc4b6Seschrock 	}
373ea8dc4b6Seschrock 
374ea8dc4b6Seschrock 	return (0);
375ea8dc4b6Seschrock }
376ea8dc4b6Seschrock 
377ea8dc4b6Seschrock /*
378ea8dc4b6Seschrock  * Remove all fault injection handlers.
379ea8dc4b6Seschrock  */
380ea8dc4b6Seschrock static int
381ea8dc4b6Seschrock cancel_all_handlers(void)
382ea8dc4b6Seschrock {
383ea8dc4b6Seschrock 	int ret = iter_handlers(cancel_one_handler, NULL);
384ea8dc4b6Seschrock 
385ea8dc4b6Seschrock 	(void) printf("removed all registered handlers\n");
386ea8dc4b6Seschrock 
387ea8dc4b6Seschrock 	return (ret);
388ea8dc4b6Seschrock }
389ea8dc4b6Seschrock 
390ea8dc4b6Seschrock /*
391ea8dc4b6Seschrock  * Remove a specific fault injection handler.
392ea8dc4b6Seschrock  */
393ea8dc4b6Seschrock static int
394ea8dc4b6Seschrock cancel_handler(int id)
395ea8dc4b6Seschrock {
396ea8dc4b6Seschrock 	zfs_cmd_t zc;
397ea8dc4b6Seschrock 
398ea8dc4b6Seschrock 	zc.zc_guid = (uint64_t)id;
399ea8dc4b6Seschrock 
400ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
401ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
402ea8dc4b6Seschrock 		    id, strerror(errno));
403ea8dc4b6Seschrock 		return (1);
404ea8dc4b6Seschrock 	}
405ea8dc4b6Seschrock 
406ea8dc4b6Seschrock 	(void) printf("removed handler %d\n", id);
407ea8dc4b6Seschrock 
408ea8dc4b6Seschrock 	return (0);
409ea8dc4b6Seschrock }
410ea8dc4b6Seschrock 
411ea8dc4b6Seschrock /*
412ea8dc4b6Seschrock  * Register a new fault injection handler.
413ea8dc4b6Seschrock  */
414ea8dc4b6Seschrock static int
415ea8dc4b6Seschrock register_handler(const char *pool, int flags, zinject_record_t *record,
416ea8dc4b6Seschrock     int quiet)
417ea8dc4b6Seschrock {
418ea8dc4b6Seschrock 	zfs_cmd_t zc;
419ea8dc4b6Seschrock 
420ea8dc4b6Seschrock 	(void) strcpy(zc.zc_name, pool);
421ea8dc4b6Seschrock 	zc.zc_inject_record = *record;
422ea8dc4b6Seschrock 	zc.zc_guid = flags;
423ea8dc4b6Seschrock 
424ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
425ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to add handler: %s\n",
426ea8dc4b6Seschrock 		    strerror(errno));
427ea8dc4b6Seschrock 		return (1);
428ea8dc4b6Seschrock 	}
429ea8dc4b6Seschrock 
430ea8dc4b6Seschrock 	if (flags & ZINJECT_NULL)
431ea8dc4b6Seschrock 		return (0);
432ea8dc4b6Seschrock 
433ea8dc4b6Seschrock 	if (quiet) {
434ea8dc4b6Seschrock 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
435ea8dc4b6Seschrock 	} else {
436ea8dc4b6Seschrock 		(void) printf("Added handler %llu with the following "
437ea8dc4b6Seschrock 		    "properties:\n", (u_longlong_t)zc.zc_guid);
438ea8dc4b6Seschrock 		(void) printf("  pool: %s\n", pool);
439ea8dc4b6Seschrock 		if (record->zi_guid) {
440ea8dc4b6Seschrock 			(void) printf("  vdev: %llx\n",
441ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_guid);
442ea8dc4b6Seschrock 		} else {
443ea8dc4b6Seschrock 			(void) printf("objset: %llu\n",
444ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_objset);
445ea8dc4b6Seschrock 			(void) printf("object: %llu\n",
446ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_object);
447ea8dc4b6Seschrock 			(void) printf("  type: %llu\n",
448ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_type);
449ea8dc4b6Seschrock 			(void) printf(" level: %d\n", record->zi_level);
450ea8dc4b6Seschrock 			if (record->zi_start == 0 &&
451ea8dc4b6Seschrock 			    record->zi_end == -1ULL)
452ea8dc4b6Seschrock 				(void) printf(" range: all\n");
453ea8dc4b6Seschrock 			else
454ea8dc4b6Seschrock 				(void) printf(" range: [%llu, %llu)\n",
455ea8dc4b6Seschrock 				    (u_longlong_t)record->zi_start,
456ea8dc4b6Seschrock 				    (u_longlong_t)record->zi_end);
457ea8dc4b6Seschrock 		}
458ea8dc4b6Seschrock 	}
459ea8dc4b6Seschrock 
460ea8dc4b6Seschrock 	return (0);
461ea8dc4b6Seschrock }
462ea8dc4b6Seschrock 
463ea8dc4b6Seschrock int
464ea8dc4b6Seschrock main(int argc, char **argv)
465ea8dc4b6Seschrock {
466ea8dc4b6Seschrock 	int c;
467ea8dc4b6Seschrock 	char *range = NULL;
468ea8dc4b6Seschrock 	char *cancel = NULL;
469ea8dc4b6Seschrock 	char *end;
470ea8dc4b6Seschrock 	char *raw = NULL;
471ea8dc4b6Seschrock 	char *device = NULL;
472ea8dc4b6Seschrock 	int level = 0;
473ea8dc4b6Seschrock 	int quiet = 0;
474ea8dc4b6Seschrock 	int error = 0;
475ea8dc4b6Seschrock 	int domount = 0;
476ea8dc4b6Seschrock 	err_type_t type = TYPE_INVAL;
477ea8dc4b6Seschrock 	zinject_record_t record = { 0 };
478ea8dc4b6Seschrock 	char pool[MAXNAMELEN];
479ea8dc4b6Seschrock 	char dataset[MAXNAMELEN];
480ea8dc4b6Seschrock 	zfs_handle_t *zhp;
481ea8dc4b6Seschrock 	int ret;
482ea8dc4b6Seschrock 	int flags = 0;
483ea8dc4b6Seschrock 
48499653d4eSeschrock 	if ((g_zfs = libzfs_init()) == NULL) {
48599653d4eSeschrock 		(void) fprintf(stderr, "internal error: failed to "
48699653d4eSeschrock 		    "initialize ZFS library\n");
48799653d4eSeschrock 		return (1);
48899653d4eSeschrock 	}
48999653d4eSeschrock 
49099653d4eSeschrock 	libzfs_print_on_error(g_zfs, B_TRUE);
49199653d4eSeschrock 
492ea8dc4b6Seschrock 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
493ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to open ZFS device\n");
494ea8dc4b6Seschrock 		return (1);
495ea8dc4b6Seschrock 	}
496ea8dc4b6Seschrock 
497ea8dc4b6Seschrock 	if (argc == 1) {
498ea8dc4b6Seschrock 		/*
499ea8dc4b6Seschrock 		 * No arguments.  Print the available handlers.  If there are no
500ea8dc4b6Seschrock 		 * available handlers, direct the user to '-h' for help
501ea8dc4b6Seschrock 		 * information.
502ea8dc4b6Seschrock 		 */
503ea8dc4b6Seschrock 		if (print_all_handlers() == 0) {
504ea8dc4b6Seschrock 			(void) printf("No handlers registered.\n");
505ea8dc4b6Seschrock 			(void) printf("Run 'zinject -h' for usage "
506ea8dc4b6Seschrock 			    "information.\n");
507ea8dc4b6Seschrock 		}
508ea8dc4b6Seschrock 
509ea8dc4b6Seschrock 		return (0);
510ea8dc4b6Seschrock 	}
511ea8dc4b6Seschrock 
512ea8dc4b6Seschrock 	while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:u")) != -1) {
513ea8dc4b6Seschrock 		switch (c) {
514ea8dc4b6Seschrock 		case 'a':
515ea8dc4b6Seschrock 			flags |= ZINJECT_FLUSH_ARC;
516ea8dc4b6Seschrock 			break;
517ea8dc4b6Seschrock 		case 'b':
518ea8dc4b6Seschrock 			raw = optarg;
519ea8dc4b6Seschrock 			break;
520ea8dc4b6Seschrock 		case 'c':
521ea8dc4b6Seschrock 			cancel = optarg;
522ea8dc4b6Seschrock 			break;
523ea8dc4b6Seschrock 		case 'd':
524ea8dc4b6Seschrock 			device = optarg;
525ea8dc4b6Seschrock 			break;
526ea8dc4b6Seschrock 		case 'e':
527ea8dc4b6Seschrock 			if (strcasecmp(optarg, "io") == 0) {
528ea8dc4b6Seschrock 				error = EIO;
529ea8dc4b6Seschrock 			} else if (strcasecmp(optarg, "checksum") == 0) {
530ea8dc4b6Seschrock 				error = ECKSUM;
531ea8dc4b6Seschrock 			} else if (strcasecmp(optarg, "nxio") == 0) {
532ea8dc4b6Seschrock 				error = ENXIO;
533ea8dc4b6Seschrock 			} else {
534ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid error type "
535ea8dc4b6Seschrock 				    "'%s': must be 'io', 'checksum' or "
536ea8dc4b6Seschrock 				    "'nxio'\n", optarg);
537ea8dc4b6Seschrock 				usage();
538ea8dc4b6Seschrock 				return (1);
539ea8dc4b6Seschrock 			}
540ea8dc4b6Seschrock 			break;
541ea8dc4b6Seschrock 		case 'f':
542ea8dc4b6Seschrock 			record.zi_freq = atoi(optarg);
543ea8dc4b6Seschrock 			if (record.zi_freq < 1 || record.zi_freq > 100) {
544ea8dc4b6Seschrock 				(void) fprintf(stderr, "frequency range must "
545ea8dc4b6Seschrock 				    "be in the range (0, 100]\n");
546ea8dc4b6Seschrock 				return (1);
547ea8dc4b6Seschrock 			}
548ea8dc4b6Seschrock 			break;
549ea8dc4b6Seschrock 		case 'h':
550ea8dc4b6Seschrock 			usage();
551ea8dc4b6Seschrock 			return (0);
552ea8dc4b6Seschrock 		case 'l':
553ea8dc4b6Seschrock 			level = (int)strtol(optarg, &end, 10);
554ea8dc4b6Seschrock 			if (*end != '\0') {
555ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid level '%s': "
556ea8dc4b6Seschrock 				    "must be an integer\n", optarg);
557ea8dc4b6Seschrock 				usage();
558ea8dc4b6Seschrock 				return (1);
559ea8dc4b6Seschrock 			}
560ea8dc4b6Seschrock 			break;
561ea8dc4b6Seschrock 		case 'm':
562ea8dc4b6Seschrock 			domount = 1;
563ea8dc4b6Seschrock 			break;
564ea8dc4b6Seschrock 		case 'q':
565ea8dc4b6Seschrock 			quiet = 1;
566ea8dc4b6Seschrock 			break;
567ea8dc4b6Seschrock 		case 'r':
568ea8dc4b6Seschrock 			range = optarg;
569ea8dc4b6Seschrock 			break;
570ea8dc4b6Seschrock 		case 't':
571ea8dc4b6Seschrock 			if ((type = name_to_type(optarg)) == TYPE_INVAL) {
572ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid type '%s'\n",
573ea8dc4b6Seschrock 				    optarg);
574ea8dc4b6Seschrock 				usage();
575ea8dc4b6Seschrock 				return (1);
576ea8dc4b6Seschrock 			}
577ea8dc4b6Seschrock 			break;
578ea8dc4b6Seschrock 		case 'u':
579ea8dc4b6Seschrock 			flags |= ZINJECT_UNLOAD_SPA;
580ea8dc4b6Seschrock 			break;
581ea8dc4b6Seschrock 		case ':':
582ea8dc4b6Seschrock 			(void) fprintf(stderr, "option -%c requires an "
583ea8dc4b6Seschrock 			    "operand\n", optopt);
584ea8dc4b6Seschrock 			usage();
585ea8dc4b6Seschrock 			return (1);
586ea8dc4b6Seschrock 		case '?':
587ea8dc4b6Seschrock 			(void) fprintf(stderr, "invalid option '%c'\n",
588ea8dc4b6Seschrock 			    optopt);
589ea8dc4b6Seschrock 			usage();
590ea8dc4b6Seschrock 			return (2);
591ea8dc4b6Seschrock 		}
592ea8dc4b6Seschrock 	}
593ea8dc4b6Seschrock 
594ea8dc4b6Seschrock 	argc -= optind;
595ea8dc4b6Seschrock 	argv += optind;
596ea8dc4b6Seschrock 
597ea8dc4b6Seschrock 	if (cancel != NULL) {
598ea8dc4b6Seschrock 		/*
599ea8dc4b6Seschrock 		 * '-c' is invalid with any other options.
600ea8dc4b6Seschrock 		 */
601ea8dc4b6Seschrock 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
602ea8dc4b6Seschrock 		    level != 0) {
603ea8dc4b6Seschrock 			(void) fprintf(stderr, "cancel (-c) incompatible with "
604ea8dc4b6Seschrock 			    "any other options\n");
605ea8dc4b6Seschrock 			usage();
606ea8dc4b6Seschrock 			return (2);
607ea8dc4b6Seschrock 		}
608ea8dc4b6Seschrock 		if (argc != 0) {
609ea8dc4b6Seschrock 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
610ea8dc4b6Seschrock 			usage();
611ea8dc4b6Seschrock 			return (2);
612ea8dc4b6Seschrock 		}
613ea8dc4b6Seschrock 
614ea8dc4b6Seschrock 		if (strcmp(cancel, "all") == 0) {
615ea8dc4b6Seschrock 			return (cancel_all_handlers());
616ea8dc4b6Seschrock 		} else {
617ea8dc4b6Seschrock 			int id = (int)strtol(cancel, &end, 10);
618ea8dc4b6Seschrock 			if (*end != '\0') {
619ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid handle id '%s':"
620ea8dc4b6Seschrock 				    " must be an integer or 'all'\n", cancel);
621ea8dc4b6Seschrock 				usage();
622ea8dc4b6Seschrock 				return (1);
623ea8dc4b6Seschrock 			}
624ea8dc4b6Seschrock 			return (cancel_handler(id));
625ea8dc4b6Seschrock 		}
626ea8dc4b6Seschrock 	}
627ea8dc4b6Seschrock 
628ea8dc4b6Seschrock 	if (device != NULL) {
629ea8dc4b6Seschrock 		/*
630ea8dc4b6Seschrock 		 * Device (-d) injection uses a completely different mechanism
631ea8dc4b6Seschrock 		 * for doing injection, so handle it separately here.
632ea8dc4b6Seschrock 		 */
633ea8dc4b6Seschrock 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
634ea8dc4b6Seschrock 		    level != 0) {
635ea8dc4b6Seschrock 			(void) fprintf(stderr, "device (-d) incompatible with "
636ea8dc4b6Seschrock 			    "data error injection\n");
637ea8dc4b6Seschrock 			usage();
638ea8dc4b6Seschrock 			return (2);
639ea8dc4b6Seschrock 		}
640ea8dc4b6Seschrock 
641ea8dc4b6Seschrock 		if (argc != 1) {
642ea8dc4b6Seschrock 			(void) fprintf(stderr, "device (-d) injection requires "
643ea8dc4b6Seschrock 			    "a single pool name\n");
644ea8dc4b6Seschrock 			usage();
645ea8dc4b6Seschrock 			return (2);
646ea8dc4b6Seschrock 		}
647ea8dc4b6Seschrock 
648ea8dc4b6Seschrock 		(void) strcpy(pool, argv[0]);
649ea8dc4b6Seschrock 		dataset[0] = '\0';
650ea8dc4b6Seschrock 
651ea8dc4b6Seschrock 		if (error == ECKSUM) {
652ea8dc4b6Seschrock 			(void) fprintf(stderr, "device error type must be "
653ea8dc4b6Seschrock 			    "'io' or 'nxio'\n");
654ea8dc4b6Seschrock 			return (1);
655ea8dc4b6Seschrock 		}
656ea8dc4b6Seschrock 
657ea8dc4b6Seschrock 		if (translate_device(pool, device, &record) != 0)
658ea8dc4b6Seschrock 			return (1);
659ea8dc4b6Seschrock 		if (!error)
660ea8dc4b6Seschrock 			error = ENXIO;
661ea8dc4b6Seschrock 	} else if (raw != NULL) {
662ea8dc4b6Seschrock 		if (range != NULL || type != TYPE_INVAL || level != 0) {
663ea8dc4b6Seschrock 			(void) fprintf(stderr, "raw (-b) format with "
664ea8dc4b6Seschrock 			    "any other options\n");
665ea8dc4b6Seschrock 			usage();
666ea8dc4b6Seschrock 			return (2);
667ea8dc4b6Seschrock 		}
668ea8dc4b6Seschrock 
669ea8dc4b6Seschrock 		if (argc != 1) {
670ea8dc4b6Seschrock 			(void) fprintf(stderr, "raw (-b) format expects a "
671ea8dc4b6Seschrock 			    "single pool name\n");
672ea8dc4b6Seschrock 			usage();
673ea8dc4b6Seschrock 			return (2);
674ea8dc4b6Seschrock 		}
675ea8dc4b6Seschrock 
676ea8dc4b6Seschrock 		(void) strcpy(pool, argv[0]);
677ea8dc4b6Seschrock 		dataset[0] = '\0';
678ea8dc4b6Seschrock 
679ea8dc4b6Seschrock 		if (error == ENXIO) {
680ea8dc4b6Seschrock 			(void) fprintf(stderr, "data error type must be "
681ea8dc4b6Seschrock 			    "'checksum' or 'io'\n");
682ea8dc4b6Seschrock 			return (1);
683ea8dc4b6Seschrock 		}
684ea8dc4b6Seschrock 
685ea8dc4b6Seschrock 		if (translate_raw(raw, &record) != 0)
686ea8dc4b6Seschrock 			return (1);
687ea8dc4b6Seschrock 		if (!error)
688ea8dc4b6Seschrock 			error = EIO;
689ea8dc4b6Seschrock 	} else if (type == TYPE_INVAL) {
690ea8dc4b6Seschrock 		if (flags == 0) {
691ea8dc4b6Seschrock 			(void) fprintf(stderr, "at least one of '-b', '-d', "
692ea8dc4b6Seschrock 			    "'-t', '-a', or '-u' must be specified\n");
693ea8dc4b6Seschrock 			usage();
694ea8dc4b6Seschrock 			return (2);
695ea8dc4b6Seschrock 		}
696ea8dc4b6Seschrock 
697ea8dc4b6Seschrock 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
698ea8dc4b6Seschrock 			(void) strcpy(pool, argv[0]);
699ea8dc4b6Seschrock 			dataset[0] = '\0';
700ea8dc4b6Seschrock 		} else if (argc != 0) {
701ea8dc4b6Seschrock 			(void) fprintf(stderr, "extraneous argument for "
702ea8dc4b6Seschrock 			    "'-f'\n");
703ea8dc4b6Seschrock 			usage();
704ea8dc4b6Seschrock 			return (2);
705ea8dc4b6Seschrock 		}
706ea8dc4b6Seschrock 
707ea8dc4b6Seschrock 		flags |= ZINJECT_NULL;
708ea8dc4b6Seschrock 	} else {
709ea8dc4b6Seschrock 		if (argc != 1) {
710ea8dc4b6Seschrock 			(void) fprintf(stderr, "missing object\n");
711ea8dc4b6Seschrock 			usage();
712ea8dc4b6Seschrock 			return (2);
713ea8dc4b6Seschrock 		}
714ea8dc4b6Seschrock 
715ea8dc4b6Seschrock 		if (error == ENXIO) {
716ea8dc4b6Seschrock 			(void) fprintf(stderr, "data error type must be "
717ea8dc4b6Seschrock 			    "'checksum' or 'io'\n");
718ea8dc4b6Seschrock 			return (1);
719ea8dc4b6Seschrock 		}
720ea8dc4b6Seschrock 
721ea8dc4b6Seschrock 		if (translate_record(type, argv[0], range, level, &record, pool,
722ea8dc4b6Seschrock 		    dataset) != 0)
723ea8dc4b6Seschrock 			return (1);
724ea8dc4b6Seschrock 		if (!error)
725ea8dc4b6Seschrock 			error = EIO;
726ea8dc4b6Seschrock 	}
727ea8dc4b6Seschrock 
728ea8dc4b6Seschrock 	/*
729ea8dc4b6Seschrock 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
730ea8dc4b6Seschrock 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
731ea8dc4b6Seschrock 	 * time we access the pool.
732ea8dc4b6Seschrock 	 */
733ea8dc4b6Seschrock 	if (dataset[0] != '\0' && domount) {
734*990b4856Slling 		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
735ea8dc4b6Seschrock 			return (1);
736ea8dc4b6Seschrock 
737ea8dc4b6Seschrock 		if (zfs_unmount(zhp, NULL, 0) != 0)
738ea8dc4b6Seschrock 			return (1);
739ea8dc4b6Seschrock 	}
740ea8dc4b6Seschrock 
741ea8dc4b6Seschrock 	record.zi_error = error;
742ea8dc4b6Seschrock 
743ea8dc4b6Seschrock 	ret = register_handler(pool, flags, &record, quiet);
744ea8dc4b6Seschrock 
745ea8dc4b6Seschrock 	if (dataset[0] != '\0' && domount)
746ea8dc4b6Seschrock 		ret = (zfs_mount(zhp, NULL, 0) != 0);
747ea8dc4b6Seschrock 
74899653d4eSeschrock 	libzfs_fini(g_zfs);
74999653d4eSeschrock 
750ea8dc4b6Seschrock 	return (ret);
751ea8dc4b6Seschrock }
752