1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2017, Intel Corporation. 26 * Copyright (c) 2023-2025, Klara, Inc. 27 */ 28 29 /* 30 * ZFS Fault Injector 31 * 32 * This userland component takes a set of options and uses libzpool to translate 33 * from a user-visible object type and name to an internal representation. 34 * There are two basic types of faults: device faults and data faults. 35 * 36 * 37 * DEVICE FAULTS 38 * 39 * Errors can be injected into a particular vdev using the '-d' option. This 40 * option takes a path or vdev GUID to uniquely identify the device within a 41 * pool. There are four types of errors that can be injected, IO, ENXIO, 42 * ECHILD, and EILSEQ. These can be controlled through the '-e' option and the 43 * default is ENXIO. For EIO failures, any attempt to read data from the device 44 * will return EIO, but a subsequent attempt to reopen the device will succeed. 45 * For ENXIO failures, any attempt to read from the device will return EIO, but 46 * any attempt to reopen the device will also return ENXIO. The EILSEQ failures 47 * only apply to read operations (-T read) and will flip a bit after the device 48 * has read the original data. 49 * 50 * For label faults, the -L option must be specified. This allows faults 51 * to be injected into either the nvlist, uberblock, pad1, or pad2 region 52 * of all the labels for the specified device. 53 * 54 * This form of the command looks like: 55 * 56 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool 57 * 58 * 59 * DATA FAULTS 60 * 61 * We begin with a tuple of the form: 62 * 63 * <type,level,range,object> 64 * 65 * type A string describing the type of data to target. Each type 66 * implicitly describes how to interpret 'object'. Currently, 67 * the following values are supported: 68 * 69 * data User data for a file 70 * dnode Dnode for a file or directory 71 * 72 * The following MOS objects are special. Instead of injecting 73 * errors on a particular object or blkid, we inject errors across 74 * all objects of the given type. 75 * 76 * mos Any data in the MOS 77 * mosdir object directory 78 * config pool configuration 79 * bpobj blkptr list 80 * spacemap spacemap 81 * metaslab metaslab 82 * errlog persistent error log 83 * 84 * level Object level. Defaults to '0', not applicable to all types. If 85 * a range is given, this corresponds to the indirect block 86 * corresponding to the specific range. 87 * 88 * range A numerical range [start,end) within the object. Defaults to 89 * the full size of the file. 90 * 91 * object A string describing the logical location of the object. For 92 * files and directories (currently the only supported types), 93 * this is the path of the object on disk. 94 * 95 * This is translated, via libzpool, into the following internal representation: 96 * 97 * <type,objset,object,level,range> 98 * 99 * These types should be self-explanatory. This tuple is then passed to the 100 * kernel via a special ioctl() to initiate fault injection for the given 101 * object. Note that 'type' is not strictly necessary for fault injection, but 102 * is used when translating existing faults into a human-readable string. 103 * 104 * 105 * The command itself takes one of the forms: 106 * 107 * zinject 108 * zinject <-a | -u pool> 109 * zinject -c <id|all> 110 * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range] 111 * [-T iotype] [-t type object | -b bookmark pool] 112 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] 113 * [-r range] <object> 114 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool 115 * 116 * With no arguments, the command prints all currently registered injection 117 * handlers, with their numeric identifiers. 118 * 119 * The '-c' option will clear the given handler, or all handlers if 'all' is 120 * specified. 121 * 122 * The '-e' option takes a string describing the errno to simulate. This must 123 * be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this 124 * will result in the same behavior, but RAID-Z will produce a different set of 125 * ereports for this situation. 126 * 127 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is 128 * specified, then the ARC cache is flushed appropriately. If '-u' is 129 * specified, then the underlying SPA is unloaded. Either of these flags can be 130 * specified independently of any other handlers. The '-m' flag automatically 131 * does an unmount and remount of the underlying dataset to aid in flushing the 132 * cache. 133 * 134 * The '-f' flag controls the frequency of errors injected, expressed as a 135 * real number percentage between 0.0001 and 100. The default is 100. 136 * 137 * The <object> form is responsible for actually injecting the handler into the 138 * framework. It takes the arguments described above, translates them to the 139 * internal tuple using libzpool, and then issues an ioctl() to register the 140 * handler. 141 * 142 * The '-b' option can target a specific bookmark, regardless of whether a 143 * human-readable interface has been designed. It allows developers to specify 144 * a particular block by number. 145 * 146 * The '-E' option injects pipeline ready stage delays for the given object or 147 * bookmark. The delay is specified in milliseconds, and it supports I/O type 148 * and range filters. 149 */ 150 151 #include <errno.h> 152 #include <fcntl.h> 153 #include <stdio.h> 154 #include <stdlib.h> 155 #include <string.h> 156 #include <strings.h> 157 #include <unistd.h> 158 159 #include <sys/fs/zfs.h> 160 #include <sys/mount.h> 161 162 #include <libzfs.h> 163 164 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 165 166 #include "zinject.h" 167 168 libzfs_handle_t *g_zfs; 169 int zfs_fd; 170 171 static const char *const errtable[TYPE_INVAL] = { 172 "data", 173 "dnode", 174 "mos", 175 "mosdir", 176 "metaslab", 177 "config", 178 "bpobj", 179 "spacemap", 180 "errlog", 181 "uber", 182 "nvlist", 183 "pad1", 184 "pad2" 185 }; 186 187 static err_type_t 188 name_to_type(const char *arg) 189 { 190 int i; 191 for (i = 0; i < TYPE_INVAL; i++) 192 if (strcmp(errtable[i], arg) == 0) 193 return (i); 194 195 return (TYPE_INVAL); 196 } 197 198 static const char * 199 type_to_name(uint64_t type) 200 { 201 switch (type) { 202 case DMU_OT_OBJECT_DIRECTORY: 203 return ("mosdir"); 204 case DMU_OT_OBJECT_ARRAY: 205 return ("metaslab"); 206 case DMU_OT_PACKED_NVLIST: 207 return ("config"); 208 case DMU_OT_BPOBJ: 209 return ("bpobj"); 210 case DMU_OT_SPACE_MAP: 211 return ("spacemap"); 212 case DMU_OT_ERROR_LOG: 213 return ("errlog"); 214 default: 215 return ("-"); 216 } 217 } 218 219 struct errstr { 220 int err; 221 const char *str; 222 }; 223 static const struct errstr errstrtable[] = { 224 { EIO, "io" }, 225 { ECKSUM, "checksum" }, 226 { EINVAL, "decompress" }, 227 { EACCES, "decrypt" }, 228 { ENXIO, "nxio" }, 229 { ECHILD, "dtl" }, 230 { EILSEQ, "corrupt" }, 231 { ENOSYS, "noop" }, 232 { 0, NULL }, 233 }; 234 235 static int 236 str_to_err(const char *str) 237 { 238 for (int i = 0; errstrtable[i].str != NULL; i++) 239 if (strcasecmp(errstrtable[i].str, str) == 0) 240 return (errstrtable[i].err); 241 return (-1); 242 } 243 static const char * 244 err_to_str(int err) 245 { 246 for (int i = 0; errstrtable[i].str != NULL; i++) 247 if (errstrtable[i].err == err) 248 return (errstrtable[i].str); 249 return ("[unknown]"); 250 } 251 252 static const char *const iotypestrtable[ZINJECT_IOTYPES] = { 253 [ZINJECT_IOTYPE_NULL] = "null", 254 [ZINJECT_IOTYPE_READ] = "read", 255 [ZINJECT_IOTYPE_WRITE] = "write", 256 [ZINJECT_IOTYPE_FREE] = "free", 257 [ZINJECT_IOTYPE_CLAIM] = "claim", 258 [ZINJECT_IOTYPE_FLUSH] = "flush", 259 [ZINJECT_IOTYPE_TRIM] = "trim", 260 [ZINJECT_IOTYPE_ALL] = "all", 261 [ZINJECT_IOTYPE_PROBE] = "probe", 262 }; 263 264 static zinject_iotype_t 265 str_to_iotype(const char *arg) 266 { 267 for (uint_t iotype = 0; iotype < ZINJECT_IOTYPES; iotype++) 268 if (iotypestrtable[iotype] != NULL && 269 strcasecmp(iotypestrtable[iotype], arg) == 0) 270 return (iotype); 271 return (ZINJECT_IOTYPES); 272 } 273 274 static const char * 275 iotype_to_str(zinject_iotype_t iotype) 276 { 277 if (iotype >= ZINJECT_IOTYPES || iotypestrtable[iotype] == NULL) 278 return ("[unknown]"); 279 return (iotypestrtable[iotype]); 280 } 281 282 /* 283 * Print usage message. 284 */ 285 void 286 usage(void) 287 { 288 (void) printf( 289 "usage:\n" 290 "\n" 291 "\tzinject\n" 292 "\n" 293 "\t\tList all active injection records.\n" 294 "\n" 295 "\tzinject -c <id|all>\n" 296 "\n" 297 "\t\tClear the particular record (if given a numeric ID), or\n" 298 "\t\tall records if 'all' is specified.\n" 299 "\n" 300 "\tzinject -p <function name> pool\n" 301 "\t\tInject a panic fault at the specified function. Only \n" 302 "\t\tfunctions which call spa_vdev_config_exit(), or \n" 303 "\t\tspa_vdev_exit() will trigger a panic.\n" 304 "\n" 305 "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n" 306 "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n" 307 "\t\tInject a fault into a particular device or the device's\n" 308 "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " 309 "\t\t'pad1', or 'pad2'.\n" 310 "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n" 311 "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n" 312 "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n" 313 "\t\tdevice error injection to a percentage of the IOs.\n" 314 "\n" 315 "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n" 316 "\t\tPerform a specific action on a particular device.\n" 317 "\n" 318 "\tzinject -d device -D latency:lanes pool\n" 319 "\n" 320 "\t\tAdd an artificial delay to IO requests on a particular\n" 321 "\t\tdevice, such that the requests take a minimum of 'latency'\n" 322 "\t\tmilliseconds to complete. Each delay has an associated\n" 323 "\t\tnumber of 'lanes' which defines the number of concurrent\n" 324 "\t\tIO requests that can be processed.\n" 325 "\n" 326 "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n" 327 "\t\tthe device will only be able to service a single IO request\n" 328 "\t\tat a time with each request taking 10 ms to complete. So,\n" 329 "\t\tif only a single request is submitted every 10 ms, the\n" 330 "\t\taverage latency will be 10 ms; but if more than one request\n" 331 "\t\tis submitted every 10 ms, the average latency will be more\n" 332 "\t\tthan 10 ms.\n" 333 "\n" 334 "\t\tSimilarly, if a delay of 10 ms is specified to have two\n" 335 "\t\tlanes (-D 10:2), then the device will be able to service\n" 336 "\t\ttwo requests at a time, each with a minimum latency of\n" 337 "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n" 338 "\t\tthe average latency will be 10 ms; but if more than two\n" 339 "\t\trequests are submitted every 10 ms, the average latency\n" 340 "\t\twill be more than 10 ms.\n" 341 "\n" 342 "\t\tAlso note, these delays are additive. So two invocations\n" 343 "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n" 344 "\t\tof '-D 10:2'. This also means, one can specify multiple\n" 345 "\t\tlanes with differing target latencies. For example, an\n" 346 "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n" 347 "\t\tcreate 3 lanes on the device; one lane with a latency\n" 348 "\t\tof 10 ms and two lanes with a 25 ms latency.\n" 349 "\n" 350 "\tzinject -P import|export -s <seconds> pool\n" 351 "\t\tAdd an artificial delay to a future pool import or export,\n" 352 "\t\tsuch that the operation takes a minimum of supplied seconds\n" 353 "\t\tto complete.\n" 354 "\n" 355 "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n" 356 "\t\t[-T iotype] [-t type object | -b bookmark pool]\n" 357 "\n" 358 "\t\tInject pipeline ready stage delays for the given object path\n" 359 "\t\t(data or dnode) or raw bookmark. The delay is specified in\n" 360 "\t\tmilliseconds.\n" 361 "\n" 362 "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" 363 "\t\tCause the pool to stop writing blocks yet not\n" 364 "\t\treport errors for a duration. Simulates buggy hardware\n" 365 "\t\tthat fails to honor cache flush requests.\n" 366 "\t\tDefault duration is 30 seconds. The machine is panicked\n" 367 "\t\tat the end of the duration.\n" 368 "\n" 369 "\tzinject -b objset:object:level:blkid pool\n" 370 "\n" 371 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 372 "\t\tspecified by the remaining tuple. Each number is in\n" 373 "\t\thexadecimal, and only one block can be specified.\n" 374 "\n" 375 "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n" 376 "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n" 377 "\n" 378 "\t\tInject an error into the object specified by the '-t' option\n" 379 "\t\tand the object descriptor. The 'object' parameter is\n" 380 "\t\tinterpreted depending on the '-t' option.\n" 381 "\n" 382 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 383 "\t\t-e\tInject a specific error. Must be one of 'io',\n" 384 "\t\t\t'checksum', 'decompress', or 'decrypt'. Default is 'io'.\n" 385 "\t\t-C\tInject the given error only into specific DVAs. The\n" 386 "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n" 387 "\t\t\tseparated by commas (ex. '0,2').\n" 388 "\t\t-l\tInject error at a particular block level. Default is " 389 "0.\n" 390 "\t\t-m\tAutomatically remount underlying filesystem.\n" 391 "\t\t-r\tInject error over a particular logical range of an\n" 392 "\t\t\tobject. Will be translated to the appropriate blkid\n" 393 "\t\t\trange according to the object's properties.\n" 394 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 395 "\t\t\tassociated object.\n" 396 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 397 "\t\t\ta pool object.\n" 398 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 399 "\t\t\ta percentage between 0.0001 and 100.\n" 400 "\n" 401 "\t-t data\t\tInject an error into the plain file contents of a\n" 402 "\t\t\tfile. The object must be specified as a complete path\n" 403 "\t\t\tto a file on a ZFS filesystem.\n" 404 "\n" 405 "\t-t dnode\tInject an error into the metadnode in the block\n" 406 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 407 "\t\t\t'-r' option is incompatible with this mode. The object\n" 408 "\t\t\tis specified as a complete path to a file or directory\n" 409 "\t\t\ton a ZFS filesystem.\n" 410 "\n" 411 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 412 "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n" 413 "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" 414 "\t\t\tthe poolname.\n"); 415 } 416 417 static int 418 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 419 void *data) 420 { 421 zfs_cmd_t zc = {"\0"}; 422 int ret; 423 424 while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 425 if ((ret = func((int)zc.zc_guid, zc.zc_name, 426 &zc.zc_inject_record, data)) != 0) 427 return (ret); 428 429 if (errno != ENOENT) { 430 (void) fprintf(stderr, "Unable to list handlers: %s\n", 431 strerror(errno)); 432 return (-1); 433 } 434 435 return (0); 436 } 437 438 static int 439 print_data_handler(int id, const char *pool, zinject_record_t *record, 440 void *data) 441 { 442 int *count = data; 443 444 if (record->zi_guid != 0 || record->zi_func[0] != '\0' || 445 record->zi_duration != 0) { 446 return (0); 447 } 448 449 if (*count == 0) { 450 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s " 451 "%-15s %-6s %-15s\n", "ID", "POOL", "OBJSET", "OBJECT", 452 "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT"); 453 (void) printf("--- --------------- ------ " 454 "------ -------- --- ---- --------------- " 455 "------ ------\n"); 456 } 457 458 *count += 1; 459 460 char rangebuf[32]; 461 if (record->zi_start == 0 && record->zi_end == -1ULL) 462 snprintf(rangebuf, sizeof (rangebuf), "all"); 463 else 464 snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]", 465 (u_longlong_t)record->zi_start, 466 (u_longlong_t)record->zi_end); 467 468 469 (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x %-15s " 470 "%6" PRIu64 " %6" PRIu64 "\n", id, pool, 471 (u_longlong_t)record->zi_objset, 472 (u_longlong_t)record->zi_object, type_to_name(record->zi_type), 473 record->zi_level, record->zi_dvas, rangebuf, 474 record->zi_match_count, record->zi_inject_count); 475 476 return (0); 477 } 478 479 static int 480 print_device_handler(int id, const char *pool, zinject_record_t *record, 481 void *data) 482 { 483 int *count = data; 484 485 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 486 return (0); 487 488 if (record->zi_cmd == ZINJECT_DELAY_IO) 489 return (0); 490 491 if (*count == 0) { 492 (void) printf("%3s %-15s %-16s %-5s %-10s %-9s " 493 "%-6s %-6s\n", 494 "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ", 495 "MATCH", "INJECT"); 496 (void) printf( 497 "--- --------------- ---------------- " 498 "----- ---------- --------- " 499 "------ ------\n"); 500 } 501 502 *count += 1; 503 504 double freq = record->zi_freq == 0 ? 100.0f : 505 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f; 506 507 (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%% " 508 "%6" PRIu64 " %6" PRIu64 "\n", id, pool, 509 (u_longlong_t)record->zi_guid, 510 iotype_to_str(record->zi_iotype), err_to_str(record->zi_error), 511 freq, record->zi_match_count, record->zi_inject_count); 512 513 return (0); 514 } 515 516 static int 517 print_delay_handler(int id, const char *pool, zinject_record_t *record, 518 void *data) 519 { 520 int *count = data; 521 522 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 523 return (0); 524 525 if (record->zi_cmd != ZINJECT_DELAY_IO) 526 return (0); 527 528 if (*count == 0) { 529 (void) printf("%3s %-15s %-16s %-10s %-5s %-9s " 530 "%-6s %-6s\n", 531 "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ", 532 "MATCH", "INJECT"); 533 (void) printf("--- --------------- ---------------- " 534 "---------- ----- --------- " 535 "------ ------\n"); 536 } 537 538 *count += 1; 539 540 double freq = record->zi_freq == 0 ? 100.0f : 541 (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f; 542 543 (void) printf("%3d %-15s %llx %10llu %5llu %8.4f%% " 544 "%6" PRIu64 " %6" PRIu64 "\n", id, pool, 545 (u_longlong_t)record->zi_guid, 546 (u_longlong_t)NSEC2MSEC(record->zi_timer), 547 (u_longlong_t)record->zi_nlanes, 548 freq, record->zi_match_count, record->zi_inject_count); 549 550 return (0); 551 } 552 553 static int 554 print_panic_handler(int id, const char *pool, zinject_record_t *record, 555 void *data) 556 { 557 int *count = data; 558 559 if (record->zi_func[0] == '\0') 560 return (0); 561 562 if (*count == 0) { 563 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); 564 (void) printf("--- --------------- ----------------\n"); 565 } 566 567 *count += 1; 568 569 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); 570 571 return (0); 572 } 573 574 static int 575 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record, 576 void *data) 577 { 578 int *count = data; 579 580 if (record->zi_cmd != ZINJECT_DELAY_IMPORT && 581 record->zi_cmd != ZINJECT_DELAY_EXPORT) { 582 return (0); 583 } 584 585 if (*count == 0) { 586 (void) printf("%3s %-19s %-11s %s\n", 587 "ID", "POOL", "DELAY (sec)", "COMMAND"); 588 (void) printf("--- ------------------- -----------" 589 " -------\n"); 590 } 591 592 *count += 1; 593 594 (void) printf("%3d %-19s %-11llu %s\n", 595 id, pool, (u_longlong_t)record->zi_duration, 596 record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export"); 597 598 return (0); 599 } 600 601 /* 602 * Print all registered error handlers. Returns the number of handlers 603 * registered. 604 */ 605 static int 606 print_all_handlers(void) 607 { 608 int count = 0, total = 0; 609 610 (void) iter_handlers(print_device_handler, &count); 611 if (count > 0) { 612 total += count; 613 (void) printf("\n"); 614 count = 0; 615 } 616 617 (void) iter_handlers(print_delay_handler, &count); 618 if (count > 0) { 619 total += count; 620 (void) printf("\n"); 621 count = 0; 622 } 623 624 (void) iter_handlers(print_data_handler, &count); 625 if (count > 0) { 626 total += count; 627 (void) printf("\n"); 628 count = 0; 629 } 630 631 (void) iter_handlers(print_pool_delay_handler, &count); 632 if (count > 0) { 633 total += count; 634 (void) printf("\n"); 635 count = 0; 636 } 637 638 (void) iter_handlers(print_panic_handler, &count); 639 640 return (count + total); 641 } 642 643 static int 644 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 645 void *data) 646 { 647 (void) pool, (void) record, (void) data; 648 zfs_cmd_t zc = {"\0"}; 649 650 zc.zc_guid = (uint64_t)id; 651 652 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 653 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 654 id, strerror(errno)); 655 return (1); 656 } 657 658 return (0); 659 } 660 661 /* 662 * Remove all fault injection handlers. 663 */ 664 static int 665 cancel_all_handlers(void) 666 { 667 int ret = iter_handlers(cancel_one_handler, NULL); 668 669 if (ret == 0) 670 (void) printf("removed all registered handlers\n"); 671 672 return (ret); 673 } 674 675 /* 676 * Remove a specific fault injection handler. 677 */ 678 static int 679 cancel_handler(int id) 680 { 681 zfs_cmd_t zc = {"\0"}; 682 683 zc.zc_guid = (uint64_t)id; 684 685 if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 686 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 687 id, strerror(errno)); 688 return (1); 689 } 690 691 (void) printf("removed handler %d\n", id); 692 693 return (0); 694 } 695 696 /* 697 * Register a new fault injection handler. 698 */ 699 static int 700 register_handler(const char *pool, int flags, zinject_record_t *record, 701 int quiet) 702 { 703 zfs_cmd_t zc = {"\0"}; 704 705 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); 706 zc.zc_inject_record = *record; 707 zc.zc_guid = flags; 708 709 if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 710 const char *errmsg = strerror(errno); 711 712 switch (errno) { 713 case EDOM: 714 errmsg = "block level exceeds max level of object"; 715 break; 716 case EEXIST: 717 if (record->zi_cmd == ZINJECT_DELAY_IMPORT) 718 errmsg = "pool already imported"; 719 if (record->zi_cmd == ZINJECT_DELAY_EXPORT) 720 errmsg = "a handler already exists"; 721 break; 722 case ENOENT: 723 /* import delay injector running on older zfs module */ 724 if (record->zi_cmd == ZINJECT_DELAY_IMPORT) 725 errmsg = "import delay injector not supported"; 726 break; 727 default: 728 break; 729 } 730 (void) fprintf(stderr, "failed to add handler: %s\n", errmsg); 731 return (1); 732 } 733 734 if (flags & ZINJECT_NULL) 735 return (0); 736 737 if (quiet) { 738 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 739 } else { 740 boolean_t show_object = B_FALSE; 741 boolean_t show_iotype = B_FALSE; 742 (void) printf("Added handler %llu with the following " 743 "properties:\n", (u_longlong_t)zc.zc_guid); 744 (void) printf(" pool: %s\n", pool); 745 if (record->zi_guid) { 746 (void) printf(" vdev: %llx\n", 747 (u_longlong_t)record->zi_guid); 748 show_iotype = B_TRUE; 749 } else if (record->zi_func[0] != '\0') { 750 (void) printf(" panic function: %s\n", 751 record->zi_func); 752 } else if (record->zi_duration > 0) { 753 (void) printf(" time: %lld seconds\n", 754 (u_longlong_t)record->zi_duration); 755 } else if (record->zi_duration < 0) { 756 (void) printf(" txgs: %lld \n", 757 (u_longlong_t)-record->zi_duration); 758 } else if (record->zi_timer > 0) { 759 (void) printf(" timer: %lld ms\n", 760 (u_longlong_t)NSEC2MSEC(record->zi_timer)); 761 if (record->zi_cmd == ZINJECT_DELAY_READY) { 762 show_object = B_TRUE; 763 show_iotype = B_TRUE; 764 } 765 } else { 766 show_object = B_TRUE; 767 } 768 if (show_iotype) { 769 (void) printf("iotype: %s\n", 770 iotype_to_str(record->zi_iotype)); 771 } 772 if (show_object) { 773 (void) printf("objset: %llu\n", 774 (u_longlong_t)record->zi_objset); 775 (void) printf("object: %llu\n", 776 (u_longlong_t)record->zi_object); 777 (void) printf(" type: %llu\n", 778 (u_longlong_t)record->zi_type); 779 (void) printf(" level: %d\n", record->zi_level); 780 if (record->zi_start == 0 && 781 record->zi_end == -1ULL) 782 (void) printf(" range: all\n"); 783 else 784 (void) printf(" range: [%llu, %llu)\n", 785 (u_longlong_t)record->zi_start, 786 (u_longlong_t)record->zi_end); 787 (void) printf(" dvas: 0x%x\n", record->zi_dvas); 788 } 789 } 790 791 return (0); 792 } 793 794 static int 795 perform_action(const char *pool, zinject_record_t *record, int cmd) 796 { 797 zfs_cmd_t zc = {"\0"}; 798 799 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED); 800 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); 801 zc.zc_guid = record->zi_guid; 802 zc.zc_cookie = cmd; 803 804 if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) 805 return (0); 806 807 return (1); 808 } 809 810 static int 811 parse_delay(char *str, uint64_t *delay, uint64_t *nlanes) 812 { 813 unsigned long scan_delay; 814 unsigned long scan_nlanes; 815 816 if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2) 817 return (1); 818 819 /* 820 * We explicitly disallow a delay of zero here, because we key 821 * off this value being non-zero in translate_device(), to 822 * determine if the fault is a ZINJECT_DELAY_IO fault or not. 823 */ 824 if (scan_delay == 0) 825 return (1); 826 827 /* 828 * The units for the CLI delay parameter is milliseconds, but 829 * the data passed to the kernel is interpreted as nanoseconds. 830 * Thus we scale the milliseconds to nanoseconds here, and this 831 * nanosecond value is used to pass the delay to the kernel. 832 */ 833 *delay = MSEC2NSEC(scan_delay); 834 *nlanes = scan_nlanes; 835 836 return (0); 837 } 838 839 static int 840 parse_frequency(const char *str, uint32_t *percent) 841 { 842 double val; 843 char *post; 844 845 val = strtod(str, &post); 846 if (post == NULL || *post != '\0') 847 return (EINVAL); 848 849 /* valid range is [0.0001, 100.0] */ 850 val /= 100.0f; 851 if (val < 0.000001f || val > 1.0f) 852 return (ERANGE); 853 854 /* convert to an integer for use by kernel */ 855 *percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX)); 856 857 return (0); 858 } 859 860 /* 861 * This function converts a string specifier for DVAs into a bit mask. 862 * The dva's provided by the user should be 0 indexed and separated by 863 * a comma. For example: 864 * "1" -> 0b0010 (0x2) 865 * "0,1" -> 0b0011 (0x3) 866 * "0,1,2" -> 0b0111 (0x7) 867 */ 868 static int 869 parse_dvas(const char *str, uint32_t *dvas_out) 870 { 871 const char *c = str; 872 uint32_t mask = 0; 873 boolean_t need_delim = B_FALSE; 874 875 /* max string length is 5 ("0,1,2") */ 876 if (strlen(str) > 5 || strlen(str) == 0) 877 return (EINVAL); 878 879 while (*c != '\0') { 880 switch (*c) { 881 case '0': 882 case '1': 883 case '2': 884 /* check for pipe between DVAs */ 885 if (need_delim) 886 return (EINVAL); 887 888 /* check if this DVA has been set already */ 889 if (mask & (1 << ((*c) - '0'))) 890 return (EINVAL); 891 892 mask |= (1 << ((*c) - '0')); 893 need_delim = B_TRUE; 894 break; 895 case ',': 896 need_delim = B_FALSE; 897 break; 898 default: 899 /* check for invalid character */ 900 return (EINVAL); 901 } 902 c++; 903 } 904 905 /* check for dangling delimiter */ 906 if (!need_delim) 907 return (EINVAL); 908 909 *dvas_out = mask; 910 return (0); 911 } 912 913 int 914 main(int argc, char **argv) 915 { 916 int c; 917 char *range = NULL; 918 char *cancel = NULL; 919 char *end; 920 char *raw = NULL; 921 char *device = NULL; 922 int level = 0; 923 int quiet = 0; 924 int error = 0; 925 int domount = 0; 926 int io_type = ZINJECT_IOTYPE_ALL; 927 int action = VDEV_STATE_UNKNOWN; 928 err_type_t type = TYPE_INVAL; 929 err_type_t label = TYPE_INVAL; 930 zinject_record_t record = { 0 }; 931 char pool[MAXNAMELEN] = ""; 932 char dataset[MAXNAMELEN] = ""; 933 zfs_handle_t *zhp = NULL; 934 int nowrites = 0; 935 int dur_txg = 0; 936 int dur_secs = 0; 937 int ret; 938 int flags = 0; 939 uint32_t dvas = 0; 940 hrtime_t ready_delay = -1; 941 942 if ((g_zfs = libzfs_init()) == NULL) { 943 (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); 944 return (1); 945 } 946 947 libzfs_print_on_error(g_zfs, B_TRUE); 948 949 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 950 (void) fprintf(stderr, "failed to open ZFS device\n"); 951 libzfs_fini(g_zfs); 952 return (1); 953 } 954 955 if (argc == 1) { 956 /* 957 * No arguments. Print the available handlers. If there are no 958 * available handlers, direct the user to '-h' for help 959 * information. 960 */ 961 if (print_all_handlers() == 0) { 962 (void) printf("No handlers registered.\n"); 963 (void) printf("Run 'zinject -h' for usage " 964 "information.\n"); 965 } 966 libzfs_fini(g_zfs); 967 return (0); 968 } 969 970 while ((c = getopt(argc, argv, 971 ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { 972 switch (c) { 973 case 'a': 974 flags |= ZINJECT_FLUSH_ARC; 975 break; 976 case 'A': 977 if (strcasecmp(optarg, "degrade") == 0) { 978 action = VDEV_STATE_DEGRADED; 979 } else if (strcasecmp(optarg, "fault") == 0) { 980 action = VDEV_STATE_FAULTED; 981 } else { 982 (void) fprintf(stderr, "invalid action '%s': " 983 "must be 'degrade' or 'fault'\n", optarg); 984 usage(); 985 libzfs_fini(g_zfs); 986 return (1); 987 } 988 break; 989 case 'b': 990 raw = optarg; 991 break; 992 case 'c': 993 cancel = optarg; 994 break; 995 case 'C': 996 ret = parse_dvas(optarg, &dvas); 997 if (ret != 0) { 998 (void) fprintf(stderr, "invalid DVA list '%s': " 999 "DVAs should be 0 indexed and separated by " 1000 "commas.\n", optarg); 1001 usage(); 1002 libzfs_fini(g_zfs); 1003 return (1); 1004 } 1005 break; 1006 case 'd': 1007 device = optarg; 1008 break; 1009 case 'D': 1010 errno = 0; 1011 ret = parse_delay(optarg, &record.zi_timer, 1012 &record.zi_nlanes); 1013 if (ret != 0) { 1014 1015 (void) fprintf(stderr, "invalid i/o delay " 1016 "value: '%s'\n", optarg); 1017 usage(); 1018 libzfs_fini(g_zfs); 1019 return (1); 1020 } 1021 break; 1022 case 'e': 1023 error = str_to_err(optarg); 1024 if (error < 0) { 1025 (void) fprintf(stderr, "invalid error type " 1026 "'%s': must be one of: io decompress " 1027 "decrypt nxio dtl corrupt noop\n", 1028 optarg); 1029 usage(); 1030 libzfs_fini(g_zfs); 1031 return (1); 1032 } 1033 break; 1034 case 'f': 1035 ret = parse_frequency(optarg, &record.zi_freq); 1036 if (ret != 0) { 1037 (void) fprintf(stderr, "%sfrequency value must " 1038 "be in the range [0.0001, 100.0]\n", 1039 ret == EINVAL ? "invalid value: " : 1040 ret == ERANGE ? "out of range: " : ""); 1041 libzfs_fini(g_zfs); 1042 return (1); 1043 } 1044 break; 1045 case 'F': 1046 record.zi_failfast = B_TRUE; 1047 break; 1048 case 'g': 1049 dur_txg = 1; 1050 record.zi_duration = (int)strtol(optarg, &end, 10); 1051 if (record.zi_duration <= 0 || *end != '\0') { 1052 (void) fprintf(stderr, "invalid duration '%s': " 1053 "must be a positive integer\n", optarg); 1054 usage(); 1055 libzfs_fini(g_zfs); 1056 return (1); 1057 } 1058 /* store duration of txgs as its negative */ 1059 record.zi_duration *= -1; 1060 break; 1061 case 'h': 1062 usage(); 1063 libzfs_fini(g_zfs); 1064 return (0); 1065 case 'I': 1066 /* default duration, if one hasn't yet been defined */ 1067 nowrites = 1; 1068 if (dur_secs == 0 && dur_txg == 0) 1069 record.zi_duration = 30; 1070 break; 1071 case 'l': 1072 level = (int)strtol(optarg, &end, 10); 1073 if (*end != '\0') { 1074 (void) fprintf(stderr, "invalid level '%s': " 1075 "must be an integer\n", optarg); 1076 usage(); 1077 libzfs_fini(g_zfs); 1078 return (1); 1079 } 1080 break; 1081 case 'm': 1082 domount = 1; 1083 break; 1084 case 'p': 1085 (void) strlcpy(record.zi_func, optarg, 1086 sizeof (record.zi_func)); 1087 record.zi_cmd = ZINJECT_PANIC; 1088 break; 1089 case 'P': 1090 if (strcasecmp(optarg, "import") == 0) { 1091 record.zi_cmd = ZINJECT_DELAY_IMPORT; 1092 } else if (strcasecmp(optarg, "export") == 0) { 1093 record.zi_cmd = ZINJECT_DELAY_EXPORT; 1094 } else { 1095 (void) fprintf(stderr, "invalid command '%s': " 1096 "must be 'import' or 'export'\n", optarg); 1097 usage(); 1098 libzfs_fini(g_zfs); 1099 return (1); 1100 } 1101 break; 1102 case 'q': 1103 quiet = 1; 1104 break; 1105 case 'r': 1106 range = optarg; 1107 flags |= ZINJECT_CALC_RANGE; 1108 break; 1109 case 's': 1110 dur_secs = 1; 1111 record.zi_duration = (int)strtol(optarg, &end, 10); 1112 if (record.zi_duration <= 0 || *end != '\0') { 1113 (void) fprintf(stderr, "invalid duration '%s': " 1114 "must be a positive integer\n", optarg); 1115 usage(); 1116 libzfs_fini(g_zfs); 1117 return (1); 1118 } 1119 break; 1120 case 'T': 1121 io_type = str_to_iotype(optarg); 1122 if (io_type == ZINJECT_IOTYPES) { 1123 (void) fprintf(stderr, "invalid I/O type " 1124 "'%s': must be 'read', 'write', 'free', " 1125 "'claim', 'flush' or 'all'\n", optarg); 1126 usage(); 1127 libzfs_fini(g_zfs); 1128 return (1); 1129 } 1130 break; 1131 case 't': 1132 if ((type = name_to_type(optarg)) == TYPE_INVAL && 1133 !MOS_TYPE(type)) { 1134 (void) fprintf(stderr, "invalid type '%s'\n", 1135 optarg); 1136 usage(); 1137 libzfs_fini(g_zfs); 1138 return (1); 1139 } 1140 break; 1141 case 'u': 1142 flags |= ZINJECT_UNLOAD_SPA; 1143 break; 1144 case 'E': 1145 ready_delay = MSEC2NSEC(strtol(optarg, &end, 10)); 1146 if (ready_delay <= 0 || *end != '\0') { 1147 (void) fprintf(stderr, "invalid delay '%s': " 1148 "must be a positive duration\n", optarg); 1149 usage(); 1150 libzfs_fini(g_zfs); 1151 return (1); 1152 } 1153 record.zi_cmd = ZINJECT_DELAY_READY; 1154 record.zi_timer = ready_delay; 1155 break; 1156 case 'L': 1157 if ((label = name_to_type(optarg)) == TYPE_INVAL && 1158 !LABEL_TYPE(type)) { 1159 (void) fprintf(stderr, "invalid label type " 1160 "'%s'\n", optarg); 1161 usage(); 1162 libzfs_fini(g_zfs); 1163 return (1); 1164 } 1165 break; 1166 case ':': 1167 (void) fprintf(stderr, "option -%c requires an " 1168 "operand\n", optopt); 1169 usage(); 1170 libzfs_fini(g_zfs); 1171 return (1); 1172 case '?': 1173 (void) fprintf(stderr, "invalid option '%c'\n", 1174 optopt); 1175 usage(); 1176 libzfs_fini(g_zfs); 1177 return (2); 1178 } 1179 } 1180 1181 argc -= optind; 1182 argv += optind; 1183 1184 if (record.zi_duration != 0 && record.zi_cmd == 0) 1185 record.zi_cmd = ZINJECT_IGNORED_WRITES; 1186 1187 if (cancel != NULL) { 1188 /* 1189 * '-c' is invalid with any other options. 1190 */ 1191 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1192 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 1193 record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) { 1194 (void) fprintf(stderr, "cancel (-c) incompatible with " 1195 "any other options\n"); 1196 usage(); 1197 libzfs_fini(g_zfs); 1198 return (2); 1199 } 1200 if (argc != 0) { 1201 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 1202 usage(); 1203 libzfs_fini(g_zfs); 1204 return (2); 1205 } 1206 1207 if (strcmp(cancel, "all") == 0) { 1208 return (cancel_all_handlers()); 1209 } else { 1210 int id = (int)strtol(cancel, &end, 10); 1211 if (*end != '\0') { 1212 (void) fprintf(stderr, "invalid handle id '%s':" 1213 " must be an integer or 'all'\n", cancel); 1214 usage(); 1215 libzfs_fini(g_zfs); 1216 return (1); 1217 } 1218 return (cancel_handler(id)); 1219 } 1220 } 1221 1222 if (device != NULL) { 1223 /* 1224 * Device (-d) injection uses a completely different mechanism 1225 * for doing injection, so handle it separately here. 1226 */ 1227 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1228 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 1229 dvas != 0 || ready_delay >= 0) { 1230 (void) fprintf(stderr, "device (-d) incompatible with " 1231 "data error injection\n"); 1232 usage(); 1233 libzfs_fini(g_zfs); 1234 return (2); 1235 } 1236 1237 if (argc != 1) { 1238 (void) fprintf(stderr, "device (-d) injection requires " 1239 "a single pool name\n"); 1240 usage(); 1241 libzfs_fini(g_zfs); 1242 return (2); 1243 } 1244 1245 (void) strlcpy(pool, argv[0], sizeof (pool)); 1246 dataset[0] = '\0'; 1247 1248 if (error == ECKSUM) { 1249 (void) fprintf(stderr, "device error type must be " 1250 "'io', 'nxio' or 'corrupt'\n"); 1251 libzfs_fini(g_zfs); 1252 return (1); 1253 } 1254 1255 if (error == EILSEQ && 1256 (record.zi_freq == 0 || io_type != ZINJECT_IOTYPE_READ)) { 1257 (void) fprintf(stderr, "device corrupt errors require " 1258 "io type read and a frequency value\n"); 1259 libzfs_fini(g_zfs); 1260 return (1); 1261 } 1262 1263 record.zi_iotype = io_type; 1264 if (translate_device(pool, device, label, &record) != 0) { 1265 libzfs_fini(g_zfs); 1266 return (1); 1267 } 1268 1269 if (record.zi_nlanes) { 1270 switch (io_type) { 1271 case ZINJECT_IOTYPE_READ: 1272 case ZINJECT_IOTYPE_WRITE: 1273 case ZINJECT_IOTYPE_ALL: 1274 break; 1275 default: 1276 (void) fprintf(stderr, "I/O type for a delay " 1277 "must be 'read' or 'write'\n"); 1278 usage(); 1279 libzfs_fini(g_zfs); 1280 return (1); 1281 } 1282 } 1283 1284 if (!error) 1285 error = ENXIO; 1286 1287 if (action != VDEV_STATE_UNKNOWN) 1288 return (perform_action(pool, &record, action)); 1289 1290 } else if (raw != NULL) { 1291 if (range != NULL || type != TYPE_INVAL || level != 0 || 1292 record.zi_cmd != ZINJECT_UNINITIALIZED || 1293 record.zi_freq > 0 || dvas != 0) { 1294 (void) fprintf(stderr, "raw (-b) format with " 1295 "any other options\n"); 1296 usage(); 1297 libzfs_fini(g_zfs); 1298 return (2); 1299 } 1300 1301 if (argc != 1) { 1302 (void) fprintf(stderr, "raw (-b) format expects a " 1303 "single pool name\n"); 1304 usage(); 1305 libzfs_fini(g_zfs); 1306 return (2); 1307 } 1308 1309 (void) strlcpy(pool, argv[0], sizeof (pool)); 1310 dataset[0] = '\0'; 1311 1312 if (error == ENXIO) { 1313 (void) fprintf(stderr, "data error type must be " 1314 "'checksum' or 'io'\n"); 1315 libzfs_fini(g_zfs); 1316 return (1); 1317 } 1318 1319 if (record.zi_cmd == ZINJECT_UNINITIALIZED) { 1320 record.zi_cmd = ZINJECT_DATA_FAULT; 1321 if (!error) 1322 error = EIO; 1323 } else if (error != 0) { 1324 (void) fprintf(stderr, "error type -e incompatible " 1325 "with delay injection\n"); 1326 libzfs_fini(g_zfs); 1327 return (1); 1328 } else { 1329 record.zi_iotype = io_type; 1330 } 1331 1332 if (translate_raw(raw, &record) != 0) { 1333 libzfs_fini(g_zfs); 1334 return (1); 1335 } 1336 } else if (record.zi_cmd == ZINJECT_PANIC) { 1337 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1338 level != 0 || device != NULL || record.zi_freq > 0 || 1339 dvas != 0) { 1340 (void) fprintf(stderr, "%s incompatible with other " 1341 "options\n", "import|export delay (-P)"); 1342 usage(); 1343 libzfs_fini(g_zfs); 1344 return (2); 1345 } 1346 1347 if (argc < 1 || argc > 2) { 1348 (void) fprintf(stderr, "panic (-p) injection requires " 1349 "a single pool name and an optional id\n"); 1350 usage(); 1351 libzfs_fini(g_zfs); 1352 return (2); 1353 } 1354 1355 (void) strlcpy(pool, argv[0], sizeof (pool)); 1356 if (argv[1] != NULL) 1357 record.zi_type = atoi(argv[1]); 1358 dataset[0] = '\0'; 1359 } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT || 1360 record.zi_cmd == ZINJECT_DELAY_EXPORT) { 1361 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1362 level != 0 || device != NULL || record.zi_freq > 0 || 1363 dvas != 0) { 1364 (void) fprintf(stderr, "%s incompatible with other " 1365 "options\n", "import|export delay (-P)"); 1366 usage(); 1367 libzfs_fini(g_zfs); 1368 return (2); 1369 } 1370 1371 if (argc != 1 || record.zi_duration <= 0) { 1372 (void) fprintf(stderr, "import|export delay (-P) " 1373 "injection requires a duration (-s) and a single " 1374 "pool name\n"); 1375 usage(); 1376 libzfs_fini(g_zfs); 1377 return (2); 1378 } 1379 1380 (void) strlcpy(pool, argv[0], sizeof (pool)); 1381 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) { 1382 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1383 level != 0 || record.zi_freq > 0 || dvas != 0) { 1384 (void) fprintf(stderr, "hardware failure (-I) " 1385 "incompatible with other options\n"); 1386 usage(); 1387 libzfs_fini(g_zfs); 1388 return (2); 1389 } 1390 1391 if (nowrites == 0) { 1392 (void) fprintf(stderr, "-s or -g meaningless " 1393 "without -I (ignore writes)\n"); 1394 usage(); 1395 libzfs_fini(g_zfs); 1396 return (2); 1397 } else if (dur_secs && dur_txg) { 1398 (void) fprintf(stderr, "choose a duration either " 1399 "in seconds (-s) or a number of txgs (-g) " 1400 "but not both\n"); 1401 usage(); 1402 libzfs_fini(g_zfs); 1403 return (2); 1404 } else if (argc != 1) { 1405 (void) fprintf(stderr, "ignore writes (-I) " 1406 "injection requires a single pool name\n"); 1407 usage(); 1408 libzfs_fini(g_zfs); 1409 return (2); 1410 } 1411 1412 (void) strlcpy(pool, argv[0], sizeof (pool)); 1413 dataset[0] = '\0'; 1414 } else if (type == TYPE_INVAL) { 1415 if (flags == 0) { 1416 (void) fprintf(stderr, "at least one of '-b', '-d', " 1417 "'-t', '-a', '-p', '-I' or '-u' " 1418 "must be specified\n"); 1419 usage(); 1420 libzfs_fini(g_zfs); 1421 return (2); 1422 } 1423 1424 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 1425 (void) strlcpy(pool, argv[0], sizeof (pool)); 1426 dataset[0] = '\0'; 1427 } else if (argc != 0) { 1428 (void) fprintf(stderr, "extraneous argument for " 1429 "'-f'\n"); 1430 usage(); 1431 libzfs_fini(g_zfs); 1432 return (2); 1433 } 1434 1435 flags |= ZINJECT_NULL; 1436 } else { 1437 if (argc != 1) { 1438 (void) fprintf(stderr, "missing object\n"); 1439 usage(); 1440 libzfs_fini(g_zfs); 1441 return (2); 1442 } 1443 1444 if (error == ENXIO || error == EILSEQ) { 1445 (void) fprintf(stderr, "data error type must be " 1446 "'checksum' or 'io'\n"); 1447 libzfs_fini(g_zfs); 1448 return (1); 1449 } 1450 1451 if (dvas != 0) { 1452 if (error == EACCES || error == EINVAL) { 1453 (void) fprintf(stderr, "the '-C' option may " 1454 "not be used with logical data errors " 1455 "'decrypt' and 'decompress'\n"); 1456 libzfs_fini(g_zfs); 1457 return (1); 1458 } 1459 1460 record.zi_dvas = dvas; 1461 } 1462 1463 if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) { 1464 (void) fprintf(stderr, "error type -e incompatible " 1465 "with delay injection\n"); 1466 libzfs_fini(g_zfs); 1467 return (1); 1468 } 1469 1470 if (error == EACCES) { 1471 if (type != TYPE_DATA) { 1472 (void) fprintf(stderr, "decryption errors " 1473 "may only be injected for 'data' types\n"); 1474 libzfs_fini(g_zfs); 1475 return (1); 1476 } 1477 1478 record.zi_cmd = ZINJECT_DECRYPT_FAULT; 1479 /* 1480 * Internally, ZFS actually uses ECKSUM for decryption 1481 * errors since EACCES is used to indicate the key was 1482 * not found. 1483 */ 1484 error = ECKSUM; 1485 } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) { 1486 record.zi_cmd = ZINJECT_DATA_FAULT; 1487 if (!error) 1488 error = EIO; 1489 } else { 1490 record.zi_iotype = io_type; 1491 } 1492 1493 if (translate_record(type, argv[0], range, level, &record, pool, 1494 dataset) != 0) { 1495 libzfs_fini(g_zfs); 1496 return (1); 1497 } 1498 } 1499 1500 /* 1501 * If this is pool-wide metadata, unmount everything. The ioctl() will 1502 * unload the pool, so that we trigger spa-wide reopen of metadata next 1503 * time we access the pool. 1504 */ 1505 if (dataset[0] != '\0' && domount) { 1506 if ((zhp = zfs_open(g_zfs, dataset, 1507 ZFS_TYPE_DATASET)) == NULL) { 1508 libzfs_fini(g_zfs); 1509 return (1); 1510 } 1511 if (zfs_unmount(zhp, NULL, 0) != 0) { 1512 libzfs_fini(g_zfs); 1513 return (1); 1514 } 1515 } 1516 1517 record.zi_error = error; 1518 1519 ret = register_handler(pool, flags, &record, quiet); 1520 1521 if (dataset[0] != '\0' && domount) 1522 ret = (zfs_mount(zhp, NULL, 0) != 0); 1523 1524 libzfs_fini(g_zfs); 1525 1526 return (ret); 1527 } 1528