1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 */ 25 26 /* 27 * ZFS Fault Injector 28 * 29 * This userland component takes a set of options and uses libzpool to translate 30 * from a user-visible object type and name to an internal representation. 31 * There are two basic types of faults: device faults and data faults. 32 * 33 * 34 * DEVICE FAULTS 35 * 36 * Errors can be injected into a particular vdev using the '-d' option. This 37 * option takes a path or vdev GUID to uniquely identify the device within a 38 * pool. There are two types of errors that can be injected, EIO and ENXIO, 39 * that can be controlled through the '-e' option. The default is ENXIO. For 40 * EIO failures, any attempt to read data from the device will return EIO, but 41 * subsequent attempt to reopen the device will succeed. For ENXIO failures, 42 * any attempt to read from the device will return EIO, but any attempt to 43 * reopen the device will also return ENXIO. 44 * For label faults, the -L option must be specified. 
This allows faults 45 * to be injected into either the nvlist, uberblock, pad1, or pad2 region 46 * of all the labels for the specified device. 47 * 48 * This form of the command looks like: 49 * 50 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool 51 * 52 * 53 * DATA FAULTS 54 * 55 * We begin with a tuple of the form: 56 * 57 * <type,level,range,object> 58 * 59 * type A string describing the type of data to target. Each type 60 * implicitly describes how to interpret 'object'. Currently, 61 * the following values are supported: 62 * 63 * data User data for a file 64 * dnode Dnode for a file or directory 65 * 66 * The following MOS objects are special. Instead of injecting 67 * errors on a particular object or blkid, we inject errors across 68 * all objects of the given type. 69 * 70 * mos Any data in the MOS 71 * mosdir object directory 72 * config pool configuration 73 * bpobj blkptr list 74 * spacemap spacemap 75 * metaslab metaslab 76 * errlog persistent error log 77 * 78 * level Object level. Defaults to '0', not applicable to all types. If 79 * a range is given, this corresponds to the indirect block 80 * corresponding to the specific range. 81 * 82 * range A numerical range [start,end) within the object. Defaults to 83 * the full size of the file. 84 * 85 * object A string describing the logical location of the object. For 86 * files and directories (currently the only supported types), 87 * this is the path of the object on disk. 88 * 89 * This is translated, via libzpool, into the following internal representation: 90 * 91 * <type,objset,object,level,range> 92 * 93 * These types should be self-explanatory. This tuple is then passed to the 94 * kernel via a special ioctl() to initiate fault injection for the given 95 * object. Note that 'type' is not strictly necessary for fault injection, but 96 * is used when translating existing faults into a human-readable string. 
 *
 *
 * The command itself takes one of the forms:
 *
 * 	zinject
 * 	zinject <-a | -u pool>
 * 	zinject -c <id|all>
 * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 *	    [-r range] <object>
 * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 *
 * With no arguments, the command prints all currently registered injection
 * handlers, with their numeric identifiers.
 *
 * The '-c' option will clear the given handler, or all handlers if 'all' is
 * specified.
 *
 * The '-e' option takes a string describing the errno to simulate. For data
 * faults this must be one of 'io', 'checksum', or 'decrypt'. In most cases
 * 'io' and 'checksum' will result in the same behavior, but RAID-Z will
 * produce a different set of ereports for each.
 *
 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
 * specified, then the ARC cache is flushed appropriately. If '-u' is
 * specified, then the underlying SPA is unloaded. Either of these flags can be
 * specified independently of any other handlers. The '-m' flag automatically
 * does an unmount and remount of the underlying dataset to aid in flushing the
 * cache.
 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100. The default is 100.
 *
 * The fourth form ('-t') is responsible for actually injecting the handler
 * into the framework. It takes the arguments described above, translates them
 * to the internal tuple using libzpool, and then issues an ioctl() to register
 * the handler.
 *
 * The final form can target a specific bookmark, regardless of whether a
 * human-readable interface has been designed. It allows developers to specify
 * a particular block by number.
137 */ 138 139 #include <errno.h> 140 #include <fcntl.h> 141 #include <stdio.h> 142 #include <stdlib.h> 143 #include <strings.h> 144 #include <unistd.h> 145 146 #include <sys/fs/zfs.h> 147 #include <sys/mount.h> 148 149 #include <libzfs.h> 150 151 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 152 153 #include "zinject.h" 154 155 libzfs_handle_t *g_zfs; 156 int zfs_fd; 157 158 #define ECKSUM EBADE 159 160 static const char *errtable[TYPE_INVAL] = { 161 "data", 162 "dnode", 163 "mos", 164 "mosdir", 165 "metaslab", 166 "config", 167 "bpobj", 168 "spacemap", 169 "errlog", 170 "uber", 171 "nvlist", 172 "pad1", 173 "pad2" 174 }; 175 176 static err_type_t 177 name_to_type(const char *arg) 178 { 179 int i; 180 for (i = 0; i < TYPE_INVAL; i++) 181 if (strcmp(errtable[i], arg) == 0) 182 return (i); 183 184 return (TYPE_INVAL); 185 } 186 187 static const char * 188 type_to_name(uint64_t type) 189 { 190 switch (type) { 191 case DMU_OT_OBJECT_DIRECTORY: 192 return ("mosdir"); 193 case DMU_OT_OBJECT_ARRAY: 194 return ("metaslab"); 195 case DMU_OT_PACKED_NVLIST: 196 return ("config"); 197 case DMU_OT_BPOBJ: 198 return ("bpobj"); 199 case DMU_OT_SPACE_MAP: 200 return ("spacemap"); 201 case DMU_OT_ERROR_LOG: 202 return ("errlog"); 203 default: 204 return ("-"); 205 } 206 } 207 208 209 /* 210 * Print usage message. 211 */ 212 void 213 usage(void) 214 { 215 (void) printf( 216 "usage:\n" 217 "\n" 218 "\tzinject\n" 219 "\n" 220 "\t\tList all active injection records.\n" 221 "\n" 222 "\tzinject -c <id|all>\n" 223 "\n" 224 "\t\tClear the particular record (if given a numeric ID), or\n" 225 "\t\tall records if 'all' is specificed.\n" 226 "\n" 227 "\tzinject -p <function name> pool\n" 228 "\n" 229 "\t\tInject a panic fault at the specified function. 
Only \n" 230 "\t\tfunctions which call spa_vdev_config_exit(), or \n" 231 "\t\tspa_vdev_exit() will trigger a panic.\n" 232 "\n" 233 "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n" 234 "\t [-T <read|write|free|claim|all> pool\n" 235 "\n" 236 "\t\tInject a fault into a particular device or the device's\n" 237 "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " 238 "\t\t'pad1', or 'pad2'.\n" 239 "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" 240 "\n" 241 "\tzinject -d device -A <degrade|fault> pool\n" 242 "\n" 243 "\t\tPerform a specific action on a particular device\n" 244 "\n" 245 "\tzinject -d device -D latency:lanes pool\n" 246 "\n" 247 "\t\tAdd an artificial delay to IO requests on a particular\n" 248 "\t\tdevice, such that the requests take a minimum of 'latency'\n" 249 "\t\tmilliseconds to complete. Each delay has an associated\n" 250 "\t\tnumber of 'lanes' which defines the number of concurrent\n" 251 "\t\tIO requests that can be processed.\n" 252 "\n" 253 "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n" 254 "\t\tthe device will only be able to service a single IO request\n" 255 "\t\tat a time with each request taking 10 ms to complete. So,\n" 256 "\t\tif only a single request is submitted every 10 ms, the\n" 257 "\t\taverage latency will be 10 ms; but if more than one request\n" 258 "\t\tis submitted every 10 ms, the average latency will be more\n" 259 "\t\tthan 10 ms.\n" 260 "\n" 261 "\t\tSimilarly, if a delay of 10 ms is specified to have two\n" 262 "\t\tlanes (-D 10:2), then the device will be able to service\n" 263 "\t\ttwo requests at a time, each with a minimum latency of\n" 264 "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n" 265 "\t\tthe average latency will be 10 ms; but if more than two\n" 266 "\t\trequests are submitted every 10 ms, the average latency\n" 267 "\t\twill be more than 10 ms.\n" 268 "\n" 269 "\t\tAlso note, these delays are additive. 
So two invocations\n" 270 "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n" 271 "\t\tof '-D 10:2'. This also means, one can specify multiple\n" 272 "\t\tlanes with differing target latencies. For example, an\n" 273 "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n" 274 "\t\tcreate 3 lanes on the device; one lane with a latency\n" 275 "\t\tof 10 ms and two lanes with a 25 ms latency.\n" 276 "\n" 277 "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" 278 "\n" 279 "\t\tCause the pool to stop writing blocks yet not\n" 280 "\t\treport errors for a duration. Simulates buggy hardware\n" 281 "\t\tthat fails to honor cache flush requests.\n" 282 "\t\tDefault duration is 30 seconds. The machine is panicked\n" 283 "\t\tat the end of the duration.\n" 284 "\n" 285 "\tzinject -b objset:object:level:blkid pool\n" 286 "\n" 287 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 288 "\t\tspecified by the remaining tuple. Each number is in\n" 289 "\t\thexidecimal, and only one block can be specified.\n" 290 "\n" 291 "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n" 292 "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n" 293 "\n" 294 "\t\tInject an error into the object specified by the '-t' option\n" 295 "\t\tand the object descriptor. The 'object' parameter is\n" 296 "\t\tinterperted depending on the '-t' option.\n" 297 "\n" 298 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 299 "\t\t-e\tInject a specific error. Must be one of 'io', " 300 "'checksum',\n" 301 "\t\t\t'decompress', or decrypt. Default is 'io'.\n" 302 "\t\t-C\tInject the given error only into specific DVAs. The\n" 303 "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n" 304 "\t\t\tseparated by commas (ex. '0,2').\n" 305 "\t\t-l\tInject error at a particular block level. Default is " 306 "0.\n" 307 "\t\t-m\tAutomatically remount underlying filesystem.\n" 308 "\t\t-r\tInject error over a particular logical range of an\n" 309 "\t\t\tobject. 
Will be translated to the appropriate blkid\n" 310 "\t\t\trange according to the object's properties.\n" 311 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 312 "\t\t\tassociated object.\n" 313 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 314 "\t\t\ta pool object.\n" 315 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 316 "\t\t\ta percentage between 1 and 100.\n" 317 "\n" 318 "\t-t data\t\tInject an error into the plain file contents of a\n" 319 "\t\t\tfile. The object must be specified as a complete path\n" 320 "\t\t\tto a file on a ZFS filesystem.\n" 321 "\n" 322 "\t-t dnode\tInject an error into the metadnode in the block\n" 323 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 324 "\t\t\t'-r' option is incompatible with this mode. The object\n" 325 "\t\t\tis specified as a complete path to a file or directory\n" 326 "\t\t\ton a ZFS filesystem.\n" 327 "\n" 328 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 329 "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n" 330 "\t\t\tspacemap, metaslab, errlog. 
The only valid <object> is\n" 331 "\t\t\tthe poolname.\n"); 332 } 333 334 static int 335 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 336 void *data) 337 { 338 zfs_cmd_t zc = { 0 }; 339 int ret; 340 341 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 342 if ((ret = func((int)zc.zc_guid, zc.zc_name, 343 &zc.zc_inject_record, data)) != 0) 344 return (ret); 345 346 if (errno != ENOENT) { 347 (void) fprintf(stderr, "Unable to list handlers: %s\n", 348 strerror(errno)); 349 return (-1); 350 } 351 352 return (0); 353 } 354 355 static int 356 print_data_handler(int id, const char *pool, zinject_record_t *record, 357 void *data) 358 { 359 int *count = data; 360 361 if (record->zi_guid != 0 || record->zi_func[0] != '\0') 362 return (0); 363 364 if (*count == 0) { 365 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s ", 366 "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE", 367 "LVL", "DVAs", "RANGE"); 368 (void) printf("--- --------------- ------ " 369 "------ -------- --- ---- ----------------\n"); 370 } 371 372 *count += 1; 373 374 (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x ", 375 id, pool, (u_longlong_t)record->zi_objset, 376 (u_longlong_t)record->zi_object, type_to_name(record->zi_type), 377 record->zi_level, record->zi_dvas); 378 379 if (record->zi_start == 0 && 380 record->zi_end == -1ULL) 381 (void) printf("all\n"); 382 else 383 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 384 (u_longlong_t)record->zi_end); 385 386 return (0); 387 } 388 389 static int 390 print_device_handler(int id, const char *pool, zinject_record_t *record, 391 void *data) 392 { 393 int *count = data; 394 395 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 396 return (0); 397 398 if (record->zi_cmd == ZINJECT_DELAY_IO) 399 return (0); 400 401 if (*count == 0) { 402 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 403 (void) printf("--- --------------- ----------------\n"); 404 } 405 406 *count += 1; 407 408 
(void) printf("%3d %-15s %llx\n", id, pool, 409 (u_longlong_t)record->zi_guid); 410 411 return (0); 412 } 413 414 static int 415 print_delay_handler(int id, const char *pool, zinject_record_t *record, 416 void *data) 417 { 418 int *count = data; 419 420 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 421 return (0); 422 423 if (record->zi_cmd != ZINJECT_DELAY_IO) 424 return (0); 425 426 if (*count == 0) { 427 (void) printf("%3s %-15s %-15s %-15s %s\n", 428 "ID", "POOL", "DELAY (ms)", "LANES", "GUID"); 429 (void) printf("--- --------------- --------------- " 430 "--------------- ----------------\n"); 431 } 432 433 *count += 1; 434 435 (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, 436 (u_longlong_t)NSEC2MSEC(record->zi_timer), 437 (u_longlong_t)record->zi_nlanes, 438 (u_longlong_t)record->zi_guid); 439 440 return (0); 441 } 442 443 static int 444 print_panic_handler(int id, const char *pool, zinject_record_t *record, 445 void *data) 446 { 447 int *count = data; 448 449 if (record->zi_func[0] == '\0') 450 return (0); 451 452 if (*count == 0) { 453 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); 454 (void) printf("--- --------------- ----------------\n"); 455 } 456 457 *count += 1; 458 459 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); 460 461 return (0); 462 } 463 464 /* 465 * Print all registered error handlers. Returns the number of handlers 466 * registered. 
467 */ 468 static int 469 print_all_handlers(void) 470 { 471 int count = 0, total = 0; 472 473 (void) iter_handlers(print_device_handler, &count); 474 if (count > 0) { 475 total += count; 476 (void) printf("\n"); 477 count = 0; 478 } 479 480 (void) iter_handlers(print_delay_handler, &count); 481 if (count > 0) { 482 total += count; 483 (void) printf("\n"); 484 count = 0; 485 } 486 487 (void) iter_handlers(print_data_handler, &count); 488 if (count > 0) { 489 total += count; 490 (void) printf("\n"); 491 count = 0; 492 } 493 494 (void) iter_handlers(print_panic_handler, &count); 495 496 return (count + total); 497 } 498 499 /* ARGSUSED */ 500 static int 501 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 502 void *data) 503 { 504 zfs_cmd_t zc = { 0 }; 505 506 zc.zc_guid = (uint64_t)id; 507 508 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 509 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 510 id, strerror(errno)); 511 return (1); 512 } 513 514 return (0); 515 } 516 517 /* 518 * Remove all fault injection handlers. 519 */ 520 static int 521 cancel_all_handlers(void) 522 { 523 int ret = iter_handlers(cancel_one_handler, NULL); 524 525 if (ret == 0) 526 (void) printf("removed all registered handlers\n"); 527 528 return (ret); 529 } 530 531 /* 532 * Remove a specific fault injection handler. 533 */ 534 static int 535 cancel_handler(int id) 536 { 537 zfs_cmd_t zc = { 0 }; 538 539 zc.zc_guid = (uint64_t)id; 540 541 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 542 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 543 id, strerror(errno)); 544 return (1); 545 } 546 547 (void) printf("removed handler %d\n", id); 548 549 return (0); 550 } 551 552 /* 553 * Register a new fault injection handler. 
554 */ 555 static int 556 register_handler(const char *pool, int flags, zinject_record_t *record, 557 int quiet) 558 { 559 zfs_cmd_t zc = { 0 }; 560 561 (void) strcpy(zc.zc_name, pool); 562 zc.zc_inject_record = *record; 563 zc.zc_guid = flags; 564 565 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 566 (void) fprintf(stderr, "failed to add handler: %s\n", 567 strerror(errno)); 568 return (1); 569 } 570 571 if (flags & ZINJECT_NULL) 572 return (0); 573 574 if (quiet) { 575 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 576 } else { 577 (void) printf("Added handler %llu with the following " 578 "properties:\n", (u_longlong_t)zc.zc_guid); 579 (void) printf(" pool: %s\n", pool); 580 if (record->zi_guid) { 581 (void) printf(" vdev: %llx\n", 582 (u_longlong_t)record->zi_guid); 583 } else if (record->zi_func[0] != '\0') { 584 (void) printf(" panic function: %s\n", 585 record->zi_func); 586 } else if (record->zi_duration > 0) { 587 (void) printf(" time: %lld seconds\n", 588 (u_longlong_t)record->zi_duration); 589 } else if (record->zi_duration < 0) { 590 (void) printf(" txgs: %lld \n", 591 (u_longlong_t)-record->zi_duration); 592 } else { 593 (void) printf("objset: %llu\n", 594 (u_longlong_t)record->zi_objset); 595 (void) printf("object: %llu\n", 596 (u_longlong_t)record->zi_object); 597 (void) printf(" type: %llu\n", 598 (u_longlong_t)record->zi_type); 599 (void) printf(" level: %d\n", record->zi_level); 600 if (record->zi_start == 0 && 601 record->zi_end == -1ULL) 602 (void) printf(" range: all\n"); 603 else 604 (void) printf(" range: [%llu, %llu)\n", 605 (u_longlong_t)record->zi_start, 606 (u_longlong_t)record->zi_end); 607 (void) printf(" dvas: 0x%x\n", record->zi_dvas); 608 } 609 } 610 611 return (0); 612 } 613 614 int 615 perform_action(const char *pool, zinject_record_t *record, int cmd) 616 { 617 zfs_cmd_t zc = { 0 }; 618 619 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED); 620 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); 621 
zc.zc_guid = record->zi_guid; 622 zc.zc_cookie = cmd; 623 624 if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) 625 return (0); 626 627 return (1); 628 } 629 630 static int 631 parse_delay(char *str, uint64_t *delay, uint64_t *nlanes) 632 { 633 unsigned long scan_delay; 634 unsigned long scan_nlanes; 635 636 if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2) 637 return (1); 638 639 /* 640 * We explicitly disallow a delay of zero here, because we key 641 * off this value being non-zero in translate_device(), to 642 * determine if the fault is a ZINJECT_DELAY_IO fault or not. 643 */ 644 if (scan_delay == 0) 645 return (1); 646 647 /* 648 * The units for the CLI delay parameter is milliseconds, but 649 * the data passed to the kernel is interpreted as nanoseconds. 650 * Thus we scale the milliseconds to nanoseconds here, and this 651 * nanosecond value is used to pass the delay to the kernel. 652 */ 653 *delay = MSEC2NSEC(scan_delay); 654 *nlanes = scan_nlanes; 655 656 return (0); 657 } 658 659 /* 660 * This function converts a string specifier for DVAs into a bit mask. 661 * The dva's provided by the user should be 0 indexed and separated by 662 * a comma. 
For example: 663 * "1" -> 0b0010 (0x2) 664 * "0,1" -> 0b0011 (0x3) 665 * "0,1,2" -> 0b0111 (0x7) 666 */ 667 static int 668 parse_dvas(const char *str, uint32_t *dvas_out) 669 { 670 const char *c = str; 671 uint32_t mask = 0; 672 boolean_t need_delim = B_FALSE; 673 674 /* max string length is 5 ("0,1,2") */ 675 if (strlen(str) > 5 || strlen(str) == 0) 676 return (EINVAL); 677 678 while (*c != '\0') { 679 switch (*c) { 680 case '0': 681 case '1': 682 case '2': 683 /* check for pipe between DVAs */ 684 if (need_delim) 685 return (EINVAL); 686 687 /* check if this DVA has been set already */ 688 if (mask & (1 << ((*c) - '0'))) 689 return (EINVAL); 690 691 mask |= (1 << ((*c) - '0')); 692 need_delim = B_TRUE; 693 break; 694 case ',': 695 need_delim = B_FALSE; 696 break; 697 default: 698 /* check for invalid character */ 699 return (EINVAL); 700 } 701 c++; 702 } 703 704 /* check for dangling delimiter */ 705 if (!need_delim) 706 return (EINVAL); 707 708 *dvas_out = mask; 709 return (0); 710 } 711 712 int 713 main(int argc, char **argv) 714 { 715 int c; 716 char *range = NULL; 717 char *cancel = NULL; 718 char *end; 719 char *raw = NULL; 720 char *device = NULL; 721 int level = 0; 722 int quiet = 0; 723 int error = 0; 724 int domount = 0; 725 int io_type = ZIO_TYPES; 726 int action = VDEV_STATE_UNKNOWN; 727 err_type_t type = TYPE_INVAL; 728 err_type_t label = TYPE_INVAL; 729 zinject_record_t record = { 0 }; 730 char pool[MAXNAMELEN]; 731 char dataset[MAXNAMELEN]; 732 zfs_handle_t *zhp; 733 int nowrites = 0; 734 int dur_txg = 0; 735 int dur_secs = 0; 736 int ret; 737 int flags = 0; 738 uint32_t dvas = 0; 739 740 if ((g_zfs = libzfs_init()) == NULL) { 741 (void) fprintf(stderr, "internal error: failed to " 742 "initialize ZFS library\n"); 743 return (1); 744 } 745 746 libzfs_print_on_error(g_zfs, B_TRUE); 747 748 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 749 (void) fprintf(stderr, "failed to open ZFS device\n"); 750 return (1); 751 } 752 753 if (argc == 1) { 754 /* 755 * 
No arguments. Print the available handlers. If there are no 756 * available handlers, direct the user to '-h' for help 757 * information. 758 */ 759 if (print_all_handlers() == 0) { 760 (void) printf("No handlers registered.\n"); 761 (void) printf("Run 'zinject -h' for usage " 762 "information.\n"); 763 } 764 765 return (0); 766 } 767 768 while ((c = getopt(argc, argv, 769 ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) { 770 switch (c) { 771 case 'a': 772 flags |= ZINJECT_FLUSH_ARC; 773 break; 774 case 'A': 775 if (strcasecmp(optarg, "degrade") == 0) { 776 action = VDEV_STATE_DEGRADED; 777 } else if (strcasecmp(optarg, "fault") == 0) { 778 action = VDEV_STATE_FAULTED; 779 } else { 780 (void) fprintf(stderr, "invalid action '%s': " 781 "must be 'degrade' or 'fault'\n", optarg); 782 usage(); 783 return (1); 784 } 785 break; 786 case 'b': 787 raw = optarg; 788 break; 789 case 'c': 790 cancel = optarg; 791 break; 792 case 'C': 793 ret = parse_dvas(optarg, &dvas); 794 if (ret != 0) { 795 (void) fprintf(stderr, "invalid DVA list '%s': " 796 "DVAs should be 0 indexed and separated by " 797 "commas.\n", optarg); 798 usage(); 799 libzfs_fini(g_zfs); 800 return (1); 801 } 802 break; 803 case 'd': 804 device = optarg; 805 break; 806 case 'D': 807 ret = parse_delay(optarg, &record.zi_timer, 808 &record.zi_nlanes); 809 if (ret != 0) { 810 (void) fprintf(stderr, "invalid i/o delay " 811 "value: '%s'\n", optarg); 812 usage(); 813 return (1); 814 } 815 break; 816 case 'e': 817 if (strcasecmp(optarg, "io") == 0) { 818 error = EIO; 819 } else if (strcasecmp(optarg, "checksum") == 0) { 820 error = ECKSUM; 821 } else if (strcasecmp(optarg, "decrypt") == 0) { 822 error = EACCES; 823 } else if (strcasecmp(optarg, "nxio") == 0) { 824 error = ENXIO; 825 } else if (strcasecmp(optarg, "dtl") == 0) { 826 error = ECHILD; 827 } else { 828 (void) fprintf(stderr, "invalid error type " 829 "'%s': must be 'io', 'checksum' or " 830 "'nxio'\n", optarg); 831 usage(); 832 return (1); 833 } 834 
break; 835 case 'f': 836 record.zi_freq = atoi(optarg); 837 if (record.zi_freq < 1 || record.zi_freq > 100) { 838 (void) fprintf(stderr, "frequency range must " 839 "be in the range (0, 100]\n"); 840 return (1); 841 } 842 break; 843 case 'F': 844 record.zi_failfast = B_TRUE; 845 break; 846 case 'g': 847 dur_txg = 1; 848 record.zi_duration = (int)strtol(optarg, &end, 10); 849 if (record.zi_duration <= 0 || *end != '\0') { 850 (void) fprintf(stderr, "invalid duration '%s': " 851 "must be a positive integer\n", optarg); 852 usage(); 853 return (1); 854 } 855 /* store duration of txgs as its negative */ 856 record.zi_duration *= -1; 857 break; 858 case 'h': 859 usage(); 860 return (0); 861 case 'I': 862 /* default duration, if one hasn't yet been defined */ 863 nowrites = 1; 864 if (dur_secs == 0 && dur_txg == 0) 865 record.zi_duration = 30; 866 break; 867 case 'l': 868 level = (int)strtol(optarg, &end, 10); 869 if (*end != '\0') { 870 (void) fprintf(stderr, "invalid level '%s': " 871 "must be an integer\n", optarg); 872 usage(); 873 return (1); 874 } 875 break; 876 case 'm': 877 domount = 1; 878 break; 879 case 'p': 880 (void) strlcpy(record.zi_func, optarg, 881 sizeof (record.zi_func)); 882 record.zi_cmd = ZINJECT_PANIC; 883 break; 884 case 'q': 885 quiet = 1; 886 break; 887 case 'r': 888 range = optarg; 889 break; 890 case 's': 891 dur_secs = 1; 892 record.zi_duration = (int)strtol(optarg, &end, 10); 893 if (record.zi_duration <= 0 || *end != '\0') { 894 (void) fprintf(stderr, "invalid duration '%s': " 895 "must be a positive integer\n", optarg); 896 usage(); 897 return (1); 898 } 899 break; 900 case 'T': 901 if (strcasecmp(optarg, "read") == 0) { 902 io_type = ZIO_TYPE_READ; 903 } else if (strcasecmp(optarg, "write") == 0) { 904 io_type = ZIO_TYPE_WRITE; 905 } else if (strcasecmp(optarg, "free") == 0) { 906 io_type = ZIO_TYPE_FREE; 907 } else if (strcasecmp(optarg, "claim") == 0) { 908 io_type = ZIO_TYPE_CLAIM; 909 } else if (strcasecmp(optarg, "all") == 0) { 910 
io_type = ZIO_TYPES; 911 } else { 912 (void) fprintf(stderr, "invalid I/O type " 913 "'%s': must be 'read', 'write', 'free', " 914 "'claim' or 'all'\n", optarg); 915 usage(); 916 return (1); 917 } 918 break; 919 case 't': 920 if ((type = name_to_type(optarg)) == TYPE_INVAL && 921 !MOS_TYPE(type)) { 922 (void) fprintf(stderr, "invalid type '%s'\n", 923 optarg); 924 usage(); 925 return (1); 926 } 927 break; 928 case 'u': 929 flags |= ZINJECT_UNLOAD_SPA; 930 break; 931 case 'L': 932 if ((label = name_to_type(optarg)) == TYPE_INVAL && 933 !LABEL_TYPE(type)) { 934 (void) fprintf(stderr, "invalid label type " 935 "'%s'\n", optarg); 936 usage(); 937 return (1); 938 } 939 break; 940 case ':': 941 (void) fprintf(stderr, "option -%c requires an " 942 "operand\n", optopt); 943 usage(); 944 return (1); 945 case '?': 946 (void) fprintf(stderr, "invalid option '%c'\n", 947 optopt); 948 usage(); 949 return (2); 950 } 951 } 952 953 argc -= optind; 954 argv += optind; 955 956 if (record.zi_duration != 0) 957 record.zi_cmd = ZINJECT_IGNORED_WRITES; 958 959 if (cancel != NULL) { 960 /* 961 * '-c' is invalid with any other options. 
962 */ 963 if (raw != NULL || range != NULL || type != TYPE_INVAL || 964 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 965 record.zi_freq > 0 || dvas != 0) { 966 (void) fprintf(stderr, "cancel (-c) incompatible with " 967 "any other options\n"); 968 usage(); 969 return (2); 970 } 971 if (argc != 0) { 972 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 973 usage(); 974 return (2); 975 } 976 977 if (strcmp(cancel, "all") == 0) { 978 return (cancel_all_handlers()); 979 } else { 980 int id = (int)strtol(cancel, &end, 10); 981 if (*end != '\0') { 982 (void) fprintf(stderr, "invalid handle id '%s':" 983 " must be an integer or 'all'\n", cancel); 984 usage(); 985 return (1); 986 } 987 return (cancel_handler(id)); 988 } 989 } 990 991 if (device != NULL) { 992 /* 993 * Device (-d) injection uses a completely different mechanism 994 * for doing injection, so handle it separately here. 995 */ 996 if (raw != NULL || range != NULL || type != TYPE_INVAL || 997 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 998 dvas != 0) { 999 (void) fprintf(stderr, "device (-d) incompatible with " 1000 "data error injection\n"); 1001 usage(); 1002 return (2); 1003 } 1004 1005 if (argc != 1) { 1006 (void) fprintf(stderr, "device (-d) injection requires " 1007 "a single pool name\n"); 1008 usage(); 1009 return (2); 1010 } 1011 1012 (void) strcpy(pool, argv[0]); 1013 dataset[0] = '\0'; 1014 1015 if (error == ECKSUM) { 1016 (void) fprintf(stderr, "device error type must be " 1017 "'io' or 'nxio'\n"); 1018 return (1); 1019 } 1020 1021 record.zi_iotype = io_type; 1022 if (translate_device(pool, device, label, &record) != 0) 1023 return (1); 1024 if (!error) 1025 error = ENXIO; 1026 1027 if (action != VDEV_STATE_UNKNOWN) 1028 return (perform_action(pool, &record, action)); 1029 1030 } else if (raw != NULL) { 1031 if (range != NULL || type != TYPE_INVAL || level != 0 || 1032 record.zi_cmd != ZINJECT_UNINITIALIZED || 1033 record.zi_freq > 0 || dvas != 0) { 1034 (void) 
fprintf(stderr, "raw (-b) format with " 1035 "any other options\n"); 1036 usage(); 1037 return (2); 1038 } 1039 1040 if (argc != 1) { 1041 (void) fprintf(stderr, "raw (-b) format expects a " 1042 "single pool name\n"); 1043 usage(); 1044 return (2); 1045 } 1046 1047 (void) strcpy(pool, argv[0]); 1048 dataset[0] = '\0'; 1049 1050 if (error == ENXIO) { 1051 (void) fprintf(stderr, "data error type must be " 1052 "'checksum' or 'io'\n"); 1053 return (1); 1054 } 1055 1056 record.zi_cmd = ZINJECT_DATA_FAULT; 1057 if (translate_raw(raw, &record) != 0) 1058 return (1); 1059 if (!error) 1060 error = EIO; 1061 } else if (record.zi_cmd == ZINJECT_PANIC) { 1062 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1063 level != 0 || device != NULL || record.zi_freq > 0 || 1064 dvas != 0) { 1065 (void) fprintf(stderr, "panic (-p) incompatible with " 1066 "other options\n"); 1067 usage(); 1068 return (2); 1069 } 1070 1071 if (argc < 1 || argc > 2) { 1072 (void) fprintf(stderr, "panic (-p) injection requires " 1073 "a single pool name and an optional id\n"); 1074 usage(); 1075 return (2); 1076 } 1077 1078 (void) strcpy(pool, argv[0]); 1079 if (argv[1] != NULL) 1080 record.zi_type = atoi(argv[1]); 1081 dataset[0] = '\0'; 1082 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) { 1083 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1084 level != 0 || record.zi_freq > 0 || dvas != 0) { 1085 (void) fprintf(stderr, "hardware failure (-I) " 1086 "incompatible with other options\n"); 1087 usage(); 1088 libzfs_fini(g_zfs); 1089 return (2); 1090 } 1091 1092 if (nowrites == 0) { 1093 (void) fprintf(stderr, "-s or -g meaningless " 1094 "without -I (ignore writes)\n"); 1095 usage(); 1096 return (2); 1097 } else if (dur_secs && dur_txg) { 1098 (void) fprintf(stderr, "choose a duration either " 1099 "in seconds (-s) or a number of txgs (-g) " 1100 "but not both\n"); 1101 usage(); 1102 return (2); 1103 } else if (argc != 1) { 1104 (void) fprintf(stderr, "ignore writes (-I) " 1105 
"injection requires a single pool name\n"); 1106 usage(); 1107 return (2); 1108 } 1109 1110 (void) strcpy(pool, argv[0]); 1111 dataset[0] = '\0'; 1112 } else if (type == TYPE_INVAL) { 1113 if (flags == 0) { 1114 (void) fprintf(stderr, "at least one of '-b', '-d', " 1115 "'-t', '-a', '-p', '-I' or '-u' " 1116 "must be specified\n"); 1117 usage(); 1118 return (2); 1119 } 1120 1121 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 1122 (void) strcpy(pool, argv[0]); 1123 dataset[0] = '\0'; 1124 } else if (argc != 0) { 1125 (void) fprintf(stderr, "extraneous argument for " 1126 "'-f'\n"); 1127 usage(); 1128 return (2); 1129 } 1130 1131 flags |= ZINJECT_NULL; 1132 } else { 1133 if (argc != 1) { 1134 (void) fprintf(stderr, "missing object\n"); 1135 usage(); 1136 return (2); 1137 } 1138 1139 if (error == ENXIO) { 1140 (void) fprintf(stderr, "data error type must be " 1141 "'checksum' or 'io'\n"); 1142 return (1); 1143 } 1144 1145 if (dvas != 0) { 1146 if (error == EACCES || error == EINVAL) { 1147 (void) fprintf(stderr, "the '-C' option may " 1148 "not be used with logical data errors " 1149 "'decrypt' and 'decompress'\n"); 1150 record.zi_dvas = dvas; 1151 } 1152 } 1153 1154 record.zi_cmd = ZINJECT_DATA_FAULT; 1155 1156 if (error == EACCES) { 1157 if (type != TYPE_DATA) { 1158 (void) fprintf(stderr, "decryption errors " 1159 "may only be injected for 'data' types\n"); 1160 libzfs_fini(g_zfs); 1161 return (1); 1162 } 1163 1164 record.zi_cmd = ZINJECT_DECRYPT_FAULT; 1165 /* 1166 * Internally, ZFS actually uses ECKSUM for decryption 1167 * errors since EACCES is used to indicate the key was 1168 * not found. 1169 */ 1170 error = ECKSUM; 1171 } 1172 1173 if (translate_record(type, argv[0], range, level, &record, pool, 1174 dataset) != 0) 1175 return (1); 1176 if (!error) 1177 error = EIO; 1178 } 1179 1180 /* 1181 * If this is pool-wide metadata, unmount everything. 
The ioctl() will 1182 * unload the pool, so that we trigger spa-wide reopen of metadata next 1183 * time we access the pool. 1184 */ 1185 if (dataset[0] != '\0' && domount) { 1186 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 1187 return (1); 1188 1189 if (zfs_unmount(zhp, NULL, 0) != 0) 1190 return (1); 1191 } 1192 1193 record.zi_error = error; 1194 1195 ret = register_handler(pool, flags, &record, quiet); 1196 1197 if (dataset[0] != '\0' && domount) 1198 ret = (zfs_mount(zhp, NULL, 0) != 0); 1199 1200 libzfs_fini(g_zfs); 1201 1202 return (ret); 1203 } 1204