1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 */ 25 26 /* 27 * ZFS Fault Injector 28 * 29 * This userland component takes a set of options and uses libzpool to translate 30 * from a user-visible object type and name to an internal representation. 31 * There are two basic types of faults: device faults and data faults. 32 * 33 * 34 * DEVICE FAULTS 35 * 36 * Errors can be injected into a particular vdev using the '-d' option. This 37 * option takes a path or vdev GUID to uniquely identify the device within a 38 * pool. There are two types of errors that can be injected, EIO and ENXIO, 39 * that can be controlled through the '-e' option. The default is ENXIO. For 40 * EIO failures, any attempt to read data from the device will return EIO, but 41 * subsequent attempt to reopen the device will succeed. For ENXIO failures, 42 * any attempt to read from the device will return EIO, but any attempt to 43 * reopen the device will also return ENXIO. 44 * For label faults, the -L option must be specified. 
This allows faults 45 * to be injected into either the nvlist, uberblock, pad1, or pad2 region 46 * of all the labels for the specified device. 47 * 48 * This form of the command looks like: 49 * 50 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool 51 * 52 * 53 * DATA FAULTS 54 * 55 * We begin with a tuple of the form: 56 * 57 * <type,level,range,object> 58 * 59 * type A string describing the type of data to target. Each type 60 * implicitly describes how to interpret 'object'. Currently, 61 * the following values are supported: 62 * 63 * data User data for a file 64 * dnode Dnode for a file or directory 65 * 66 * The following MOS objects are special. Instead of injecting 67 * errors on a particular object or blkid, we inject errors across 68 * all objects of the given type. 69 * 70 * mos Any data in the MOS 71 * mosdir object directory 72 * config pool configuration 73 * bpobj blkptr list 74 * spacemap spacemap 75 * metaslab metaslab 76 * errlog persistent error log 77 * 78 * level Object level. Defaults to '0', not applicable to all types. If 79 * a range is given, this corresponds to the indirect block 80 * corresponding to the specific range. 81 * 82 * range A numerical range [start,end) within the object. Defaults to 83 * the full size of the file. 84 * 85 * object A string describing the logical location of the object. For 86 * files and directories (currently the only supported types), 87 * this is the path of the object on disk. 88 * 89 * This is translated, via libzpool, into the following internal representation: 90 * 91 * <type,objset,object,level,range> 92 * 93 * These types should be self-explanatory. This tuple is then passed to the 94 * kernel via a special ioctl() to initiate fault injection for the given 95 * object. Note that 'type' is not strictly necessary for fault injection, but 96 * is used when translating existing faults into a human-readable string. 
 *
 *
 * The command itself takes one of the forms:
 *
 *	zinject
 *	zinject <-a | -u pool>
 *	zinject -c <id|all>
 *	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 *	    [-r range] <object>
 *	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 *
 * With no arguments, the command prints all currently registered injection
 * handlers, with their numeric identifiers.
 *
 * The '-c' option will clear the given handler, or all handlers if 'all' is
 * specified.
 *
 * The '-e' option takes a string describing the errno to simulate. This must
 * be either 'io' or 'checksum'. In most cases this will result in the same
 * behavior, but RAID-Z will produce a different set of ereports for this
 * situation.
 *
 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
 * specified, then the ARC cache is flushed appropriately. If '-u' is
 * specified, then the underlying SPA is unloaded. Either of these flags can be
 * specified independently of any other handlers. The '-m' flag automatically
 * does an unmount and remount of the underlying dataset to aid in flushing the
 * cache.
 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100. The default is 100.
 *
 * The fourth form is responsible for actually injecting the handler into the
 * framework. It takes the arguments described above, translates them to the
 * internal tuple using libzpool, and then issues an ioctl() to register the
 * handler.
 *
 * The final form can target a specific bookmark, regardless of whether a
 * human-readable interface has been designed. It allows developers to specify
 * a particular block by number.
137 */ 138 139 #include <errno.h> 140 #include <fcntl.h> 141 #include <stdio.h> 142 #include <stdlib.h> 143 #include <strings.h> 144 #include <unistd.h> 145 146 #include <sys/fs/zfs.h> 147 #include <sys/mount.h> 148 149 #include <libzfs.h> 150 151 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 152 153 #include "zinject.h" 154 155 libzfs_handle_t *g_zfs; 156 int zfs_fd; 157 158 #define ECKSUM EBADE 159 160 static const char *errtable[TYPE_INVAL] = { 161 "data", 162 "dnode", 163 "mos", 164 "mosdir", 165 "metaslab", 166 "config", 167 "bpobj", 168 "spacemap", 169 "errlog", 170 "uber", 171 "nvlist", 172 "pad1", 173 "pad2" 174 }; 175 176 static err_type_t 177 name_to_type(const char *arg) 178 { 179 int i; 180 for (i = 0; i < TYPE_INVAL; i++) 181 if (strcmp(errtable[i], arg) == 0) 182 return (i); 183 184 return (TYPE_INVAL); 185 } 186 187 static const char * 188 type_to_name(uint64_t type) 189 { 190 switch (type) { 191 case DMU_OT_OBJECT_DIRECTORY: 192 return ("mosdir"); 193 case DMU_OT_OBJECT_ARRAY: 194 return ("metaslab"); 195 case DMU_OT_PACKED_NVLIST: 196 return ("config"); 197 case DMU_OT_BPOBJ: 198 return ("bpobj"); 199 case DMU_OT_SPACE_MAP: 200 return ("spacemap"); 201 case DMU_OT_ERROR_LOG: 202 return ("errlog"); 203 default: 204 return ("-"); 205 } 206 } 207 208 209 /* 210 * Print usage message. 211 */ 212 void 213 usage(void) 214 { 215 (void) printf( 216 "usage:\n" 217 "\n" 218 "\tzinject\n" 219 "\n" 220 "\t\tList all active injection records.\n" 221 "\n" 222 "\tzinject -c <id|all>\n" 223 "\n" 224 "\t\tClear the particular record (if given a numeric ID), or\n" 225 "\t\tall records if 'all' is specificed.\n" 226 "\n" 227 "\tzinject -p <function name> pool\n" 228 "\n" 229 "\t\tInject a panic fault at the specified function. 
Only \n" 230 "\t\tfunctions which call spa_vdev_config_exit(), or \n" 231 "\t\tspa_vdev_exit() will trigger a panic.\n" 232 "\n" 233 "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n" 234 "\t [-T <read|write|free|claim|all> pool\n" 235 "\n" 236 "\t\tInject a fault into a particular device or the device's\n" 237 "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " 238 "\t\t'pad1', or 'pad2'.\n" 239 "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" 240 "\n" 241 "\tzinject -d device -A <degrade|fault> pool\n" 242 "\n" 243 "\t\tPerform a specific action on a particular device\n" 244 "\n" 245 "\tzinject -d device -D latency:lanes pool\n" 246 "\n" 247 "\t\tAdd an artificial delay to IO requests on a particular\n" 248 "\t\tdevice, such that the requests take a minimum of 'latency'\n" 249 "\t\tmilliseconds to complete. Each delay has an associated\n" 250 "\t\tnumber of 'lanes' which defines the number of concurrent\n" 251 "\t\tIO requests that can be processed.\n" 252 "\n" 253 "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n" 254 "\t\tthe device will only be able to service a single IO request\n" 255 "\t\tat a time with each request taking 10 ms to complete. So,\n" 256 "\t\tif only a single request is submitted every 10 ms, the\n" 257 "\t\taverage latency will be 10 ms; but if more than one request\n" 258 "\t\tis submitted every 10 ms, the average latency will be more\n" 259 "\t\tthan 10 ms.\n" 260 "\n" 261 "\t\tSimilarly, if a delay of 10 ms is specified to have two\n" 262 "\t\tlanes (-D 10:2), then the device will be able to service\n" 263 "\t\ttwo requests at a time, each with a minimum latency of\n" 264 "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n" 265 "\t\tthe average latency will be 10 ms; but if more than two\n" 266 "\t\trequests are submitted every 10 ms, the average latency\n" 267 "\t\twill be more than 10 ms.\n" 268 "\n" 269 "\t\tAlso note, these delays are additive. 
So two invocations\n" 270 "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n" 271 "\t\tof '-D 10:2'. This also means, one can specify multiple\n" 272 "\t\tlanes with differing target latencies. For example, an\n" 273 "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n" 274 "\t\tcreate 3 lanes on the device; one lane with a latency\n" 275 "\t\tof 10 ms and two lanes with a 25 ms latency.\n" 276 "\n" 277 "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" 278 "\n" 279 "\t\tCause the pool to stop writing blocks yet not\n" 280 "\t\treport errors for a duration. Simulates buggy hardware\n" 281 "\t\tthat fails to honor cache flush requests.\n" 282 "\t\tDefault duration is 30 seconds. The machine is panicked\n" 283 "\t\tat the end of the duration.\n" 284 "\n" 285 "\tzinject -b objset:object:level:blkid pool\n" 286 "\n" 287 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 288 "\t\tspecified by the remaining tuple. Each number is in\n" 289 "\t\thexidecimal, and only one block can be specified.\n" 290 "\n" 291 "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n" 292 "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n" 293 "\n" 294 "\t\tInject an error into the object specified by the '-t' option\n" 295 "\t\tand the object descriptor. The 'object' parameter is\n" 296 "\t\tinterperted depending on the '-t' option.\n" 297 "\n" 298 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 299 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 300 "\t\t\t'checksum', or 'decompress'. Default is 'io'.\n" 301 "\t\t-C\tInject the given error only into specific DVAs. The\n" 302 "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n" 303 "\t\t\tseparated by commas (ex. '0,2').\n" 304 "\t\t-l\tInject error at a particular block level. Default is " 305 "0.\n" 306 "\t\t-m\tAutomatically remount underlying filesystem.\n" 307 "\t\t-r\tInject error over a particular logical range of an\n" 308 "\t\t\tobject. 
Will be translated to the appropriate blkid\n" 309 "\t\t\trange according to the object's properties.\n" 310 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 311 "\t\t\tassociated object.\n" 312 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 313 "\t\t\ta pool object.\n" 314 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 315 "\t\t\ta percentage between 1 and 100.\n" 316 "\n" 317 "\t-t data\t\tInject an error into the plain file contents of a\n" 318 "\t\t\tfile. The object must be specified as a complete path\n" 319 "\t\t\tto a file on a ZFS filesystem.\n" 320 "\n" 321 "\t-t dnode\tInject an error into the metadnode in the block\n" 322 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 323 "\t\t\t'-r' option is incompatible with this mode. The object\n" 324 "\t\t\tis specified as a complete path to a file or directory\n" 325 "\t\t\ton a ZFS filesystem.\n" 326 "\n" 327 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 328 "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n" 329 "\t\t\tspacemap, metaslab, errlog. 
The only valid <object> is\n" 330 "\t\t\tthe poolname.\n"); 331 } 332 333 static int 334 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 335 void *data) 336 { 337 zfs_cmd_t zc = { 0 }; 338 int ret; 339 340 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 341 if ((ret = func((int)zc.zc_guid, zc.zc_name, 342 &zc.zc_inject_record, data)) != 0) 343 return (ret); 344 345 if (errno != ENOENT) { 346 (void) fprintf(stderr, "Unable to list handlers: %s\n", 347 strerror(errno)); 348 return (-1); 349 } 350 351 return (0); 352 } 353 354 static int 355 print_data_handler(int id, const char *pool, zinject_record_t *record, 356 void *data) 357 { 358 int *count = data; 359 360 if (record->zi_guid != 0 || record->zi_func[0] != '\0') 361 return (0); 362 363 if (*count == 0) { 364 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s ", 365 "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE", 366 "LVL", "DVAs", "RANGE"); 367 (void) printf("--- --------------- ------ " 368 "------ -------- --- ---- ----------------\n"); 369 } 370 371 *count += 1; 372 373 (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x ", 374 id, pool, (u_longlong_t)record->zi_objset, 375 (u_longlong_t)record->zi_object, type_to_name(record->zi_type), 376 record->zi_level, record->zi_dvas); 377 378 if (record->zi_start == 0 && 379 record->zi_end == -1ULL) 380 (void) printf("all\n"); 381 else 382 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 383 (u_longlong_t)record->zi_end); 384 385 return (0); 386 } 387 388 static int 389 print_device_handler(int id, const char *pool, zinject_record_t *record, 390 void *data) 391 { 392 int *count = data; 393 394 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 395 return (0); 396 397 if (record->zi_cmd == ZINJECT_DELAY_IO) 398 return (0); 399 400 if (*count == 0) { 401 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 402 (void) printf("--- --------------- ----------------\n"); 403 } 404 405 *count += 1; 406 407 
(void) printf("%3d %-15s %llx\n", id, pool, 408 (u_longlong_t)record->zi_guid); 409 410 return (0); 411 } 412 413 static int 414 print_delay_handler(int id, const char *pool, zinject_record_t *record, 415 void *data) 416 { 417 int *count = data; 418 419 if (record->zi_guid == 0 || record->zi_func[0] != '\0') 420 return (0); 421 422 if (record->zi_cmd != ZINJECT_DELAY_IO) 423 return (0); 424 425 if (*count == 0) { 426 (void) printf("%3s %-15s %-15s %-15s %s\n", 427 "ID", "POOL", "DELAY (ms)", "LANES", "GUID"); 428 (void) printf("--- --------------- --------------- " 429 "--------------- ----------------\n"); 430 } 431 432 *count += 1; 433 434 (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, 435 (u_longlong_t)NSEC2MSEC(record->zi_timer), 436 (u_longlong_t)record->zi_nlanes, 437 (u_longlong_t)record->zi_guid); 438 439 return (0); 440 } 441 442 static int 443 print_panic_handler(int id, const char *pool, zinject_record_t *record, 444 void *data) 445 { 446 int *count = data; 447 448 if (record->zi_func[0] == '\0') 449 return (0); 450 451 if (*count == 0) { 452 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); 453 (void) printf("--- --------------- ----------------\n"); 454 } 455 456 *count += 1; 457 458 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); 459 460 return (0); 461 } 462 463 /* 464 * Print all registered error handlers. Returns the number of handlers 465 * registered. 
466 */ 467 static int 468 print_all_handlers(void) 469 { 470 int count = 0, total = 0; 471 472 (void) iter_handlers(print_device_handler, &count); 473 if (count > 0) { 474 total += count; 475 (void) printf("\n"); 476 count = 0; 477 } 478 479 (void) iter_handlers(print_delay_handler, &count); 480 if (count > 0) { 481 total += count; 482 (void) printf("\n"); 483 count = 0; 484 } 485 486 (void) iter_handlers(print_data_handler, &count); 487 if (count > 0) { 488 total += count; 489 (void) printf("\n"); 490 count = 0; 491 } 492 493 (void) iter_handlers(print_panic_handler, &count); 494 495 return (count + total); 496 } 497 498 /* ARGSUSED */ 499 static int 500 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 501 void *data) 502 { 503 zfs_cmd_t zc = { 0 }; 504 505 zc.zc_guid = (uint64_t)id; 506 507 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 508 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 509 id, strerror(errno)); 510 return (1); 511 } 512 513 return (0); 514 } 515 516 /* 517 * Remove all fault injection handlers. 518 */ 519 static int 520 cancel_all_handlers(void) 521 { 522 int ret = iter_handlers(cancel_one_handler, NULL); 523 524 if (ret == 0) 525 (void) printf("removed all registered handlers\n"); 526 527 return (ret); 528 } 529 530 /* 531 * Remove a specific fault injection handler. 532 */ 533 static int 534 cancel_handler(int id) 535 { 536 zfs_cmd_t zc = { 0 }; 537 538 zc.zc_guid = (uint64_t)id; 539 540 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 541 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 542 id, strerror(errno)); 543 return (1); 544 } 545 546 (void) printf("removed handler %d\n", id); 547 548 return (0); 549 } 550 551 /* 552 * Register a new fault injection handler. 
553 */ 554 static int 555 register_handler(const char *pool, int flags, zinject_record_t *record, 556 int quiet) 557 { 558 zfs_cmd_t zc = { 0 }; 559 560 (void) strcpy(zc.zc_name, pool); 561 zc.zc_inject_record = *record; 562 zc.zc_guid = flags; 563 564 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 565 (void) fprintf(stderr, "failed to add handler: %s\n", 566 strerror(errno)); 567 return (1); 568 } 569 570 if (flags & ZINJECT_NULL) 571 return (0); 572 573 if (quiet) { 574 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 575 } else { 576 (void) printf("Added handler %llu with the following " 577 "properties:\n", (u_longlong_t)zc.zc_guid); 578 (void) printf(" pool: %s\n", pool); 579 if (record->zi_guid) { 580 (void) printf(" vdev: %llx\n", 581 (u_longlong_t)record->zi_guid); 582 } else if (record->zi_func[0] != '\0') { 583 (void) printf(" panic function: %s\n", 584 record->zi_func); 585 } else if (record->zi_duration > 0) { 586 (void) printf(" time: %lld seconds\n", 587 (u_longlong_t)record->zi_duration); 588 } else if (record->zi_duration < 0) { 589 (void) printf(" txgs: %lld \n", 590 (u_longlong_t)-record->zi_duration); 591 } else { 592 (void) printf("objset: %llu\n", 593 (u_longlong_t)record->zi_objset); 594 (void) printf("object: %llu\n", 595 (u_longlong_t)record->zi_object); 596 (void) printf(" type: %llu\n", 597 (u_longlong_t)record->zi_type); 598 (void) printf(" level: %d\n", record->zi_level); 599 if (record->zi_start == 0 && 600 record->zi_end == -1ULL) 601 (void) printf(" range: all\n"); 602 else 603 (void) printf(" range: [%llu, %llu)\n", 604 (u_longlong_t)record->zi_start, 605 (u_longlong_t)record->zi_end); 606 (void) printf(" dvas: 0x%x\n", record->zi_dvas); 607 } 608 } 609 610 return (0); 611 } 612 613 int 614 perform_action(const char *pool, zinject_record_t *record, int cmd) 615 { 616 zfs_cmd_t zc = { 0 }; 617 618 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED); 619 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); 620 
zc.zc_guid = record->zi_guid; 621 zc.zc_cookie = cmd; 622 623 if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) 624 return (0); 625 626 return (1); 627 } 628 629 static int 630 parse_delay(char *str, uint64_t *delay, uint64_t *nlanes) 631 { 632 unsigned long scan_delay; 633 unsigned long scan_nlanes; 634 635 if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2) 636 return (1); 637 638 /* 639 * We explicitly disallow a delay of zero here, because we key 640 * off this value being non-zero in translate_device(), to 641 * determine if the fault is a ZINJECT_DELAY_IO fault or not. 642 */ 643 if (scan_delay == 0) 644 return (1); 645 646 /* 647 * The units for the CLI delay parameter is milliseconds, but 648 * the data passed to the kernel is interpreted as nanoseconds. 649 * Thus we scale the milliseconds to nanoseconds here, and this 650 * nanosecond value is used to pass the delay to the kernel. 651 */ 652 *delay = MSEC2NSEC(scan_delay); 653 *nlanes = scan_nlanes; 654 655 return (0); 656 } 657 658 /* 659 * This function converts a string specifier for DVAs into a bit mask. 660 * The dva's provided by the user should be 0 indexed and separated by 661 * a comma. 
For example: 662 * "1" -> 0b0010 (0x2) 663 * "0,1" -> 0b0011 (0x3) 664 * "0,1,2" -> 0b0111 (0x7) 665 */ 666 static int 667 parse_dvas(const char *str, uint32_t *dvas_out) 668 { 669 const char *c = str; 670 uint32_t mask = 0; 671 boolean_t need_delim = B_FALSE; 672 673 /* max string length is 5 ("0,1,2") */ 674 if (strlen(str) > 5 || strlen(str) == 0) 675 return (EINVAL); 676 677 while (*c != '\0') { 678 switch (*c) { 679 case '0': 680 case '1': 681 case '2': 682 /* check for pipe between DVAs */ 683 if (need_delim) 684 return (EINVAL); 685 686 /* check if this DVA has been set already */ 687 if (mask & (1 << ((*c) - '0'))) 688 return (EINVAL); 689 690 mask |= (1 << ((*c) - '0')); 691 need_delim = B_TRUE; 692 break; 693 case ',': 694 need_delim = B_FALSE; 695 break; 696 default: 697 /* check for invalid character */ 698 return (EINVAL); 699 } 700 c++; 701 } 702 703 /* check for dangling delimiter */ 704 if (!need_delim) 705 return (EINVAL); 706 707 *dvas_out = mask; 708 return (0); 709 } 710 711 int 712 main(int argc, char **argv) 713 { 714 int c; 715 char *range = NULL; 716 char *cancel = NULL; 717 char *end; 718 char *raw = NULL; 719 char *device = NULL; 720 int level = 0; 721 int quiet = 0; 722 int error = 0; 723 int domount = 0; 724 int io_type = ZIO_TYPES; 725 int action = VDEV_STATE_UNKNOWN; 726 err_type_t type = TYPE_INVAL; 727 err_type_t label = TYPE_INVAL; 728 zinject_record_t record = { 0 }; 729 char pool[MAXNAMELEN]; 730 char dataset[MAXNAMELEN]; 731 zfs_handle_t *zhp; 732 int nowrites = 0; 733 int dur_txg = 0; 734 int dur_secs = 0; 735 int ret; 736 int flags = 0; 737 uint32_t dvas = 0; 738 739 if ((g_zfs = libzfs_init()) == NULL) { 740 (void) fprintf(stderr, "internal error: failed to " 741 "initialize ZFS library\n"); 742 return (1); 743 } 744 745 libzfs_print_on_error(g_zfs, B_TRUE); 746 747 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 748 (void) fprintf(stderr, "failed to open ZFS device\n"); 749 return (1); 750 } 751 752 if (argc == 1) { 753 /* 754 * 
No arguments. Print the available handlers. If there are no 755 * available handlers, direct the user to '-h' for help 756 * information. 757 */ 758 if (print_all_handlers() == 0) { 759 (void) printf("No handlers registered.\n"); 760 (void) printf("Run 'zinject -h' for usage " 761 "information.\n"); 762 } 763 764 return (0); 765 } 766 767 while ((c = getopt(argc, argv, 768 ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) { 769 switch (c) { 770 case 'a': 771 flags |= ZINJECT_FLUSH_ARC; 772 break; 773 case 'A': 774 if (strcasecmp(optarg, "degrade") == 0) { 775 action = VDEV_STATE_DEGRADED; 776 } else if (strcasecmp(optarg, "fault") == 0) { 777 action = VDEV_STATE_FAULTED; 778 } else { 779 (void) fprintf(stderr, "invalid action '%s': " 780 "must be 'degrade' or 'fault'\n", optarg); 781 usage(); 782 return (1); 783 } 784 break; 785 case 'b': 786 raw = optarg; 787 break; 788 case 'c': 789 cancel = optarg; 790 break; 791 case 'C': 792 ret = parse_dvas(optarg, &dvas); 793 if (ret != 0) { 794 (void) fprintf(stderr, "invalid DVA list '%s': " 795 "DVAs should be 0 indexed and separated by " 796 "commas.\n", optarg); 797 usage(); 798 libzfs_fini(g_zfs); 799 return (1); 800 } 801 break; 802 case 'd': 803 device = optarg; 804 break; 805 case 'D': 806 ret = parse_delay(optarg, &record.zi_timer, 807 &record.zi_nlanes); 808 if (ret != 0) { 809 (void) fprintf(stderr, "invalid i/o delay " 810 "value: '%s'\n", optarg); 811 usage(); 812 return (1); 813 } 814 break; 815 case 'e': 816 if (strcasecmp(optarg, "io") == 0) { 817 error = EIO; 818 } else if (strcasecmp(optarg, "checksum") == 0) { 819 error = ECKSUM; 820 } else if (strcasecmp(optarg, "nxio") == 0) { 821 error = ENXIO; 822 } else if (strcasecmp(optarg, "dtl") == 0) { 823 error = ECHILD; 824 } else { 825 (void) fprintf(stderr, "invalid error type " 826 "'%s': must be 'io', 'checksum' or " 827 "'nxio'\n", optarg); 828 usage(); 829 return (1); 830 } 831 break; 832 case 'f': 833 record.zi_freq = atoi(optarg); 834 if 
(record.zi_freq < 1 || record.zi_freq > 100) { 835 (void) fprintf(stderr, "frequency range must " 836 "be in the range (0, 100]\n"); 837 return (1); 838 } 839 break; 840 case 'F': 841 record.zi_failfast = B_TRUE; 842 break; 843 case 'g': 844 dur_txg = 1; 845 record.zi_duration = (int)strtol(optarg, &end, 10); 846 if (record.zi_duration <= 0 || *end != '\0') { 847 (void) fprintf(stderr, "invalid duration '%s': " 848 "must be a positive integer\n", optarg); 849 usage(); 850 return (1); 851 } 852 /* store duration of txgs as its negative */ 853 record.zi_duration *= -1; 854 break; 855 case 'h': 856 usage(); 857 return (0); 858 case 'I': 859 /* default duration, if one hasn't yet been defined */ 860 nowrites = 1; 861 if (dur_secs == 0 && dur_txg == 0) 862 record.zi_duration = 30; 863 break; 864 case 'l': 865 level = (int)strtol(optarg, &end, 10); 866 if (*end != '\0') { 867 (void) fprintf(stderr, "invalid level '%s': " 868 "must be an integer\n", optarg); 869 usage(); 870 return (1); 871 } 872 break; 873 case 'm': 874 domount = 1; 875 break; 876 case 'p': 877 (void) strlcpy(record.zi_func, optarg, 878 sizeof (record.zi_func)); 879 record.zi_cmd = ZINJECT_PANIC; 880 break; 881 case 'q': 882 quiet = 1; 883 break; 884 case 'r': 885 range = optarg; 886 break; 887 case 's': 888 dur_secs = 1; 889 record.zi_duration = (int)strtol(optarg, &end, 10); 890 if (record.zi_duration <= 0 || *end != '\0') { 891 (void) fprintf(stderr, "invalid duration '%s': " 892 "must be a positive integer\n", optarg); 893 usage(); 894 return (1); 895 } 896 break; 897 case 'T': 898 if (strcasecmp(optarg, "read") == 0) { 899 io_type = ZIO_TYPE_READ; 900 } else if (strcasecmp(optarg, "write") == 0) { 901 io_type = ZIO_TYPE_WRITE; 902 } else if (strcasecmp(optarg, "free") == 0) { 903 io_type = ZIO_TYPE_FREE; 904 } else if (strcasecmp(optarg, "claim") == 0) { 905 io_type = ZIO_TYPE_CLAIM; 906 } else if (strcasecmp(optarg, "all") == 0) { 907 io_type = ZIO_TYPES; 908 } else { 909 (void) fprintf(stderr, 
"invalid I/O type " 910 "'%s': must be 'read', 'write', 'free', " 911 "'claim' or 'all'\n", optarg); 912 usage(); 913 return (1); 914 } 915 break; 916 case 't': 917 if ((type = name_to_type(optarg)) == TYPE_INVAL && 918 !MOS_TYPE(type)) { 919 (void) fprintf(stderr, "invalid type '%s'\n", 920 optarg); 921 usage(); 922 return (1); 923 } 924 break; 925 case 'u': 926 flags |= ZINJECT_UNLOAD_SPA; 927 break; 928 case 'L': 929 if ((label = name_to_type(optarg)) == TYPE_INVAL && 930 !LABEL_TYPE(type)) { 931 (void) fprintf(stderr, "invalid label type " 932 "'%s'\n", optarg); 933 usage(); 934 return (1); 935 } 936 break; 937 case ':': 938 (void) fprintf(stderr, "option -%c requires an " 939 "operand\n", optopt); 940 usage(); 941 return (1); 942 case '?': 943 (void) fprintf(stderr, "invalid option '%c'\n", 944 optopt); 945 usage(); 946 return (2); 947 } 948 } 949 950 argc -= optind; 951 argv += optind; 952 953 if (record.zi_duration != 0) 954 record.zi_cmd = ZINJECT_IGNORED_WRITES; 955 956 if (cancel != NULL) { 957 /* 958 * '-c' is invalid with any other options. 959 */ 960 if (raw != NULL || range != NULL || type != TYPE_INVAL || 961 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 962 record.zi_freq > 0 || dvas != 0) { 963 (void) fprintf(stderr, "cancel (-c) incompatible with " 964 "any other options\n"); 965 usage(); 966 return (2); 967 } 968 if (argc != 0) { 969 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 970 usage(); 971 return (2); 972 } 973 974 if (strcmp(cancel, "all") == 0) { 975 return (cancel_all_handlers()); 976 } else { 977 int id = (int)strtol(cancel, &end, 10); 978 if (*end != '\0') { 979 (void) fprintf(stderr, "invalid handle id '%s':" 980 " must be an integer or 'all'\n", cancel); 981 usage(); 982 return (1); 983 } 984 return (cancel_handler(id)); 985 } 986 } 987 988 if (device != NULL) { 989 /* 990 * Device (-d) injection uses a completely different mechanism 991 * for doing injection, so handle it separately here. 
992 */ 993 if (raw != NULL || range != NULL || type != TYPE_INVAL || 994 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || 995 dvas != 0) { 996 (void) fprintf(stderr, "device (-d) incompatible with " 997 "data error injection\n"); 998 usage(); 999 return (2); 1000 } 1001 1002 if (argc != 1) { 1003 (void) fprintf(stderr, "device (-d) injection requires " 1004 "a single pool name\n"); 1005 usage(); 1006 return (2); 1007 } 1008 1009 (void) strcpy(pool, argv[0]); 1010 dataset[0] = '\0'; 1011 1012 if (error == ECKSUM) { 1013 (void) fprintf(stderr, "device error type must be " 1014 "'io' or 'nxio'\n"); 1015 return (1); 1016 } 1017 1018 record.zi_iotype = io_type; 1019 if (translate_device(pool, device, label, &record) != 0) 1020 return (1); 1021 if (!error) 1022 error = ENXIO; 1023 1024 if (action != VDEV_STATE_UNKNOWN) 1025 return (perform_action(pool, &record, action)); 1026 1027 } else if (raw != NULL) { 1028 if (range != NULL || type != TYPE_INVAL || level != 0 || 1029 record.zi_cmd != ZINJECT_UNINITIALIZED || 1030 record.zi_freq > 0 || dvas != 0) { 1031 (void) fprintf(stderr, "raw (-b) format with " 1032 "any other options\n"); 1033 usage(); 1034 return (2); 1035 } 1036 1037 if (argc != 1) { 1038 (void) fprintf(stderr, "raw (-b) format expects a " 1039 "single pool name\n"); 1040 usage(); 1041 return (2); 1042 } 1043 1044 (void) strcpy(pool, argv[0]); 1045 dataset[0] = '\0'; 1046 1047 if (error == ENXIO) { 1048 (void) fprintf(stderr, "data error type must be " 1049 "'checksum' or 'io'\n"); 1050 return (1); 1051 } 1052 1053 record.zi_cmd = ZINJECT_DATA_FAULT; 1054 if (translate_raw(raw, &record) != 0) 1055 return (1); 1056 if (!error) 1057 error = EIO; 1058 } else if (record.zi_cmd == ZINJECT_PANIC) { 1059 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1060 level != 0 || device != NULL || record.zi_freq > 0 || 1061 dvas != 0) { 1062 (void) fprintf(stderr, "panic (-p) incompatible with " 1063 "other options\n"); 1064 usage(); 1065 return (2); 1066 } 
1067 1068 if (argc < 1 || argc > 2) { 1069 (void) fprintf(stderr, "panic (-p) injection requires " 1070 "a single pool name and an optional id\n"); 1071 usage(); 1072 return (2); 1073 } 1074 1075 (void) strcpy(pool, argv[0]); 1076 if (argv[1] != NULL) 1077 record.zi_type = atoi(argv[1]); 1078 dataset[0] = '\0'; 1079 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) { 1080 if (raw != NULL || range != NULL || type != TYPE_INVAL || 1081 level != 0 || record.zi_freq > 0 || dvas != 0) { 1082 (void) fprintf(stderr, "hardware failure (-I) " 1083 "incompatible with other options\n"); 1084 usage(); 1085 libzfs_fini(g_zfs); 1086 return (2); 1087 } 1088 1089 if (nowrites == 0) { 1090 (void) fprintf(stderr, "-s or -g meaningless " 1091 "without -I (ignore writes)\n"); 1092 usage(); 1093 return (2); 1094 } else if (dur_secs && dur_txg) { 1095 (void) fprintf(stderr, "choose a duration either " 1096 "in seconds (-s) or a number of txgs (-g) " 1097 "but not both\n"); 1098 usage(); 1099 return (2); 1100 } else if (argc != 1) { 1101 (void) fprintf(stderr, "ignore writes (-I) " 1102 "injection requires a single pool name\n"); 1103 usage(); 1104 return (2); 1105 } 1106 1107 (void) strcpy(pool, argv[0]); 1108 dataset[0] = '\0'; 1109 } else if (type == TYPE_INVAL) { 1110 if (flags == 0) { 1111 (void) fprintf(stderr, "at least one of '-b', '-d', " 1112 "'-t', '-a', '-p', '-I' or '-u' " 1113 "must be specified\n"); 1114 usage(); 1115 return (2); 1116 } 1117 1118 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 1119 (void) strcpy(pool, argv[0]); 1120 dataset[0] = '\0'; 1121 } else if (argc != 0) { 1122 (void) fprintf(stderr, "extraneous argument for " 1123 "'-f'\n"); 1124 usage(); 1125 return (2); 1126 } 1127 1128 flags |= ZINJECT_NULL; 1129 } else { 1130 if (argc != 1) { 1131 (void) fprintf(stderr, "missing object\n"); 1132 usage(); 1133 return (2); 1134 } 1135 1136 if (error == ENXIO) { 1137 (void) fprintf(stderr, "data error type must be " 1138 "'checksum' or 'io'\n"); 1139 return 
(1); 1140 } 1141 1142 if (dvas != 0) { 1143 if (error == EACCES || error == EINVAL) { 1144 (void) fprintf(stderr, "the '-C' option may " 1145 "not be used with logical data errors " 1146 "'decrypt' and 'decompress'\n"); 1147 libzfs_fini(g_zfs); 1148 return (1); 1149 } 1150 1151 record.zi_dvas = dvas; 1152 } 1153 1154 record.zi_cmd = ZINJECT_DATA_FAULT; 1155 if (translate_record(type, argv[0], range, level, &record, pool, 1156 dataset) != 0) 1157 return (1); 1158 if (!error) 1159 error = EIO; 1160 } 1161 1162 /* 1163 * If this is pool-wide metadata, unmount everything. The ioctl() will 1164 * unload the pool, so that we trigger spa-wide reopen of metadata next 1165 * time we access the pool. 1166 */ 1167 if (dataset[0] != '\0' && domount) { 1168 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 1169 return (1); 1170 1171 if (zfs_unmount(zhp, NULL, 0) != 0) 1172 return (1); 1173 } 1174 1175 record.zi_error = error; 1176 1177 ret = register_handler(pool, flags, &record, quiet); 1178 1179 if (dataset[0] != '\0' && domount) 1180 ret = (zfs_mount(zhp, NULL, 0) != 0); 1181 1182 libzfs_fini(g_zfs); 1183 1184 return (ret); 1185 } 1186