1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * ZFS Fault Injector 28 * 29 * This userland component takes a set of options and uses libzpool to translate 30 * from a user-visible object type and name to an internal representation. 31 * There are two basic types of faults: device faults and data faults. 32 * 33 * 34 * DEVICE FAULTS 35 * 36 * Errors can be injected into a particular vdev using the '-d' option. This 37 * option takes a path or vdev GUID to uniquely identify the device within a 38 * pool. There are two types of errors that can be injected, EIO and ENXIO, 39 * that can be controlled through the '-e' option. The default is ENXIO. For 40 * EIO failures, any attempt to read data from the device will return EIO, but 41 * subsequent attempt to reopen the device will succeed. For ENXIO failures, 42 * any attempt to read from the device will return EIO, but any attempt to 43 * reopen the device will also return ENXIO. 44 * For label faults, the -L option must be specified. This allows faults 45 * to be injected into either the nvlist or uberblock region of all the labels 46 * for the specified device. 47 * 48 * This form of the command looks like: 49 * 50 * zinject -d device [-e errno] [-L <uber | nvlist>] pool 51 * 52 * 53 * DATA FAULTS 54 * 55 * We begin with a tuple of the form: 56 * 57 * <type,level,range,object> 58 * 59 * type A string describing the type of data to target. Each type 60 * implicitly describes how to interpret 'object'. Currently, 61 * the following values are supported: 62 * 63 * data User data for a file 64 * dnode Dnode for a file or directory 65 * 66 * The following MOS objects are special. Instead of injecting 67 * errors on a particular object or blkid, we inject errors across 68 * all objects of the given type. 69 * 70 * mos Any data in the MOS 71 * mosdir object directory 72 * config pool configuration 73 * bplist blkptr list 74 * spacemap spacemap 75 * metaslab metaslab 76 * errlog persistent error log 77 * 78 * level Object level. Defaults to '0', not applicable to all types. If 79 * a range is given, this corresponds to the indirect block 80 * corresponding to the specific range. 81 * 82 * range A numerical range [start,end) within the object. Defaults to 83 * the full size of the file. 84 * 85 * object A string describing the logical location of the object. For 86 * files and directories (currently the only supported types), 87 * this is the path of the object on disk. 88 * 89 * This is translated, via libzpool, into the following internal representation: 90 * 91 * <type,objset,object,level,range> 92 * 93 * These types should be self-explanatory. This tuple is then passed to the 94 * kernel via a special ioctl() to initiate fault injection for the given 95 * object. Note that 'type' is not strictly necessary for fault injection, but 96 * is used when translating existing faults into a human-readable string. 97 * 98 * 99 * The command itself takes one of the forms: 100 * 101 * zinject 102 * zinject <-a | -u pool> 103 * zinject -c <id|all> 104 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] 105 * [-r range] <object> 106 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool 107 * 108 * With no arguments, the command prints all currently registered injection 109 * handlers, with their numeric identifiers. 110 * 111 * The '-c' option will clear the given handler, or all handlers if 'all' is 112 * specified. 113 * 114 * The '-e' option takes a string describing the errno to simulate. This must 115 * be either 'io' or 'checksum'. In most cases this will result in the same 116 * behavior, but RAID-Z will produce a different set of ereports for this 117 * situation. 118 * 119 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is 120 * specified, then the ARC cache is flushed appropriately. If '-u' is 121 * specified, then the underlying SPA is unloaded. Either of these flags can be 122 * specified independently of any other handlers. The '-m' flag automatically 123 * does an unmount and remount of the underlying dataset to aid in flushing the 124 * cache. 125 * 126 * The '-f' flag controls the frequency of errors injected, expressed as a 127 * integer percentage between 1 and 100. The default is 100. 128 * 129 * The this form is responsible for actually injecting the handler into the 130 * framework. It takes the arguments described above, translates them to the 131 * internal tuple using libzpool, and then issues an ioctl() to register the 132 * handler. 133 * 134 * The final form can target a specific bookmark, regardless of whether a 135 * human-readable interface has been designed. It allows developers to specify 136 * a particular block by number. 137 */ 138 139 #include <errno.h> 140 #include <fcntl.h> 141 #include <stdio.h> 142 #include <stdlib.h> 143 #include <strings.h> 144 #include <unistd.h> 145 146 #include <sys/fs/zfs.h> 147 #include <sys/mount.h> 148 149 #include <libzfs.h> 150 151 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 152 153 #include "zinject.h" 154 155 libzfs_handle_t *g_zfs; 156 int zfs_fd; 157 158 #define ECKSUM EBADE 159 160 static const char *errtable[TYPE_INVAL] = { 161 "data", 162 "dnode", 163 "mos", 164 "mosdir", 165 "metaslab", 166 "config", 167 "bplist", 168 "spacemap", 169 "errlog", 170 "uber", 171 "nvlist" 172 }; 173 174 static err_type_t 175 name_to_type(const char *arg) 176 { 177 int i; 178 for (i = 0; i < TYPE_INVAL; i++) 179 if (strcmp(errtable[i], arg) == 0) 180 return (i); 181 182 return (TYPE_INVAL); 183 } 184 185 static const char * 186 type_to_name(uint64_t type) 187 { 188 switch (type) { 189 case DMU_OT_OBJECT_DIRECTORY: 190 return ("mosdir"); 191 case DMU_OT_OBJECT_ARRAY: 192 return ("metaslab"); 193 case DMU_OT_PACKED_NVLIST: 194 return ("config"); 195 case DMU_OT_BPLIST: 196 return ("bplist"); 197 case DMU_OT_SPACE_MAP: 198 return ("spacemap"); 199 case DMU_OT_ERROR_LOG: 200 return ("errlog"); 201 default: 202 return ("-"); 203 } 204 } 205 206 207 /* 208 * Print usage message. 209 */ 210 void 211 usage(void) 212 { 213 (void) printf( 214 "usage:\n" 215 "\n" 216 "\tzinject\n" 217 "\n" 218 "\t\tList all active injection records.\n" 219 "\n" 220 "\tzinject -c <id|all>\n" 221 "\n" 222 "\t\tClear the particular record (if given a numeric ID), or\n" 223 "\t\tall records if 'all' is specificed.\n" 224 "\n" 225 "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n" 226 "\t\tInject a fault into a particular device or the device's\n" 227 "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n" 228 "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n" 229 "\n" 230 "\tzinject -b objset:object:level:blkid pool\n" 231 "\n" 232 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 233 "\t\tspecified by the remaining tuple. Each number is in\n" 234 "\t\thexidecimal, and only one block can be specified.\n" 235 "\n" 236 "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 237 "\t [-a] [-m] [-u] [-f freq] <object>\n" 238 "\n" 239 "\t\tInject an error into the object specified by the '-t' option\n" 240 "\t\tand the object descriptor. The 'object' parameter is\n" 241 "\t\tinterperted depending on the '-t' option.\n" 242 "\n" 243 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 244 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 245 "\t\t\t'checksum'. Default is 'io'.\n" 246 "\t\t-l\tInject error at a particular block level. Default is " 247 "0.\n" 248 "\t\t-m\tAutomatically remount underlying filesystem.\n" 249 "\t\t-r\tInject error over a particular logical range of an\n" 250 "\t\t\tobject. Will be translated to the appropriate blkid\n" 251 "\t\t\trange according to the object's properties.\n" 252 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 253 "\t\t\tassociated object.\n" 254 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 255 "\t\t\ta pool object.\n" 256 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 257 "\t\t\ta percentage between 1 and 100.\n" 258 "\n" 259 "\t-t data\t\tInject an error into the plain file contents of a\n" 260 "\t\t\tfile. The object must be specified as a complete path\n" 261 "\t\t\tto a file on a ZFS filesystem.\n" 262 "\n" 263 "\t-t dnode\tInject an error into the metadnode in the block\n" 264 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 265 "\t\t\t'-r' option is incompatible with this mode. The object\n" 266 "\t\t\tis specified as a complete path to a file or directory\n" 267 "\t\t\ton a ZFS filesystem.\n" 268 "\n" 269 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 270 "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n" 271 "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" 272 "\t\t\tthe poolname.\n"); 273 } 274 275 static int 276 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 277 void *data) 278 { 279 zfs_cmd_t zc; 280 int ret; 281 282 zc.zc_guid = 0; 283 284 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 285 if ((ret = func((int)zc.zc_guid, zc.zc_name, 286 &zc.zc_inject_record, data)) != 0) 287 return (ret); 288 289 return (0); 290 } 291 292 static int 293 print_data_handler(int id, const char *pool, zinject_record_t *record, 294 void *data) 295 { 296 int *count = data; 297 298 if (record->zi_guid != 0) 299 return (0); 300 301 if (*count == 0) { 302 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 303 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 304 (void) printf("--- --------------- ------ " 305 "------ -------- --- ---------------\n"); 306 } 307 308 *count += 1; 309 310 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 311 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 312 type_to_name(record->zi_type), record->zi_level); 313 314 if (record->zi_start == 0 && 315 record->zi_end == -1ULL) 316 (void) printf("all\n"); 317 else 318 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 319 (u_longlong_t)record->zi_end); 320 321 return (0); 322 } 323 324 static int 325 print_device_handler(int id, const char *pool, zinject_record_t *record, 326 void *data) 327 { 328 int *count = data; 329 330 if (record->zi_guid == 0) 331 return (0); 332 333 if (*count == 0) { 334 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 335 (void) printf("--- --------------- ----------------\n"); 336 } 337 338 *count += 1; 339 340 (void) printf("%3d %-15s %llx\n", id, pool, 341 (u_longlong_t)record->zi_guid); 342 343 return (0); 344 } 345 346 /* 347 * Print all registered error handlers. Returns the number of handlers 348 * registered. 349 */ 350 static int 351 print_all_handlers(void) 352 { 353 int count = 0; 354 355 (void) iter_handlers(print_device_handler, &count); 356 (void) printf("\n"); 357 count = 0; 358 (void) iter_handlers(print_data_handler, &count); 359 360 return (count); 361 } 362 363 /* ARGSUSED */ 364 static int 365 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 366 void *data) 367 { 368 zfs_cmd_t zc; 369 370 zc.zc_guid = (uint64_t)id; 371 372 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 373 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 374 id, strerror(errno)); 375 return (1); 376 } 377 378 return (0); 379 } 380 381 /* 382 * Remove all fault injection handlers. 383 */ 384 static int 385 cancel_all_handlers(void) 386 { 387 int ret = iter_handlers(cancel_one_handler, NULL); 388 389 (void) printf("removed all registered handlers\n"); 390 391 return (ret); 392 } 393 394 /* 395 * Remove a specific fault injection handler. 396 */ 397 static int 398 cancel_handler(int id) 399 { 400 zfs_cmd_t zc; 401 402 zc.zc_guid = (uint64_t)id; 403 404 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 405 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 406 id, strerror(errno)); 407 return (1); 408 } 409 410 (void) printf("removed handler %d\n", id); 411 412 return (0); 413 } 414 415 /* 416 * Register a new fault injection handler. 417 */ 418 static int 419 register_handler(const char *pool, int flags, zinject_record_t *record, 420 int quiet) 421 { 422 zfs_cmd_t zc; 423 424 (void) strcpy(zc.zc_name, pool); 425 zc.zc_inject_record = *record; 426 zc.zc_guid = flags; 427 428 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 429 (void) fprintf(stderr, "failed to add handler: %s\n", 430 strerror(errno)); 431 return (1); 432 } 433 434 if (flags & ZINJECT_NULL) 435 return (0); 436 437 if (quiet) { 438 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 439 } else { 440 (void) printf("Added handler %llu with the following " 441 "properties:\n", (u_longlong_t)zc.zc_guid); 442 (void) printf(" pool: %s\n", pool); 443 if (record->zi_guid) { 444 (void) printf(" vdev: %llx\n", 445 (u_longlong_t)record->zi_guid); 446 } else { 447 (void) printf("objset: %llu\n", 448 (u_longlong_t)record->zi_objset); 449 (void) printf("object: %llu\n", 450 (u_longlong_t)record->zi_object); 451 (void) printf(" type: %llu\n", 452 (u_longlong_t)record->zi_type); 453 (void) printf(" level: %d\n", record->zi_level); 454 if (record->zi_start == 0 && 455 record->zi_end == -1ULL) 456 (void) printf(" range: all\n"); 457 else 458 (void) printf(" range: [%llu, %llu)\n", 459 (u_longlong_t)record->zi_start, 460 (u_longlong_t)record->zi_end); 461 } 462 } 463 464 return (0); 465 } 466 467 int 468 main(int argc, char **argv) 469 { 470 int c; 471 char *range = NULL; 472 char *cancel = NULL; 473 char *end; 474 char *raw = NULL; 475 char *device = NULL; 476 int level = 0; 477 int quiet = 0; 478 int error = 0; 479 int domount = 0; 480 err_type_t type = TYPE_INVAL; 481 err_type_t label = TYPE_INVAL; 482 zinject_record_t record = { 0 }; 483 char pool[MAXNAMELEN]; 484 char dataset[MAXNAMELEN]; 485 zfs_handle_t *zhp; 486 int ret; 487 int flags = 0; 488 489 if ((g_zfs = libzfs_init()) == NULL) { 490 (void) fprintf(stderr, "internal error: failed to " 491 "initialize ZFS library\n"); 492 return (1); 493 } 494 495 libzfs_print_on_error(g_zfs, B_TRUE); 496 497 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 498 (void) fprintf(stderr, "failed to open ZFS device\n"); 499 return (1); 500 } 501 502 if (argc == 1) { 503 /* 504 * No arguments. Print the available handlers. If there are no 505 * available handlers, direct the user to '-h' for help 506 * information. 507 */ 508 if (print_all_handlers() == 0) { 509 (void) printf("No handlers registered.\n"); 510 (void) printf("Run 'zinject -h' for usage " 511 "information.\n"); 512 } 513 514 return (0); 515 } 516 517 while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) { 518 switch (c) { 519 case 'a': 520 flags |= ZINJECT_FLUSH_ARC; 521 break; 522 case 'b': 523 raw = optarg; 524 break; 525 case 'c': 526 cancel = optarg; 527 break; 528 case 'd': 529 device = optarg; 530 break; 531 case 'e': 532 if (strcasecmp(optarg, "io") == 0) { 533 error = EIO; 534 } else if (strcasecmp(optarg, "checksum") == 0) { 535 error = ECKSUM; 536 } else if (strcasecmp(optarg, "nxio") == 0) { 537 error = ENXIO; 538 } else { 539 (void) fprintf(stderr, "invalid error type " 540 "'%s': must be 'io', 'checksum' or " 541 "'nxio'\n", optarg); 542 usage(); 543 return (1); 544 } 545 break; 546 case 'f': 547 record.zi_freq = atoi(optarg); 548 if (record.zi_freq < 1 || record.zi_freq > 100) { 549 (void) fprintf(stderr, "frequency range must " 550 "be in the range (0, 100]\n"); 551 return (1); 552 } 553 break; 554 case 'F': 555 record.zi_failfast = B_TRUE; 556 break; 557 case 'h': 558 usage(); 559 return (0); 560 case 'l': 561 level = (int)strtol(optarg, &end, 10); 562 if (*end != '\0') { 563 (void) fprintf(stderr, "invalid level '%s': " 564 "must be an integer\n", optarg); 565 usage(); 566 return (1); 567 } 568 break; 569 case 'm': 570 domount = 1; 571 break; 572 case 'q': 573 quiet = 1; 574 break; 575 case 'r': 576 range = optarg; 577 break; 578 case 't': 579 if ((type = name_to_type(optarg)) == TYPE_INVAL && 580 !MOS_TYPE(type)) { 581 (void) fprintf(stderr, "invalid type '%s'\n", 582 optarg); 583 usage(); 584 return (1); 585 } 586 break; 587 case 'u': 588 flags |= ZINJECT_UNLOAD_SPA; 589 break; 590 case 'L': 591 if ((label = name_to_type(optarg)) == TYPE_INVAL && 592 !LABEL_TYPE(type)) { 593 (void) fprintf(stderr, "invalid label type " 594 "'%s'\n", optarg); 595 usage(); 596 return (1); 597 } 598 break; 599 case ':': 600 (void) fprintf(stderr, "option -%c requires an " 601 "operand\n", optopt); 602 usage(); 603 return (1); 604 case '?': 605 (void) fprintf(stderr, "invalid option '%c'\n", 606 optopt); 607 usage(); 608 return (2); 609 } 610 } 611 612 argc -= optind; 613 argv += optind; 614 615 if (cancel != NULL) { 616 /* 617 * '-c' is invalid with any other options. 618 */ 619 if (raw != NULL || range != NULL || type != TYPE_INVAL || 620 level != 0) { 621 (void) fprintf(stderr, "cancel (-c) incompatible with " 622 "any other options\n"); 623 usage(); 624 return (2); 625 } 626 if (argc != 0) { 627 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 628 usage(); 629 return (2); 630 } 631 632 if (strcmp(cancel, "all") == 0) { 633 return (cancel_all_handlers()); 634 } else { 635 int id = (int)strtol(cancel, &end, 10); 636 if (*end != '\0') { 637 (void) fprintf(stderr, "invalid handle id '%s':" 638 " must be an integer or 'all'\n", cancel); 639 usage(); 640 return (1); 641 } 642 return (cancel_handler(id)); 643 } 644 } 645 646 if (device != NULL) { 647 /* 648 * Device (-d) injection uses a completely different mechanism 649 * for doing injection, so handle it separately here. 650 */ 651 if (raw != NULL || range != NULL || type != TYPE_INVAL || 652 level != 0) { 653 (void) fprintf(stderr, "device (-d) incompatible with " 654 "data error injection\n"); 655 usage(); 656 return (2); 657 } 658 659 if (argc != 1) { 660 (void) fprintf(stderr, "device (-d) injection requires " 661 "a single pool name\n"); 662 usage(); 663 return (2); 664 } 665 666 (void) strcpy(pool, argv[0]); 667 dataset[0] = '\0'; 668 669 if (error == ECKSUM) { 670 (void) fprintf(stderr, "device error type must be " 671 "'io' or 'nxio'\n"); 672 return (1); 673 } 674 675 if (translate_device(pool, device, label, &record) != 0) 676 return (1); 677 if (!error) 678 error = ENXIO; 679 } else if (raw != NULL) { 680 if (range != NULL || type != TYPE_INVAL || level != 0) { 681 (void) fprintf(stderr, "raw (-b) format with " 682 "any other options\n"); 683 usage(); 684 return (2); 685 } 686 687 if (argc != 1) { 688 (void) fprintf(stderr, "raw (-b) format expects a " 689 "single pool name\n"); 690 usage(); 691 return (2); 692 } 693 694 (void) strcpy(pool, argv[0]); 695 dataset[0] = '\0'; 696 697 if (error == ENXIO) { 698 (void) fprintf(stderr, "data error type must be " 699 "'checksum' or 'io'\n"); 700 return (1); 701 } 702 703 if (translate_raw(raw, &record) != 0) 704 return (1); 705 if (!error) 706 error = EIO; 707 } else if (type == TYPE_INVAL) { 708 if (flags == 0) { 709 (void) fprintf(stderr, "at least one of '-b', '-d', " 710 "'-t', '-a', or '-u' must be specified\n"); 711 usage(); 712 return (2); 713 } 714 715 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 716 (void) strcpy(pool, argv[0]); 717 dataset[0] = '\0'; 718 } else if (argc != 0) { 719 (void) fprintf(stderr, "extraneous argument for " 720 "'-f'\n"); 721 usage(); 722 return (2); 723 } 724 725 flags |= ZINJECT_NULL; 726 } else { 727 if (argc != 1) { 728 (void) fprintf(stderr, "missing object\n"); 729 usage(); 730 return (2); 731 } 732 733 if (error == ENXIO) { 734 (void) fprintf(stderr, "data error type must be " 735 "'checksum' or 'io'\n"); 736 return (1); 737 } 738 739 if (translate_record(type, argv[0], range, level, &record, pool, 740 dataset) != 0) 741 return (1); 742 if (!error) 743 error = EIO; 744 } 745 746 /* 747 * If this is pool-wide metadata, unmount everything. The ioctl() will 748 * unload the pool, so that we trigger spa-wide reopen of metadata next 749 * time we access the pool. 750 */ 751 if (dataset[0] != '\0' && domount) { 752 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 753 return (1); 754 755 if (zfs_unmount(zhp, NULL, 0) != 0) 756 return (1); 757 } 758 759 record.zi_error = error; 760 761 ret = register_handler(pool, flags, &record, quiet); 762 763 if (dataset[0] != '\0' && domount) 764 ret = (zfs_mount(zhp, NULL, 0) != 0); 765 766 libzfs_fini(g_zfs); 767 768 return (ret); 769 } 770