1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * ZFS Fault Injector 30 * 31 * This userland component takes a set of options and uses libzpool to translate 32 * from a user-visible object type and name to an internal representation. 33 * There are two basic types of faults: device faults and data faults. 34 * 35 * 36 * DEVICE FAULTS 37 * 38 * Errors can be injected into a particular vdev using the '-d' option. This 39 * option takes a path or vdev GUID to uniquely identify the device within a 40 * pool. There are two types of errors that can be injected, EIO and ENXIO, 41 * that can be controlled through the '-e' option. The default is ENXIO. For 42 * EIO failures, any attempt to read data from the device will return EIO, but 43 * subsequent attempt to reopen the device will succeed. For ENXIO failures, 44 * any attempt to read from the device will return EIO, but any attempt to 45 * reopen the device will also return ENXIO. 46 * For label faults, the -L option must be specified. 
This allows faults 47 * to be injected into either the nvlist or uberblock region of all the labels 48 * for the specified device. 49 * 50 * This form of the command looks like: 51 * 52 * zinject -d device [-e errno] [-L <uber | nvlist>] pool 53 * 54 * 55 * DATA FAULTS 56 * 57 * We begin with a tuple of the form: 58 * 59 * <type,level,range,object> 60 * 61 * type A string describing the type of data to target. Each type 62 * implicitly describes how to interpret 'object'. Currently, 63 * the following values are supported: 64 * 65 * data User data for a file 66 * dnode Dnode for a file or directory 67 * 68 * The following MOS objects are special. Instead of injecting 69 * errors on a particular object or blkid, we inject errors across 70 * all objects of the given type. 71 * 72 * mos Any data in the MOS 73 * mosdir object directory 74 * config pool configuration 75 * bplist blkptr list 76 * spacemap spacemap 77 * metaslab metaslab 78 * errlog persistent error log 79 * 80 * level Object level. Defaults to '0', not applicable to all types. If 81 * a range is given, this corresponds to the indirect block 82 * corresponding to the specific range. 83 * 84 * range A numerical range [start,end) within the object. Defaults to 85 * the full size of the file. 86 * 87 * object A string describing the logical location of the object. For 88 * files and directories (currently the only supported types), 89 * this is the path of the object on disk. 90 * 91 * This is translated, via libzpool, into the following internal representation: 92 * 93 * <type,objset,object,level,range> 94 * 95 * These types should be self-explanatory. This tuple is then passed to the 96 * kernel via a special ioctl() to initiate fault injection for the given 97 * object. Note that 'type' is not strictly necessary for fault injection, but 98 * is used when translating existing faults into a human-readable string. 
 *
 *
 * The command itself takes one of the forms:
 *
 * 	zinject
 * 	zinject <-a | -u pool>
 * 	zinject -c <id|all>
 * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 *	    [-r range] <object>
 * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 *
 * With no arguments, the command prints all currently registered injection
 * handlers, with their numeric identifiers.
 *
 * The '-c' option will clear the given handler, or all handlers if 'all' is
 * specified.
 *
 * The '-e' option takes a string describing the errno to simulate. This must
 * be either 'io' or 'checksum'. In most cases this will result in the same
 * behavior, but RAID-Z will produce a different set of ereports for this
 * situation.
 *
 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
 * specified, then the ARC cache is flushed appropriately. If '-u' is
 * specified, then the underlying SPA is unloaded. Either of these flags can be
 * specified independently of any other handlers. The '-m' flag automatically
 * does an unmount and remount of the underlying dataset to aid in flushing the
 * cache.
 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100. The default is 100.
 *
 * The fourth form is responsible for actually injecting the handler into the
 * framework. It takes the arguments described above, translates them to the
 * internal tuple using libzpool, and then issues an ioctl() to register the
 * handler.
 *
 * The final form can target a specific bookmark, regardless of whether a
 * human-readable interface has been designed. It allows developers to specify
 * a particular block by number.
139 */ 140 141 #include <errno.h> 142 #include <fcntl.h> 143 #include <stdio.h> 144 #include <stdlib.h> 145 #include <strings.h> 146 #include <unistd.h> 147 148 #include <sys/fs/zfs.h> 149 #include <sys/mount.h> 150 151 #include <libzfs.h> 152 153 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 154 155 #include "zinject.h" 156 157 libzfs_handle_t *g_zfs; 158 int zfs_fd; 159 160 #define ECKSUM EBADE 161 162 static const char *errtable[TYPE_INVAL] = { 163 "data", 164 "dnode", 165 "mos", 166 "mosdir", 167 "metaslab", 168 "config", 169 "bplist", 170 "spacemap", 171 "errlog", 172 "uber", 173 "nvlist" 174 }; 175 176 static err_type_t 177 name_to_type(const char *arg) 178 { 179 int i; 180 for (i = 0; i < TYPE_INVAL; i++) 181 if (strcmp(errtable[i], arg) == 0) 182 return (i); 183 184 return (TYPE_INVAL); 185 } 186 187 static const char * 188 type_to_name(uint64_t type) 189 { 190 switch (type) { 191 case DMU_OT_OBJECT_DIRECTORY: 192 return ("mosdir"); 193 case DMU_OT_OBJECT_ARRAY: 194 return ("metaslab"); 195 case DMU_OT_PACKED_NVLIST: 196 return ("config"); 197 case DMU_OT_BPLIST: 198 return ("bplist"); 199 case DMU_OT_SPACE_MAP: 200 return ("spacemap"); 201 case DMU_OT_ERROR_LOG: 202 return ("errlog"); 203 default: 204 return ("-"); 205 } 206 } 207 208 209 /* 210 * Print usage message. 211 */ 212 void 213 usage(void) 214 { 215 (void) printf( 216 "usage:\n" 217 "\n" 218 "\tzinject\n" 219 "\n" 220 "\t\tList all active injection records.\n" 221 "\n" 222 "\tzinject -c <id|all>\n" 223 "\n" 224 "\t\tClear the particular record (if given a numeric ID), or\n" 225 "\t\tall records if 'all' is specificed.\n" 226 "\n" 227 "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n" 228 "\t\tInject a fault into a particular device or the device's\n" 229 "\t\tlabel. 
Label injection can either be 'nvlist' or 'uber'.\n" 230 "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n" 231 "\n" 232 "\tzinject -b objset:object:level:blkid pool\n" 233 "\n" 234 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 235 "\t\tspecified by the remaining tuple. Each number is in\n" 236 "\t\thexidecimal, and only one block can be specified.\n" 237 "\n" 238 "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 239 "\t [-a] [-m] [-u] [-f freq] <object>\n" 240 "\n" 241 "\t\tInject an error into the object specified by the '-t' option\n" 242 "\t\tand the object descriptor. The 'object' parameter is\n" 243 "\t\tinterperted depending on the '-t' option.\n" 244 "\n" 245 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 246 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 247 "\t\t\t'checksum'. Default is 'io'.\n" 248 "\t\t-l\tInject error at a particular block level. Default is " 249 "0.\n" 250 "\t\t-m\tAutomatically remount underlying filesystem.\n" 251 "\t\t-r\tInject error over a particular logical range of an\n" 252 "\t\t\tobject. Will be translated to the appropriate blkid\n" 253 "\t\t\trange according to the object's properties.\n" 254 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 255 "\t\t\tassociated object.\n" 256 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 257 "\t\t\ta pool object.\n" 258 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 259 "\t\t\ta percentage between 1 and 100.\n" 260 "\n" 261 "\t-t data\t\tInject an error into the plain file contents of a\n" 262 "\t\t\tfile. The object must be specified as a complete path\n" 263 "\t\t\tto a file on a ZFS filesystem.\n" 264 "\n" 265 "\t-t dnode\tInject an error into the metadnode in the block\n" 266 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 267 "\t\t\t'-r' option is incompatible with this mode. 
The object\n" 268 "\t\t\tis specified as a complete path to a file or directory\n" 269 "\t\t\ton a ZFS filesystem.\n" 270 "\n" 271 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 272 "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n" 273 "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" 274 "\t\t\tthe poolname.\n"); 275 } 276 277 static int 278 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 279 void *data) 280 { 281 zfs_cmd_t zc; 282 int ret; 283 284 zc.zc_guid = 0; 285 286 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 287 if ((ret = func((int)zc.zc_guid, zc.zc_name, 288 &zc.zc_inject_record, data)) != 0) 289 return (ret); 290 291 return (0); 292 } 293 294 static int 295 print_data_handler(int id, const char *pool, zinject_record_t *record, 296 void *data) 297 { 298 int *count = data; 299 300 if (record->zi_guid != 0) 301 return (0); 302 303 if (*count == 0) { 304 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 305 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 306 (void) printf("--- --------------- ------ " 307 "------ -------- --- ---------------\n"); 308 } 309 310 *count += 1; 311 312 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 313 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 314 type_to_name(record->zi_type), record->zi_level); 315 316 if (record->zi_start == 0 && 317 record->zi_end == -1ULL) 318 (void) printf("all\n"); 319 else 320 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 321 (u_longlong_t)record->zi_end); 322 323 return (0); 324 } 325 326 static int 327 print_device_handler(int id, const char *pool, zinject_record_t *record, 328 void *data) 329 { 330 int *count = data; 331 332 if (record->zi_guid == 0) 333 return (0); 334 335 if (*count == 0) { 336 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 337 (void) printf("--- --------------- ----------------\n"); 338 } 339 340 *count += 1; 341 342 
(void) printf("%3d %-15s %llx\n", id, pool, 343 (u_longlong_t)record->zi_guid); 344 345 return (0); 346 } 347 348 /* 349 * Print all registered error handlers. Returns the number of handlers 350 * registered. 351 */ 352 static int 353 print_all_handlers(void) 354 { 355 int count = 0; 356 357 (void) iter_handlers(print_device_handler, &count); 358 (void) printf("\n"); 359 count = 0; 360 (void) iter_handlers(print_data_handler, &count); 361 362 return (count); 363 } 364 365 /* ARGSUSED */ 366 static int 367 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 368 void *data) 369 { 370 zfs_cmd_t zc; 371 372 zc.zc_guid = (uint64_t)id; 373 374 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 375 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 376 id, strerror(errno)); 377 return (1); 378 } 379 380 return (0); 381 } 382 383 /* 384 * Remove all fault injection handlers. 385 */ 386 static int 387 cancel_all_handlers(void) 388 { 389 int ret = iter_handlers(cancel_one_handler, NULL); 390 391 (void) printf("removed all registered handlers\n"); 392 393 return (ret); 394 } 395 396 /* 397 * Remove a specific fault injection handler. 398 */ 399 static int 400 cancel_handler(int id) 401 { 402 zfs_cmd_t zc; 403 404 zc.zc_guid = (uint64_t)id; 405 406 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 407 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 408 id, strerror(errno)); 409 return (1); 410 } 411 412 (void) printf("removed handler %d\n", id); 413 414 return (0); 415 } 416 417 /* 418 * Register a new fault injection handler. 
419 */ 420 static int 421 register_handler(const char *pool, int flags, zinject_record_t *record, 422 int quiet) 423 { 424 zfs_cmd_t zc; 425 426 (void) strcpy(zc.zc_name, pool); 427 zc.zc_inject_record = *record; 428 zc.zc_guid = flags; 429 430 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 431 (void) fprintf(stderr, "failed to add handler: %s\n", 432 strerror(errno)); 433 return (1); 434 } 435 436 if (flags & ZINJECT_NULL) 437 return (0); 438 439 if (quiet) { 440 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 441 } else { 442 (void) printf("Added handler %llu with the following " 443 "properties:\n", (u_longlong_t)zc.zc_guid); 444 (void) printf(" pool: %s\n", pool); 445 if (record->zi_guid) { 446 (void) printf(" vdev: %llx\n", 447 (u_longlong_t)record->zi_guid); 448 } else { 449 (void) printf("objset: %llu\n", 450 (u_longlong_t)record->zi_objset); 451 (void) printf("object: %llu\n", 452 (u_longlong_t)record->zi_object); 453 (void) printf(" type: %llu\n", 454 (u_longlong_t)record->zi_type); 455 (void) printf(" level: %d\n", record->zi_level); 456 if (record->zi_start == 0 && 457 record->zi_end == -1ULL) 458 (void) printf(" range: all\n"); 459 else 460 (void) printf(" range: [%llu, %llu)\n", 461 (u_longlong_t)record->zi_start, 462 (u_longlong_t)record->zi_end); 463 } 464 } 465 466 return (0); 467 } 468 469 int 470 main(int argc, char **argv) 471 { 472 int c; 473 char *range = NULL; 474 char *cancel = NULL; 475 char *end; 476 char *raw = NULL; 477 char *device = NULL; 478 int level = 0; 479 int quiet = 0; 480 int error = 0; 481 int domount = 0; 482 err_type_t type = TYPE_INVAL; 483 err_type_t label = TYPE_INVAL; 484 zinject_record_t record = { 0 }; 485 char pool[MAXNAMELEN]; 486 char dataset[MAXNAMELEN]; 487 zfs_handle_t *zhp; 488 int ret; 489 int flags = 0; 490 491 if ((g_zfs = libzfs_init()) == NULL) { 492 (void) fprintf(stderr, "internal error: failed to " 493 "initialize ZFS library\n"); 494 return (1); 495 } 496 497 libzfs_print_on_error(g_zfs, 
B_TRUE); 498 499 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 500 (void) fprintf(stderr, "failed to open ZFS device\n"); 501 return (1); 502 } 503 504 if (argc == 1) { 505 /* 506 * No arguments. Print the available handlers. If there are no 507 * available handlers, direct the user to '-h' for help 508 * information. 509 */ 510 if (print_all_handlers() == 0) { 511 (void) printf("No handlers registered.\n"); 512 (void) printf("Run 'zinject -h' for usage " 513 "information.\n"); 514 } 515 516 return (0); 517 } 518 519 while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) { 520 switch (c) { 521 case 'a': 522 flags |= ZINJECT_FLUSH_ARC; 523 break; 524 case 'b': 525 raw = optarg; 526 break; 527 case 'c': 528 cancel = optarg; 529 break; 530 case 'd': 531 device = optarg; 532 break; 533 case 'e': 534 if (strcasecmp(optarg, "io") == 0) { 535 error = EIO; 536 } else if (strcasecmp(optarg, "checksum") == 0) { 537 error = ECKSUM; 538 } else if (strcasecmp(optarg, "nxio") == 0) { 539 error = ENXIO; 540 } else { 541 (void) fprintf(stderr, "invalid error type " 542 "'%s': must be 'io', 'checksum' or " 543 "'nxio'\n", optarg); 544 usage(); 545 return (1); 546 } 547 break; 548 case 'f': 549 record.zi_freq = atoi(optarg); 550 if (record.zi_freq < 1 || record.zi_freq > 100) { 551 (void) fprintf(stderr, "frequency range must " 552 "be in the range (0, 100]\n"); 553 return (1); 554 } 555 break; 556 case 'h': 557 usage(); 558 return (0); 559 case 'l': 560 level = (int)strtol(optarg, &end, 10); 561 if (*end != '\0') { 562 (void) fprintf(stderr, "invalid level '%s': " 563 "must be an integer\n", optarg); 564 usage(); 565 return (1); 566 } 567 break; 568 case 'm': 569 domount = 1; 570 break; 571 case 'q': 572 quiet = 1; 573 break; 574 case 'r': 575 range = optarg; 576 break; 577 case 't': 578 if ((type = name_to_type(optarg)) == TYPE_INVAL && 579 !MOS_TYPE(type)) { 580 (void) fprintf(stderr, "invalid type '%s'\n", 581 optarg); 582 usage(); 583 return (1); 584 } 585 break; 586 
case 'u': 587 flags |= ZINJECT_UNLOAD_SPA; 588 break; 589 case 'L': 590 if ((label = name_to_type(optarg)) == TYPE_INVAL && 591 !LABEL_TYPE(type)) { 592 (void) fprintf(stderr, "invalid label type " 593 "'%s'\n", optarg); 594 usage(); 595 return (1); 596 } 597 break; 598 case ':': 599 (void) fprintf(stderr, "option -%c requires an " 600 "operand\n", optopt); 601 usage(); 602 return (1); 603 case '?': 604 (void) fprintf(stderr, "invalid option '%c'\n", 605 optopt); 606 usage(); 607 return (2); 608 } 609 } 610 611 argc -= optind; 612 argv += optind; 613 614 if (cancel != NULL) { 615 /* 616 * '-c' is invalid with any other options. 617 */ 618 if (raw != NULL || range != NULL || type != TYPE_INVAL || 619 level != 0) { 620 (void) fprintf(stderr, "cancel (-c) incompatible with " 621 "any other options\n"); 622 usage(); 623 return (2); 624 } 625 if (argc != 0) { 626 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 627 usage(); 628 return (2); 629 } 630 631 if (strcmp(cancel, "all") == 0) { 632 return (cancel_all_handlers()); 633 } else { 634 int id = (int)strtol(cancel, &end, 10); 635 if (*end != '\0') { 636 (void) fprintf(stderr, "invalid handle id '%s':" 637 " must be an integer or 'all'\n", cancel); 638 usage(); 639 return (1); 640 } 641 return (cancel_handler(id)); 642 } 643 } 644 645 if (device != NULL) { 646 /* 647 * Device (-d) injection uses a completely different mechanism 648 * for doing injection, so handle it separately here. 
649 */ 650 if (raw != NULL || range != NULL || type != TYPE_INVAL || 651 level != 0) { 652 (void) fprintf(stderr, "device (-d) incompatible with " 653 "data error injection\n"); 654 usage(); 655 return (2); 656 } 657 658 if (argc != 1) { 659 (void) fprintf(stderr, "device (-d) injection requires " 660 "a single pool name\n"); 661 usage(); 662 return (2); 663 } 664 665 (void) strcpy(pool, argv[0]); 666 dataset[0] = '\0'; 667 668 if (error == ECKSUM) { 669 (void) fprintf(stderr, "device error type must be " 670 "'io' or 'nxio'\n"); 671 return (1); 672 } 673 674 if (translate_device(pool, device, label, &record) != 0) 675 return (1); 676 if (!error) 677 error = ENXIO; 678 } else if (raw != NULL) { 679 if (range != NULL || type != TYPE_INVAL || level != 0) { 680 (void) fprintf(stderr, "raw (-b) format with " 681 "any other options\n"); 682 usage(); 683 return (2); 684 } 685 686 if (argc != 1) { 687 (void) fprintf(stderr, "raw (-b) format expects a " 688 "single pool name\n"); 689 usage(); 690 return (2); 691 } 692 693 (void) strcpy(pool, argv[0]); 694 dataset[0] = '\0'; 695 696 if (error == ENXIO) { 697 (void) fprintf(stderr, "data error type must be " 698 "'checksum' or 'io'\n"); 699 return (1); 700 } 701 702 if (translate_raw(raw, &record) != 0) 703 return (1); 704 if (!error) 705 error = EIO; 706 } else if (type == TYPE_INVAL) { 707 if (flags == 0) { 708 (void) fprintf(stderr, "at least one of '-b', '-d', " 709 "'-t', '-a', or '-u' must be specified\n"); 710 usage(); 711 return (2); 712 } 713 714 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 715 (void) strcpy(pool, argv[0]); 716 dataset[0] = '\0'; 717 } else if (argc != 0) { 718 (void) fprintf(stderr, "extraneous argument for " 719 "'-f'\n"); 720 usage(); 721 return (2); 722 } 723 724 flags |= ZINJECT_NULL; 725 } else { 726 if (argc != 1) { 727 (void) fprintf(stderr, "missing object\n"); 728 usage(); 729 return (2); 730 } 731 732 if (error == ENXIO) { 733 (void) fprintf(stderr, "data error type must be " 734 
"'checksum' or 'io'\n"); 735 return (1); 736 } 737 738 if (translate_record(type, argv[0], range, level, &record, pool, 739 dataset) != 0) 740 return (1); 741 if (!error) 742 error = EIO; 743 } 744 745 /* 746 * If this is pool-wide metadata, unmount everything. The ioctl() will 747 * unload the pool, so that we trigger spa-wide reopen of metadata next 748 * time we access the pool. 749 */ 750 if (dataset[0] != '\0' && domount) { 751 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 752 return (1); 753 754 if (zfs_unmount(zhp, NULL, 0) != 0) 755 return (1); 756 } 757 758 record.zi_error = error; 759 760 ret = register_handler(pool, flags, &record, quiet); 761 762 if (dataset[0] != '\0' && domount) 763 ret = (zfs_mount(zhp, NULL, 0) != 0); 764 765 libzfs_fini(g_zfs); 766 767 return (ret); 768 } 769