1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * ZFS Fault Injector 30 * 31 * This userland component takes a set of options and uses libzpool to translate 32 * from a user-visible object type and name to an internal representation. 33 * There are two basic types of faults: device faults and data faults. 34 * 35 * 36 * DEVICE FAULTS 37 * 38 * Errors can be injected into a particular vdev using the '-d' option. This 39 * option takes a path or vdev GUID to uniquely identify the device within a 40 * pool. There are two types of errors that can be injected, EIO and ENXIO, 41 * that can be controlled through the '-t' option. The default is ENXIO. For 42 * EIO failures, any attempt to read data from the device will return EIO, but 43 * subsequent attempt to reopen the device will succeed. For ENXIO failures, 44 * any attempt to read from the device will return EIO, but any attempt to 45 * reopen the device will also return ENXIO. 46 * 47 * This form of the command looks like: 48 * 49 * zinject -d device [-t type] pool 50 * 51 * 52 * DATA FAULTS 53 * 54 * We begin with a tuple of the form: 55 * 56 * <type,level,range,object> 57 * 58 * type A string describing the type of data to target. Each type 59 * implicitly describes how to interpret 'object'. Currently, 60 * the following values are supported: 61 * 62 * data User data for a file 63 * dnode Dnode for a file or directory 64 * 65 * The following MOS objects are special. Instead of injecting 66 * errors on a particular object or blkid, we inject errors across 67 * all objects of the given type. 68 * 69 * mos Any data in the MOS 70 * mosdir object directory 71 * config pool configuration 72 * bplist blkptr list 73 * spacemap spacemap 74 * metaslab metaslab 75 * errlog persistent error log 76 * 77 * level Object level. Defaults to '0', not applicable to all types. If 78 * a range is given, this corresponds to the indirect block 79 * corresponding to the specific range. 80 * 81 * range A numerical range [start,end) within the object. Defaults to 82 * the full size of the file. 83 * 84 * object A string describing the logical location of the object. For 85 * files and directories (currently the only supported types), 86 * this is the path of the object on disk. 87 * 88 * This is translated, via libzpool, into the following internal representation: 89 * 90 * <type,objset,object,level,range> 91 * 92 * These types should be self-explanatory. This tuple is then passed to the 93 * kernel via a special ioctl() to initiate fault injection for the given 94 * object. Note that 'type' is not strictly necessary for fault injection, but 95 * is used when translating existing faults into a human-readable string. 96 * 97 * 98 * The command itself takes one of the forms: 99 * 100 * zinject 101 * zinject <-a | -u pool> 102 * zinject -c <id|all> 103 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] 104 * [-r range] <object> 105 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool 106 * 107 * With no arguments, the command prints all currently registered injection 108 * handlers, with their numeric identifiers. 109 * 110 * The '-c' option will clear the given handler, or all handlers if 'all' is 111 * specified. 112 * 113 * The '-e' option takes a string describing the errno to simulate. This must 114 * be either 'io' or 'checksum'. In most cases this will result in the same 115 * behavior, but RAID-Z will produce a different set of ereports for this 116 * situation. 117 * 118 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is 119 * specified, then the ARC cache is flushed appropriately. If '-u' is 120 * specified, then the underlying SPA is unloaded. Either of these flags can be 121 * specified independently of any other handlers. The '-m' flag automatically 122 * does an unmount and remount of the underlying dataset to aid in flushing the 123 * cache. 124 * 125 * The '-f' flag controls the frequency of errors injected, expressed as a 126 * integer percentage between 1 and 100. The default is 100. 127 * 128 * The this form is responsible for actually injecting the handler into the 129 * framework. It takes the arguments described above, translates them to the 130 * internal tuple using libzpool, and then issues an ioctl() to register the 131 * handler. 132 * 133 * The final form can target a specific bookmark, regardless of whether a 134 * human-readable interface has been designed. It allows developers to specify 135 * a particular block by number. 136 */ 137 138 #include <errno.h> 139 #include <fcntl.h> 140 #include <stdio.h> 141 #include <stdlib.h> 142 #include <strings.h> 143 #include <unistd.h> 144 145 #include <sys/fs/zfs.h> 146 #include <sys/mount.h> 147 148 #include <libzfs.h> 149 150 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 151 152 #include "zinject.h" 153 154 libzfs_handle_t *g_zfs; 155 int zfs_fd; 156 157 #define ECKSUM EBADE 158 159 static const char *errtable[TYPE_INVAL] = { 160 "data", 161 "dnode", 162 "mos", 163 "mosdir", 164 "metaslab", 165 "config", 166 "bplist", 167 "spacemap", 168 "errlog" 169 }; 170 171 static err_type_t 172 name_to_type(const char *arg) 173 { 174 int i; 175 for (i = 0; i < TYPE_INVAL; i++) 176 if (strcmp(errtable[i], arg) == 0) 177 return (i); 178 179 return (TYPE_INVAL); 180 } 181 182 static const char * 183 type_to_name(uint64_t type) 184 { 185 switch (type) { 186 case DMU_OT_OBJECT_DIRECTORY: 187 return ("mosdir"); 188 case DMU_OT_OBJECT_ARRAY: 189 return ("metaslab"); 190 case DMU_OT_PACKED_NVLIST: 191 return ("config"); 192 case DMU_OT_BPLIST: 193 return ("bplist"); 194 case DMU_OT_SPACE_MAP: 195 return ("spacemap"); 196 case DMU_OT_ERROR_LOG: 197 return ("errlog"); 198 default: 199 return ("-"); 200 } 201 } 202 203 204 /* 205 * Print usage message. 206 */ 207 void 208 usage(void) 209 { 210 (void) printf( 211 "usage:\n" 212 "\n" 213 "\tzinject\n" 214 "\n" 215 "\t\tList all active injection records.\n" 216 "\n" 217 "\tzinject -c <id|all>\n" 218 "\n" 219 "\t\tClear the particular record (if given a numeric ID), or\n" 220 "\t\tall records if 'all' is specificed.\n" 221 "\n" 222 "\tzinject -d device [-e errno] pool\n" 223 "\t\tInject a fault into a particular device. 'errno' can either\n" 224 "\t\tbe 'nxio' (the default) or 'io'.\n" 225 "\n" 226 "\tzinject -b objset:object:level:blkid pool\n" 227 "\n" 228 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 229 "\t\tspecified by the remaining tuple. Each number is in\n" 230 "\t\thexidecimal, and only one block can be specified.\n" 231 "\n" 232 "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 233 "\t [-a] [-m] [-u] [-f freq] <object>\n" 234 "\n" 235 "\t\tInject an error into the object specified by the '-t' option\n" 236 "\t\tand the object descriptor. The 'object' parameter is\n" 237 "\t\tinterperted depending on the '-t' option.\n" 238 "\n" 239 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 240 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 241 "\t\t\t'checksum'. Default is 'io'.\n" 242 "\t\t-l\tInject error at a particular block level. Default is " 243 "0.\n" 244 "\t\t-m\tAutomatically remount underlying filesystem.\n" 245 "\t\t-r\tInject error over a particular logical range of an\n" 246 "\t\t\tobject. Will be translated to the appropriate blkid\n" 247 "\t\t\trange according to the object's properties.\n" 248 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 249 "\t\t\tassociated object.\n" 250 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 251 "\t\t\ta pool object.\n" 252 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 253 "\t\t\ta percentage between 1 and 100.\n" 254 "\n" 255 "\t-t data\t\tInject an error into the plain file contents of a\n" 256 "\t\t\tfile. The object must be specified as a complete path\n" 257 "\t\t\tto a file on a ZFS filesystem.\n" 258 "\n" 259 "\t-t dnode\tInject an error into the metadnode in the block\n" 260 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 261 "\t\t\t'-r' option is incompatible with this mode. The object\n" 262 "\t\t\tis specified as a complete path to a file or directory\n" 263 "\t\t\ton a ZFS filesystem.\n" 264 "\n" 265 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 266 "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n" 267 "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" 268 "\t\t\tthe poolname.\n"); 269 } 270 271 static int 272 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 273 void *data) 274 { 275 zfs_cmd_t zc; 276 int ret; 277 278 zc.zc_guid = 0; 279 280 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 281 if ((ret = func((int)zc.zc_guid, zc.zc_name, 282 &zc.zc_inject_record, data)) != 0) 283 return (ret); 284 285 return (0); 286 } 287 288 static int 289 print_data_handler(int id, const char *pool, zinject_record_t *record, 290 void *data) 291 { 292 int *count = data; 293 294 if (record->zi_guid != 0) 295 return (0); 296 297 if (*count == 0) { 298 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 299 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 300 (void) printf("--- --------------- ------ " 301 "------ -------- --- ---------------\n"); 302 } 303 304 *count += 1; 305 306 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 307 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 308 type_to_name(record->zi_type), record->zi_level); 309 310 if (record->zi_start == 0 && 311 record->zi_end == -1ULL) 312 (void) printf("all\n"); 313 else 314 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 315 (u_longlong_t)record->zi_end); 316 317 return (0); 318 } 319 320 static int 321 print_device_handler(int id, const char *pool, zinject_record_t *record, 322 void *data) 323 { 324 int *count = data; 325 326 if (record->zi_guid == 0) 327 return (0); 328 329 if (*count == 0) { 330 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 331 (void) printf("--- --------------- ----------------\n"); 332 } 333 334 *count += 1; 335 336 (void) printf("%3d %-15s %llx\n", id, pool, 337 (u_longlong_t)record->zi_guid); 338 339 return (0); 340 } 341 342 /* 343 * Print all registered error handlers. Returns the number of handlers 344 * registered. 345 */ 346 static int 347 print_all_handlers(void) 348 { 349 int count = 0; 350 351 (void) iter_handlers(print_device_handler, &count); 352 (void) printf("\n"); 353 count = 0; 354 (void) iter_handlers(print_data_handler, &count); 355 356 return (count); 357 } 358 359 /* ARGSUSED */ 360 static int 361 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 362 void *data) 363 { 364 zfs_cmd_t zc; 365 366 zc.zc_guid = (uint64_t)id; 367 368 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 369 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 370 id, strerror(errno)); 371 return (1); 372 } 373 374 return (0); 375 } 376 377 /* 378 * Remove all fault injection handlers. 379 */ 380 static int 381 cancel_all_handlers(void) 382 { 383 int ret = iter_handlers(cancel_one_handler, NULL); 384 385 (void) printf("removed all registered handlers\n"); 386 387 return (ret); 388 } 389 390 /* 391 * Remove a specific fault injection handler. 392 */ 393 static int 394 cancel_handler(int id) 395 { 396 zfs_cmd_t zc; 397 398 zc.zc_guid = (uint64_t)id; 399 400 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 401 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 402 id, strerror(errno)); 403 return (1); 404 } 405 406 (void) printf("removed handler %d\n", id); 407 408 return (0); 409 } 410 411 /* 412 * Register a new fault injection handler. 413 */ 414 static int 415 register_handler(const char *pool, int flags, zinject_record_t *record, 416 int quiet) 417 { 418 zfs_cmd_t zc; 419 420 (void) strcpy(zc.zc_name, pool); 421 zc.zc_inject_record = *record; 422 zc.zc_guid = flags; 423 424 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 425 (void) fprintf(stderr, "failed to add handler: %s\n", 426 strerror(errno)); 427 return (1); 428 } 429 430 if (flags & ZINJECT_NULL) 431 return (0); 432 433 if (quiet) { 434 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 435 } else { 436 (void) printf("Added handler %llu with the following " 437 "properties:\n", (u_longlong_t)zc.zc_guid); 438 (void) printf(" pool: %s\n", pool); 439 if (record->zi_guid) { 440 (void) printf(" vdev: %llx\n", 441 (u_longlong_t)record->zi_guid); 442 } else { 443 (void) printf("objset: %llu\n", 444 (u_longlong_t)record->zi_objset); 445 (void) printf("object: %llu\n", 446 (u_longlong_t)record->zi_object); 447 (void) printf(" type: %llu\n", 448 (u_longlong_t)record->zi_type); 449 (void) printf(" level: %d\n", record->zi_level); 450 if (record->zi_start == 0 && 451 record->zi_end == -1ULL) 452 (void) printf(" range: all\n"); 453 else 454 (void) printf(" range: [%llu, %llu)\n", 455 (u_longlong_t)record->zi_start, 456 (u_longlong_t)record->zi_end); 457 } 458 } 459 460 return (0); 461 } 462 463 int 464 main(int argc, char **argv) 465 { 466 int c; 467 char *range = NULL; 468 char *cancel = NULL; 469 char *end; 470 char *raw = NULL; 471 char *device = NULL; 472 int level = 0; 473 int quiet = 0; 474 int error = 0; 475 int domount = 0; 476 err_type_t type = TYPE_INVAL; 477 zinject_record_t record = { 0 }; 478 char pool[MAXNAMELEN]; 479 char dataset[MAXNAMELEN]; 480 zfs_handle_t *zhp; 481 int ret; 482 int flags = 0; 483 484 if ((g_zfs = libzfs_init()) == NULL) { 485 (void) fprintf(stderr, "internal error: failed to " 486 "initialize ZFS library\n"); 487 return (1); 488 } 489 490 libzfs_print_on_error(g_zfs, B_TRUE); 491 492 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 493 (void) fprintf(stderr, "failed to open ZFS device\n"); 494 return (1); 495 } 496 497 if (argc == 1) { 498 /* 499 * No arguments. Print the available handlers. If there are no 500 * available handlers, direct the user to '-h' for help 501 * information. 502 */ 503 if (print_all_handlers() == 0) { 504 (void) printf("No handlers registered.\n"); 505 (void) printf("Run 'zinject -h' for usage " 506 "information.\n"); 507 } 508 509 return (0); 510 } 511 512 while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:u")) != -1) { 513 switch (c) { 514 case 'a': 515 flags |= ZINJECT_FLUSH_ARC; 516 break; 517 case 'b': 518 raw = optarg; 519 break; 520 case 'c': 521 cancel = optarg; 522 break; 523 case 'd': 524 device = optarg; 525 break; 526 case 'e': 527 if (strcasecmp(optarg, "io") == 0) { 528 error = EIO; 529 } else if (strcasecmp(optarg, "checksum") == 0) { 530 error = ECKSUM; 531 } else if (strcasecmp(optarg, "nxio") == 0) { 532 error = ENXIO; 533 } else { 534 (void) fprintf(stderr, "invalid error type " 535 "'%s': must be 'io', 'checksum' or " 536 "'nxio'\n", optarg); 537 usage(); 538 return (1); 539 } 540 break; 541 case 'f': 542 record.zi_freq = atoi(optarg); 543 if (record.zi_freq < 1 || record.zi_freq > 100) { 544 (void) fprintf(stderr, "frequency range must " 545 "be in the range (0, 100]\n"); 546 return (1); 547 } 548 break; 549 case 'h': 550 usage(); 551 return (0); 552 case 'l': 553 level = (int)strtol(optarg, &end, 10); 554 if (*end != '\0') { 555 (void) fprintf(stderr, "invalid level '%s': " 556 "must be an integer\n", optarg); 557 usage(); 558 return (1); 559 } 560 break; 561 case 'm': 562 domount = 1; 563 break; 564 case 'q': 565 quiet = 1; 566 break; 567 case 'r': 568 range = optarg; 569 break; 570 case 't': 571 if ((type = name_to_type(optarg)) == TYPE_INVAL) { 572 (void) fprintf(stderr, "invalid type '%s'\n", 573 optarg); 574 usage(); 575 return (1); 576 } 577 break; 578 case 'u': 579 flags |= ZINJECT_UNLOAD_SPA; 580 break; 581 case ':': 582 (void) fprintf(stderr, "option -%c requires an " 583 "operand\n", optopt); 584 usage(); 585 return (1); 586 case '?': 587 (void) fprintf(stderr, "invalid option '%c'\n", 588 optopt); 589 usage(); 590 return (2); 591 } 592 } 593 594 argc -= optind; 595 argv += optind; 596 597 if (cancel != NULL) { 598 /* 599 * '-c' is invalid with any other options. 600 */ 601 if (raw != NULL || range != NULL || type != TYPE_INVAL || 602 level != 0) { 603 (void) fprintf(stderr, "cancel (-c) incompatible with " 604 "any other options\n"); 605 usage(); 606 return (2); 607 } 608 if (argc != 0) { 609 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 610 usage(); 611 return (2); 612 } 613 614 if (strcmp(cancel, "all") == 0) { 615 return (cancel_all_handlers()); 616 } else { 617 int id = (int)strtol(cancel, &end, 10); 618 if (*end != '\0') { 619 (void) fprintf(stderr, "invalid handle id '%s':" 620 " must be an integer or 'all'\n", cancel); 621 usage(); 622 return (1); 623 } 624 return (cancel_handler(id)); 625 } 626 } 627 628 if (device != NULL) { 629 /* 630 * Device (-d) injection uses a completely different mechanism 631 * for doing injection, so handle it separately here. 632 */ 633 if (raw != NULL || range != NULL || type != TYPE_INVAL || 634 level != 0) { 635 (void) fprintf(stderr, "device (-d) incompatible with " 636 "data error injection\n"); 637 usage(); 638 return (2); 639 } 640 641 if (argc != 1) { 642 (void) fprintf(stderr, "device (-d) injection requires " 643 "a single pool name\n"); 644 usage(); 645 return (2); 646 } 647 648 (void) strcpy(pool, argv[0]); 649 dataset[0] = '\0'; 650 651 if (error == ECKSUM) { 652 (void) fprintf(stderr, "device error type must be " 653 "'io' or 'nxio'\n"); 654 return (1); 655 } 656 657 if (translate_device(pool, device, &record) != 0) 658 return (1); 659 if (!error) 660 error = ENXIO; 661 } else if (raw != NULL) { 662 if (range != NULL || type != TYPE_INVAL || level != 0) { 663 (void) fprintf(stderr, "raw (-b) format with " 664 "any other options\n"); 665 usage(); 666 return (2); 667 } 668 669 if (argc != 1) { 670 (void) fprintf(stderr, "raw (-b) format expects a " 671 "single pool name\n"); 672 usage(); 673 return (2); 674 } 675 676 (void) strcpy(pool, argv[0]); 677 dataset[0] = '\0'; 678 679 if (error == ENXIO) { 680 (void) fprintf(stderr, "data error type must be " 681 "'checksum' or 'io'\n"); 682 return (1); 683 } 684 685 if (translate_raw(raw, &record) != 0) 686 return (1); 687 if (!error) 688 error = EIO; 689 } else if (type == TYPE_INVAL) { 690 if (flags == 0) { 691 (void) fprintf(stderr, "at least one of '-b', '-d', " 692 "'-t', '-a', or '-u' must be specified\n"); 693 usage(); 694 return (2); 695 } 696 697 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 698 (void) strcpy(pool, argv[0]); 699 dataset[0] = '\0'; 700 } else if (argc != 0) { 701 (void) fprintf(stderr, "extraneous argument for " 702 "'-f'\n"); 703 usage(); 704 return (2); 705 } 706 707 flags |= ZINJECT_NULL; 708 } else { 709 if (argc != 1) { 710 (void) fprintf(stderr, "missing object\n"); 711 usage(); 712 return (2); 713 } 714 715 if (error == ENXIO) { 716 (void) fprintf(stderr, "data error type must be " 717 "'checksum' or 'io'\n"); 718 return (1); 719 } 720 721 if (translate_record(type, argv[0], range, level, &record, pool, 722 dataset) != 0) 723 return (1); 724 if (!error) 725 error = EIO; 726 } 727 728 /* 729 * If this is pool-wide metadata, unmount everything. The ioctl() will 730 * unload the pool, so that we trigger spa-wide reopen of metadata next 731 * time we access the pool. 732 */ 733 if (dataset[0] != '\0' && domount) { 734 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 735 return (1); 736 737 if (zfs_unmount(zhp, NULL, 0) != 0) 738 return (1); 739 } 740 741 record.zi_error = error; 742 743 ret = register_handler(pool, flags, &record, quiet); 744 745 if (dataset[0] != '\0' && domount) 746 ret = (zfs_mount(zhp, NULL, 0) != 0); 747 748 libzfs_fini(g_zfs); 749 750 return (ret); 751 } 752