/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * ZFS Fault Injector
 *
 * This userland component takes a set of options and uses libzpool to translate
 * from a user-visible object type and name to an internal representation.
 * There are two basic types of faults: device faults and data faults.
 *
 *
 * DEVICE FAULTS
 *
 * Errors can be injected into a particular vdev using the '-d' option.  This
 * option takes a path or vdev GUID to uniquely identify the device within a
 * pool.  There are two types of errors that can be injected, EIO and ENXIO,
 * that can be controlled through the '-e' option.  The default is ENXIO.  For
 * EIO failures, any attempt to read data from the device will return EIO, but
 * subsequent attempts to reopen the device will succeed.  For ENXIO failures,
 * any attempt to read from the device will return EIO, and any attempt to
 * reopen the device will return ENXIO.
 *
 * This form of the command looks like:
 *
 *	zinject -d device [-e errno] pool
 *
 *
 * DATA FAULTS
 *
 * We begin with a tuple of the form:
 *
 *	<type,level,range,object>
 *
 *	type	A string describing the type of data to target.  Each type
 *		implicitly describes how to interpret 'object'.  Currently,
 *		the following values are supported:
 *
 *		data		User data for a file
 *		dnode		Dnode for a file or directory
 *
 *		The following MOS objects are special.  Instead of injecting
 *		errors on a particular object or blkid, we inject errors across
 *		all objects of the given type.
 *
 *		mos		Any data in the MOS
 *		mosdir		object directory
 *		config		pool configuration
 *		bplist		blkptr list
 *		spacemap	spacemap
 *		metaslab	metaslab
 *		errlog		persistent error log
 *
 *	level	Object level.  Defaults to '0', not applicable to all types.  If
 *		a range is given, this corresponds to the indirect block
 *		corresponding to the specific range.
 *
 *	range	A numerical range [start,end) within the object.  Defaults to
 *		the full size of the file.
 *
 *	object	A string describing the logical location of the object.  For
 *		files and directories (currently the only supported types),
 *		this is the path of the object on disk.
 *
 * This is translated, via libzpool, into the following internal representation:
 *
 *	<type,objset,object,level,range>
 *
 * These types should be self-explanatory.  This tuple is then passed to the
 * kernel via a special ioctl() to initiate fault injection for the given
 * object.  Note that 'type' is not strictly necessary for fault injection, but
 * is used when translating existing faults into a human-readable string.
 *
 *
 * The command itself takes one of the forms:
 *
 *	zinject
 *	zinject <-a | -u pool>
 *	zinject -c <id|all>
 *	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 *	    [-r range] <object>
 *	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 *
 * With no arguments, the command prints all currently registered injection
 * handlers, with their numeric identifiers.
 *
 * The '-c' option will clear the given handler, or all handlers if 'all' is
 * specified.
 *
 * The '-e' option takes a string describing the errno to simulate.  For data
 * faults this must be either 'io' or 'checksum'.  In most cases this will
 * result in the same behavior, but RAID-Z will produce a different set of
 * ereports for this situation.  Device faults accept 'io' or 'nxio' instead.
 *
 * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
 * specified, then the ARC cache is flushed appropriately.  If '-u' is
 * specified, then the underlying SPA is unloaded.  Either of these flags can be
 * specified independently of any other handlers.  The '-m' flag automatically
 * does an unmount and remount of the underlying dataset to aid in flushing the
 * cache.
 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100.  The default is 100.
 *
 * The fourth form is responsible for actually injecting the handler into the
 * framework.  It takes the arguments described above, translates them to the
 * internal tuple using libzpool, and then issues an ioctl() to register the
 * handler.
 *
 * The final form can target a specific bookmark, regardless of whether a
 * human-readable interface has been designed.  It allows developers to specify
 * a particular block by number.
136 */ 137 138 #include <errno.h> 139 #include <fcntl.h> 140 #include <stdio.h> 141 #include <stdlib.h> 142 #include <strings.h> 143 #include <unistd.h> 144 145 #include <sys/fs/zfs.h> 146 #include <sys/mount.h> 147 148 #include <libzfs.h> 149 150 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 151 152 #include "zinject.h" 153 154 libzfs_handle_t *g_zfs; 155 int zfs_fd; 156 157 #define ECKSUM EBADE 158 159 static const char *errtable[TYPE_INVAL] = { 160 "data", 161 "dnode", 162 "mos", 163 "mosdir", 164 "metaslab", 165 "config", 166 "bplist", 167 "spacemap", 168 "errlog" 169 }; 170 171 static err_type_t 172 name_to_type(const char *arg) 173 { 174 int i; 175 for (i = 0; i < TYPE_INVAL; i++) 176 if (strcmp(errtable[i], arg) == 0) 177 return (i); 178 179 return (TYPE_INVAL); 180 } 181 182 static const char * 183 type_to_name(uint64_t type) 184 { 185 switch (type) { 186 case DMU_OT_OBJECT_DIRECTORY: 187 return ("mosdir"); 188 case DMU_OT_OBJECT_ARRAY: 189 return ("metaslab"); 190 case DMU_OT_PACKED_NVLIST: 191 return ("config"); 192 case DMU_OT_BPLIST: 193 return ("bplist"); 194 case DMU_OT_SPACE_MAP: 195 return ("spacemap"); 196 case DMU_OT_ERROR_LOG: 197 return ("errlog"); 198 default: 199 return ("-"); 200 } 201 } 202 203 204 /* 205 * Print usage message. 206 */ 207 void 208 usage(void) 209 { 210 (void) printf( 211 "usage:\n" 212 "\n" 213 "\tzinject\n" 214 "\n" 215 "\t\tList all active injection records.\n" 216 "\n" 217 "\tzinject -c <id|all>\n" 218 "\n" 219 "\t\tClear the particular record (if given a numeric ID), or\n" 220 "\t\tall records if 'all' is specificed.\n" 221 "\n" 222 "\tzinject -d device [-e errno] pool\n" 223 "\t\tInject a fault into a particular device. 'errno' can either\n" 224 "\t\tbe 'nxio' (the default) or 'io'.\n" 225 "\n" 226 "\tzinject -b objset:object:level:blkid pool\n" 227 "\n" 228 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 229 "\t\tspecified by the remaining tuple. 
Each number is in\n" 230 "\t\thexidecimal, and only one block can be specified.\n" 231 "\n" 232 "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 233 "\t [-a] [-m] [-u] [-f freq] <object>\n" 234 "\n" 235 "\t\tInject an error into the object specified by the '-t' option\n" 236 "\t\tand the object descriptor. The 'object' parameter is\n" 237 "\t\tinterperted depending on the '-t' option.\n" 238 "\n" 239 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 240 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 241 "\t\t\t'checksum'. Default is 'io'.\n" 242 "\t\t-l\tInject error at a particular block level. Default is " 243 "0.\n" 244 "\t\t-m\tAutomatically remount underlying filesystem.\n" 245 "\t\t-r\tInject error over a particular logical range of an\n" 246 "\t\t\tobject. Will be translated to the appropriate blkid\n" 247 "\t\t\trange according to the object's properties.\n" 248 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 249 "\t\t\tassociated object.\n" 250 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 251 "\t\t\ta pool object.\n" 252 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 253 "\t\t\ta percentage between 1 and 100.\n" 254 "\n" 255 "\t-t data\t\tInject an error into the plain file contents of a\n" 256 "\t\t\tfile. The object must be specified as a complete path\n" 257 "\t\t\tto a file on a ZFS filesystem.\n" 258 "\n" 259 "\t-t dnode\tInject an error into the metadnode in the block\n" 260 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 261 "\t\t\t'-r' option is incompatible with this mode. The object\n" 262 "\t\t\tis specified as a complete path to a file or directory\n" 263 "\t\t\ton a ZFS filesystem.\n" 264 "\n" 265 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 266 "\t\t\ttype. 
Valid types are: mos, mosdir, config, bplist,\n" 267 "\t\t\tspacemap, metaslab, errlog\n"); 268 } 269 270 static int 271 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 272 void *data) 273 { 274 zfs_cmd_t zc; 275 int ret; 276 277 zc.zc_guid = 0; 278 279 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 280 if ((ret = func((int)zc.zc_guid, zc.zc_name, 281 &zc.zc_inject_record, data)) != 0) 282 return (ret); 283 284 return (0); 285 } 286 287 static int 288 print_data_handler(int id, const char *pool, zinject_record_t *record, 289 void *data) 290 { 291 int *count = data; 292 293 if (record->zi_guid != 0) 294 return (0); 295 296 if (*count == 0) { 297 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 298 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 299 (void) printf("--- --------------- ------ " 300 "------ -------- --- ---------------\n"); 301 } 302 303 *count += 1; 304 305 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 306 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 307 type_to_name(record->zi_type), record->zi_level); 308 309 if (record->zi_start == 0 && 310 record->zi_end == -1ULL) 311 (void) printf("all\n"); 312 else 313 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 314 (u_longlong_t)record->zi_end); 315 316 return (0); 317 } 318 319 static int 320 print_device_handler(int id, const char *pool, zinject_record_t *record, 321 void *data) 322 { 323 int *count = data; 324 325 if (record->zi_guid == 0) 326 return (0); 327 328 if (*count == 0) { 329 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 330 (void) printf("--- --------------- ----------------\n"); 331 } 332 333 *count += 1; 334 335 (void) printf("%3d %-15s %llx\n", id, pool, 336 (u_longlong_t)record->zi_guid); 337 338 return (0); 339 } 340 341 /* 342 * Print all registered error handlers. Returns the number of handlers 343 * registered. 
344 */ 345 static int 346 print_all_handlers(void) 347 { 348 int count = 0; 349 350 (void) iter_handlers(print_device_handler, &count); 351 (void) printf("\n"); 352 count = 0; 353 (void) iter_handlers(print_data_handler, &count); 354 355 return (count); 356 } 357 358 /* ARGSUSED */ 359 static int 360 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 361 void *data) 362 { 363 zfs_cmd_t zc; 364 365 zc.zc_guid = (uint64_t)id; 366 367 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 368 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 369 id, strerror(errno)); 370 return (1); 371 } 372 373 return (0); 374 } 375 376 /* 377 * Remove all fault injection handlers. 378 */ 379 static int 380 cancel_all_handlers(void) 381 { 382 int ret = iter_handlers(cancel_one_handler, NULL); 383 384 (void) printf("removed all registered handlers\n"); 385 386 return (ret); 387 } 388 389 /* 390 * Remove a specific fault injection handler. 391 */ 392 static int 393 cancel_handler(int id) 394 { 395 zfs_cmd_t zc; 396 397 zc.zc_guid = (uint64_t)id; 398 399 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 400 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 401 id, strerror(errno)); 402 return (1); 403 } 404 405 (void) printf("removed handler %d\n", id); 406 407 return (0); 408 } 409 410 /* 411 * Register a new fault injection handler. 
412 */ 413 static int 414 register_handler(const char *pool, int flags, zinject_record_t *record, 415 int quiet) 416 { 417 zfs_cmd_t zc; 418 419 (void) strcpy(zc.zc_name, pool); 420 zc.zc_inject_record = *record; 421 zc.zc_guid = flags; 422 423 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 424 (void) fprintf(stderr, "failed to add handler: %s\n", 425 strerror(errno)); 426 return (1); 427 } 428 429 if (flags & ZINJECT_NULL) 430 return (0); 431 432 if (quiet) { 433 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 434 } else { 435 (void) printf("Added handler %llu with the following " 436 "properties:\n", (u_longlong_t)zc.zc_guid); 437 (void) printf(" pool: %s\n", pool); 438 if (record->zi_guid) { 439 (void) printf(" vdev: %llx\n", 440 (u_longlong_t)record->zi_guid); 441 } else { 442 (void) printf("objset: %llu\n", 443 (u_longlong_t)record->zi_objset); 444 (void) printf("object: %llu\n", 445 (u_longlong_t)record->zi_object); 446 (void) printf(" type: %llu\n", 447 (u_longlong_t)record->zi_type); 448 (void) printf(" level: %d\n", record->zi_level); 449 if (record->zi_start == 0 && 450 record->zi_end == -1ULL) 451 (void) printf(" range: all\n"); 452 else 453 (void) printf(" range: [%llu, %llu)\n", 454 (u_longlong_t)record->zi_start, 455 (u_longlong_t)record->zi_end); 456 } 457 } 458 459 return (0); 460 } 461 462 int 463 main(int argc, char **argv) 464 { 465 int c; 466 char *range = NULL; 467 char *cancel = NULL; 468 char *end; 469 char *raw = NULL; 470 char *device = NULL; 471 int level = 0; 472 int quiet = 0; 473 int error = 0; 474 int domount = 0; 475 err_type_t type = TYPE_INVAL; 476 zinject_record_t record = { 0 }; 477 char pool[MAXNAMELEN]; 478 char dataset[MAXNAMELEN]; 479 zfs_handle_t *zhp; 480 int ret; 481 int flags = 0; 482 483 if ((g_zfs = libzfs_init()) == NULL) { 484 (void) fprintf(stderr, "internal error: failed to " 485 "initialize ZFS library\n"); 486 return (1); 487 } 488 489 libzfs_print_on_error(g_zfs, B_TRUE); 490 491 if ((zfs_fd = 
open(ZFS_DEV, O_RDWR)) < 0) { 492 (void) fprintf(stderr, "failed to open ZFS device\n"); 493 return (1); 494 } 495 496 if (argc == 1) { 497 /* 498 * No arguments. Print the available handlers. If there are no 499 * available handlers, direct the user to '-h' for help 500 * information. 501 */ 502 if (print_all_handlers() == 0) { 503 (void) printf("No handlers registered.\n"); 504 (void) printf("Run 'zinject -h' for usage " 505 "information.\n"); 506 } 507 508 return (0); 509 } 510 511 while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:u")) != -1) { 512 switch (c) { 513 case 'a': 514 flags |= ZINJECT_FLUSH_ARC; 515 break; 516 case 'b': 517 raw = optarg; 518 break; 519 case 'c': 520 cancel = optarg; 521 break; 522 case 'd': 523 device = optarg; 524 break; 525 case 'e': 526 if (strcasecmp(optarg, "io") == 0) { 527 error = EIO; 528 } else if (strcasecmp(optarg, "checksum") == 0) { 529 error = ECKSUM; 530 } else if (strcasecmp(optarg, "nxio") == 0) { 531 error = ENXIO; 532 } else { 533 (void) fprintf(stderr, "invalid error type " 534 "'%s': must be 'io', 'checksum' or " 535 "'nxio'\n", optarg); 536 usage(); 537 return (1); 538 } 539 break; 540 case 'f': 541 record.zi_freq = atoi(optarg); 542 if (record.zi_freq < 1 || record.zi_freq > 100) { 543 (void) fprintf(stderr, "frequency range must " 544 "be in the range (0, 100]\n"); 545 return (1); 546 } 547 break; 548 case 'h': 549 usage(); 550 return (0); 551 case 'l': 552 level = (int)strtol(optarg, &end, 10); 553 if (*end != '\0') { 554 (void) fprintf(stderr, "invalid level '%s': " 555 "must be an integer\n", optarg); 556 usage(); 557 return (1); 558 } 559 break; 560 case 'm': 561 domount = 1; 562 break; 563 case 'q': 564 quiet = 1; 565 break; 566 case 'r': 567 range = optarg; 568 break; 569 case 't': 570 if ((type = name_to_type(optarg)) == TYPE_INVAL) { 571 (void) fprintf(stderr, "invalid type '%s'\n", 572 optarg); 573 usage(); 574 return (1); 575 } 576 break; 577 case 'u': 578 flags |= ZINJECT_UNLOAD_SPA; 579 break; 
580 case ':': 581 (void) fprintf(stderr, "option -%c requires an " 582 "operand\n", optopt); 583 usage(); 584 return (1); 585 case '?': 586 (void) fprintf(stderr, "invalid option '%c'\n", 587 optopt); 588 usage(); 589 return (2); 590 } 591 } 592 593 argc -= optind; 594 argv += optind; 595 596 if (cancel != NULL) { 597 /* 598 * '-c' is invalid with any other options. 599 */ 600 if (raw != NULL || range != NULL || type != TYPE_INVAL || 601 level != 0) { 602 (void) fprintf(stderr, "cancel (-c) incompatible with " 603 "any other options\n"); 604 usage(); 605 return (2); 606 } 607 if (argc != 0) { 608 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 609 usage(); 610 return (2); 611 } 612 613 if (strcmp(cancel, "all") == 0) { 614 return (cancel_all_handlers()); 615 } else { 616 int id = (int)strtol(cancel, &end, 10); 617 if (*end != '\0') { 618 (void) fprintf(stderr, "invalid handle id '%s':" 619 " must be an integer or 'all'\n", cancel); 620 usage(); 621 return (1); 622 } 623 return (cancel_handler(id)); 624 } 625 } 626 627 if (device != NULL) { 628 /* 629 * Device (-d) injection uses a completely different mechanism 630 * for doing injection, so handle it separately here. 
631 */ 632 if (raw != NULL || range != NULL || type != TYPE_INVAL || 633 level != 0) { 634 (void) fprintf(stderr, "device (-d) incompatible with " 635 "data error injection\n"); 636 usage(); 637 return (2); 638 } 639 640 if (argc != 1) { 641 (void) fprintf(stderr, "device (-d) injection requires " 642 "a single pool name\n"); 643 usage(); 644 return (2); 645 } 646 647 (void) strcpy(pool, argv[0]); 648 dataset[0] = '\0'; 649 650 if (error == ECKSUM) { 651 (void) fprintf(stderr, "device error type must be " 652 "'io' or 'nxio'\n"); 653 return (1); 654 } 655 656 if (translate_device(pool, device, &record) != 0) 657 return (1); 658 if (!error) 659 error = ENXIO; 660 } else if (raw != NULL) { 661 if (range != NULL || type != TYPE_INVAL || level != 0) { 662 (void) fprintf(stderr, "raw (-b) format with " 663 "any other options\n"); 664 usage(); 665 return (2); 666 } 667 668 if (argc != 1) { 669 (void) fprintf(stderr, "raw (-b) format expects a " 670 "single pool name\n"); 671 usage(); 672 return (2); 673 } 674 675 (void) strcpy(pool, argv[0]); 676 dataset[0] = '\0'; 677 678 if (error == ENXIO) { 679 (void) fprintf(stderr, "data error type must be " 680 "'checksum' or 'io'\n"); 681 return (1); 682 } 683 684 if (translate_raw(raw, &record) != 0) 685 return (1); 686 if (!error) 687 error = EIO; 688 } else if (type == TYPE_INVAL) { 689 if (flags == 0) { 690 (void) fprintf(stderr, "at least one of '-b', '-d', " 691 "'-t', '-a', or '-u' must be specified\n"); 692 usage(); 693 return (2); 694 } 695 696 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 697 (void) strcpy(pool, argv[0]); 698 dataset[0] = '\0'; 699 } else if (argc != 0) { 700 (void) fprintf(stderr, "extraneous argument for " 701 "'-f'\n"); 702 usage(); 703 return (2); 704 } 705 706 flags |= ZINJECT_NULL; 707 } else { 708 if (argc != 1) { 709 (void) fprintf(stderr, "missing object\n"); 710 usage(); 711 return (2); 712 } 713 714 if (error == ENXIO) { 715 (void) fprintf(stderr, "data error type must be " 716 "'checksum' 
or 'io'\n"); 717 return (1); 718 } 719 720 if (translate_record(type, argv[0], range, level, &record, pool, 721 dataset) != 0) 722 return (1); 723 if (!error) 724 error = EIO; 725 } 726 727 /* 728 * If this is pool-wide metadata, unmount everything. The ioctl() will 729 * unload the pool, so that we trigger spa-wide reopen of metadata next 730 * time we access the pool. 731 */ 732 if (dataset[0] != '\0' && domount) { 733 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) 734 return (1); 735 736 if (zfs_unmount(zhp, NULL, 0) != 0) 737 return (1); 738 } 739 740 record.zi_error = error; 741 742 ret = register_handler(pool, flags, &record, quiet); 743 744 if (dataset[0] != '\0' && domount) 745 ret = (zfs_mount(zhp, NULL, 0) != 0); 746 747 libzfs_fini(g_zfs); 748 749 return (ret); 750 } 751