/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * ZFS Fault Injector
 *
 * This userland component takes a set of options and uses libzpool to translate
 * from a user-visible object type and name to an internal representation.
 * There are two basic types of faults: device faults and data faults.
 *
 *
 * DEVICE FAULTS
 *
 * Errors can be injected into a particular vdev using the '-d' option. This
 * option takes a path or vdev GUID to uniquely identify the device within a
 * pool. There are two types of errors that can be injected, EIO and ENXIO,
 * that can be controlled through the '-e' option. The default is ENXIO. For
 * EIO failures, any attempt to read data from the device will return EIO, but
 * subsequent attempts to reopen the device will succeed. For ENXIO failures,
 * any attempt to read from the device will return EIO, but any attempt to
 * reopen the device will also return ENXIO.
 *
 * This form of the command looks like:
 *
 *      zinject -d device [-e errno] pool
 *
 *
 * DATA FAULTS
 *
 * We begin with a tuple of the form:
 *
 *      <type,level,range,object>
 *
 *      type    A string describing the type of data to target. Each type
 *              implicitly describes how to interpret 'object'. Currently,
 *              the following values are supported:
 *
 *              data            User data for a file
 *              dnode           Dnode for a file or directory
 *
 *              The following MOS objects are special. Instead of injecting
 *              errors on a particular object or blkid, we inject errors across
 *              all objects of the given type.
 *
 *              mos             Any data in the MOS
 *              mosdir          object directory
 *              config          pool configuration
 *              bplist          blkptr list
 *              spacemap        spacemap
 *              metaslab        metaslab
 *              errlog          persistent error log
 *
 *      level   Object level. Defaults to '0', not applicable to all types. If
 *              a range is given, this corresponds to the indirect block
 *              corresponding to the specific range.
 *
 *      range   A numerical range [start,end) within the object. Defaults to
 *              the full size of the file.
 *
 *      object  A string describing the logical location of the object. For
 *              files and directories (currently the only supported types),
 *              this is the path of the object on disk.
 *
 * This is translated, via libzpool, into the following internal representation:
 *
 *      <type,objset,object,level,range>
 *
 * These types should be self-explanatory. This tuple is then passed to the
 * kernel via a special ioctl() to initiate fault injection for the given
 * object. Note that 'type' is not strictly necessary for fault injection, but
 * is used when translating existing faults into a human-readable string.
 *
 *
 * The command itself takes one of the forms:
 *
 *      zinject
 *      zinject <-a | -u pool>
 *      zinject -c <id|all>
 *      zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 *          [-r range] <object>
 *      zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 *
 * With no arguments, the command prints all currently registered injection
 * handlers, with their numeric identifiers.
 *
 * The '-c' option will clear the given handler, or all handlers if 'all' is
 * specified.
 *
 * The '-e' option takes a string describing the errno to simulate. This must
 * be either 'io' or 'checksum'. In most cases this will result in the same
 * behavior, but RAID-Z will produce a different set of ereports for this
 * situation.
 *
 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
 * specified, then the ARC cache is flushed appropriately. If '-u' is
 * specified, then the underlying SPA is unloaded. Either of these flags can be
 * specified independently of any other handlers. The '-m' flag automatically
 * does an unmount and remount of the underlying dataset to aid in flushing the
 * cache.
 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100. The default is 100.
 *
 * The fourth form is responsible for actually injecting the handler into the
 * framework. It takes the arguments described above, translates them to the
 * internal tuple using libzpool, and then issues an ioctl() to register the
 * handler.
 *
 * The final form can target a specific bookmark, regardless of whether a
 * human-readable interface has been designed. It allows developers to specify
 * a particular block by number.
136 */ 137 138 #include <errno.h> 139 #include <fcntl.h> 140 #include <stdio.h> 141 #include <stdlib.h> 142 #include <strings.h> 143 #include <unistd.h> 144 145 #include <sys/fs/zfs.h> 146 #include <sys/mount.h> 147 148 #include <libzfs.h> 149 150 #undef verify /* both libzfs.h and zfs_context.h want to define this */ 151 152 #include "zinject.h" 153 154 int zfs_fd; 155 156 #define ECKSUM EBADE 157 158 static const char *errtable[TYPE_INVAL] = { 159 "data", 160 "dnode", 161 "mos", 162 "mosdir", 163 "metaslab", 164 "config", 165 "bplist", 166 "spacemap", 167 "errlog" 168 }; 169 170 static err_type_t 171 name_to_type(const char *arg) 172 { 173 int i; 174 for (i = 0; i < TYPE_INVAL; i++) 175 if (strcmp(errtable[i], arg) == 0) 176 return (i); 177 178 return (TYPE_INVAL); 179 } 180 181 static const char * 182 type_to_name(uint64_t type) 183 { 184 switch (type) { 185 case DMU_OT_OBJECT_DIRECTORY: 186 return ("mosdir"); 187 case DMU_OT_OBJECT_ARRAY: 188 return ("metaslab"); 189 case DMU_OT_PACKED_NVLIST: 190 return ("config"); 191 case DMU_OT_BPLIST: 192 return ("bplist"); 193 case DMU_OT_SPACE_MAP: 194 return ("spacemap"); 195 case DMU_OT_ERROR_LOG: 196 return ("errlog"); 197 default: 198 return ("-"); 199 } 200 } 201 202 203 /* 204 * Print usage message. 205 */ 206 void 207 usage(void) 208 { 209 (void) printf( 210 "usage:\n" 211 "\n" 212 "\tzinject\n" 213 "\n" 214 "\t\tList all active injection records.\n" 215 "\n" 216 "\tzinject -c <id|all>\n" 217 "\n" 218 "\t\tClear the particular record (if given a numeric ID), or\n" 219 "\t\tall records if 'all' is specificed.\n" 220 "\n" 221 "\tzinject -d device [-e errno] pool\n" 222 "\t\tInject a fault into a particular device. 'errno' can either\n" 223 "\t\tbe 'nxio' (the default) or 'io'.\n" 224 "\n" 225 "\tzinject -b objset:object:level:blkid pool\n" 226 "\n" 227 "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 228 "\t\tspecified by the remaining tuple. 
Each number is in\n" 229 "\t\thexidecimal, and only one block can be specified.\n" 230 "\n" 231 "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 232 "\t [-a] [-m] [-u] [-f freq] <object>\n" 233 "\n" 234 "\t\tInject an error into the object specified by the '-t' option\n" 235 "\t\tand the object descriptor. The 'object' parameter is\n" 236 "\t\tinterperted depending on the '-t' option.\n" 237 "\n" 238 "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 239 "\t\t-e\tInject a specific error. Must be either 'io' or\n" 240 "\t\t\t'checksum'. Default is 'io'.\n" 241 "\t\t-l\tInject error at a particular block level. Default is " 242 "0.\n" 243 "\t\t-m\tAutomatically remount underlying filesystem.\n" 244 "\t\t-r\tInject error over a particular logical range of an\n" 245 "\t\t\tobject. Will be translated to the appropriate blkid\n" 246 "\t\t\trange according to the object's properties.\n" 247 "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 248 "\t\t\tassociated object.\n" 249 "\t\t-u\tUnload the associated pool. Can be specified with only\n" 250 "\t\t\ta pool object.\n" 251 "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 252 "\t\t\ta percentage between 1 and 100.\n" 253 "\n" 254 "\t-t data\t\tInject an error into the plain file contents of a\n" 255 "\t\t\tfile. The object must be specified as a complete path\n" 256 "\t\t\tto a file on a ZFS filesystem.\n" 257 "\n" 258 "\t-t dnode\tInject an error into the metadnode in the block\n" 259 "\t\t\tcorresponding to the dnode for a file or directory. The\n" 260 "\t\t\t'-r' option is incompatible with this mode. The object\n" 261 "\t\t\tis specified as a complete path to a file or directory\n" 262 "\t\t\ton a ZFS filesystem.\n" 263 "\n" 264 "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 265 "\t\t\ttype. 
Valid types are: mos, mosdir, config, bplist,\n" 266 "\t\t\tspacemap, metaslab, errlog\n"); 267 } 268 269 static int 270 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 271 void *data) 272 { 273 zfs_cmd_t zc; 274 int ret; 275 276 zc.zc_guid = 0; 277 278 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 279 if ((ret = func((int)zc.zc_guid, zc.zc_name, 280 &zc.zc_inject_record, data)) != 0) 281 return (ret); 282 283 return (0); 284 } 285 286 static int 287 print_data_handler(int id, const char *pool, zinject_record_t *record, 288 void *data) 289 { 290 int *count = data; 291 292 if (record->zi_guid != 0) 293 return (0); 294 295 if (*count == 0) { 296 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 297 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 298 (void) printf("--- --------------- ------ " 299 "------ -------- --- ---------------\n"); 300 } 301 302 *count += 1; 303 304 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 305 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 306 type_to_name(record->zi_type), record->zi_level); 307 308 if (record->zi_start == 0 && 309 record->zi_end == -1ULL) 310 (void) printf("all\n"); 311 else 312 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 313 (u_longlong_t)record->zi_end); 314 315 return (0); 316 } 317 318 static int 319 print_device_handler(int id, const char *pool, zinject_record_t *record, 320 void *data) 321 { 322 int *count = data; 323 324 if (record->zi_guid == 0) 325 return (0); 326 327 if (*count == 0) { 328 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 329 (void) printf("--- --------------- ----------------\n"); 330 } 331 332 *count += 1; 333 334 (void) printf("%3d %-15s %llx\n", id, pool, 335 (u_longlong_t)record->zi_guid); 336 337 return (0); 338 } 339 340 /* 341 * Print all registered error handlers. Returns the number of handlers 342 * registered. 
343 */ 344 static int 345 print_all_handlers(void) 346 { 347 int count = 0; 348 349 (void) iter_handlers(print_device_handler, &count); 350 (void) printf("\n"); 351 count = 0; 352 (void) iter_handlers(print_data_handler, &count); 353 354 return (count); 355 } 356 357 /* ARGSUSED */ 358 static int 359 cancel_one_handler(int id, const char *pool, zinject_record_t *record, 360 void *data) 361 { 362 zfs_cmd_t zc; 363 364 zc.zc_guid = (uint64_t)id; 365 366 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 367 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 368 id, strerror(errno)); 369 return (1); 370 } 371 372 return (0); 373 } 374 375 /* 376 * Remove all fault injection handlers. 377 */ 378 static int 379 cancel_all_handlers(void) 380 { 381 int ret = iter_handlers(cancel_one_handler, NULL); 382 383 (void) printf("removed all registered handlers\n"); 384 385 return (ret); 386 } 387 388 /* 389 * Remove a specific fault injection handler. 390 */ 391 static int 392 cancel_handler(int id) 393 { 394 zfs_cmd_t zc; 395 396 zc.zc_guid = (uint64_t)id; 397 398 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 399 (void) fprintf(stderr, "failed to remove handler %d: %s\n", 400 id, strerror(errno)); 401 return (1); 402 } 403 404 (void) printf("removed handler %d\n", id); 405 406 return (0); 407 } 408 409 /* 410 * Register a new fault injection handler. 
411 */ 412 static int 413 register_handler(const char *pool, int flags, zinject_record_t *record, 414 int quiet) 415 { 416 zfs_cmd_t zc; 417 418 (void) strcpy(zc.zc_name, pool); 419 zc.zc_inject_record = *record; 420 zc.zc_guid = flags; 421 422 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 423 (void) fprintf(stderr, "failed to add handler: %s\n", 424 strerror(errno)); 425 return (1); 426 } 427 428 if (flags & ZINJECT_NULL) 429 return (0); 430 431 if (quiet) { 432 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 433 } else { 434 (void) printf("Added handler %llu with the following " 435 "properties:\n", (u_longlong_t)zc.zc_guid); 436 (void) printf(" pool: %s\n", pool); 437 if (record->zi_guid) { 438 (void) printf(" vdev: %llx\n", 439 (u_longlong_t)record->zi_guid); 440 } else { 441 (void) printf("objset: %llu\n", 442 (u_longlong_t)record->zi_objset); 443 (void) printf("object: %llu\n", 444 (u_longlong_t)record->zi_object); 445 (void) printf(" type: %llu\n", 446 (u_longlong_t)record->zi_type); 447 (void) printf(" level: %d\n", record->zi_level); 448 if (record->zi_start == 0 && 449 record->zi_end == -1ULL) 450 (void) printf(" range: all\n"); 451 else 452 (void) printf(" range: [%llu, %llu)\n", 453 (u_longlong_t)record->zi_start, 454 (u_longlong_t)record->zi_end); 455 } 456 } 457 458 return (0); 459 } 460 461 int 462 main(int argc, char **argv) 463 { 464 int c; 465 char *range = NULL; 466 char *cancel = NULL; 467 char *end; 468 char *raw = NULL; 469 char *device = NULL; 470 int level = 0; 471 int quiet = 0; 472 int error = 0; 473 int domount = 0; 474 err_type_t type = TYPE_INVAL; 475 zinject_record_t record = { 0 }; 476 char pool[MAXNAMELEN]; 477 char dataset[MAXNAMELEN]; 478 zfs_handle_t *zhp; 479 int ret; 480 int flags = 0; 481 482 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 483 (void) fprintf(stderr, "failed to open ZFS device\n"); 484 return (1); 485 } 486 487 if (argc == 1) { 488 /* 489 * No arguments. Print the available handlers. 
If there are no 490 * available handlers, direct the user to '-h' for help 491 * information. 492 */ 493 if (print_all_handlers() == 0) { 494 (void) printf("No handlers registered.\n"); 495 (void) printf("Run 'zinject -h' for usage " 496 "information.\n"); 497 } 498 499 return (0); 500 } 501 502 while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:u")) != -1) { 503 switch (c) { 504 case 'a': 505 flags |= ZINJECT_FLUSH_ARC; 506 break; 507 case 'b': 508 raw = optarg; 509 break; 510 case 'c': 511 cancel = optarg; 512 break; 513 case 'd': 514 device = optarg; 515 break; 516 case 'e': 517 if (strcasecmp(optarg, "io") == 0) { 518 error = EIO; 519 } else if (strcasecmp(optarg, "checksum") == 0) { 520 error = ECKSUM; 521 } else if (strcasecmp(optarg, "nxio") == 0) { 522 error = ENXIO; 523 } else { 524 (void) fprintf(stderr, "invalid error type " 525 "'%s': must be 'io', 'checksum' or " 526 "'nxio'\n", optarg); 527 usage(); 528 return (1); 529 } 530 break; 531 case 'f': 532 record.zi_freq = atoi(optarg); 533 if (record.zi_freq < 1 || record.zi_freq > 100) { 534 (void) fprintf(stderr, "frequency range must " 535 "be in the range (0, 100]\n"); 536 return (1); 537 } 538 break; 539 case 'h': 540 usage(); 541 return (0); 542 case 'l': 543 level = (int)strtol(optarg, &end, 10); 544 if (*end != '\0') { 545 (void) fprintf(stderr, "invalid level '%s': " 546 "must be an integer\n", optarg); 547 usage(); 548 return (1); 549 } 550 break; 551 case 'm': 552 domount = 1; 553 break; 554 case 'q': 555 quiet = 1; 556 break; 557 case 'r': 558 range = optarg; 559 break; 560 case 't': 561 if ((type = name_to_type(optarg)) == TYPE_INVAL) { 562 (void) fprintf(stderr, "invalid type '%s'\n", 563 optarg); 564 usage(); 565 return (1); 566 } 567 break; 568 case 'u': 569 flags |= ZINJECT_UNLOAD_SPA; 570 break; 571 case ':': 572 (void) fprintf(stderr, "option -%c requires an " 573 "operand\n", optopt); 574 usage(); 575 return (1); 576 case '?': 577 (void) fprintf(stderr, "invalid option '%c'\n", 578 
optopt); 579 usage(); 580 return (2); 581 } 582 } 583 584 argc -= optind; 585 argv += optind; 586 587 if (cancel != NULL) { 588 /* 589 * '-c' is invalid with any other options. 590 */ 591 if (raw != NULL || range != NULL || type != TYPE_INVAL || 592 level != 0) { 593 (void) fprintf(stderr, "cancel (-c) incompatible with " 594 "any other options\n"); 595 usage(); 596 return (2); 597 } 598 if (argc != 0) { 599 (void) fprintf(stderr, "extraneous argument to '-c'\n"); 600 usage(); 601 return (2); 602 } 603 604 if (strcmp(cancel, "all") == 0) { 605 return (cancel_all_handlers()); 606 } else { 607 int id = (int)strtol(cancel, &end, 10); 608 if (*end != '\0') { 609 (void) fprintf(stderr, "invalid handle id '%s':" 610 " must be an integer or 'all'\n", cancel); 611 usage(); 612 return (1); 613 } 614 return (cancel_handler(id)); 615 } 616 } 617 618 if (device != NULL) { 619 /* 620 * Device (-d) injection uses a completely different mechanism 621 * for doing injection, so handle it separately here. 
622 */ 623 if (raw != NULL || range != NULL || type != TYPE_INVAL || 624 level != 0) { 625 (void) fprintf(stderr, "device (-d) incompatible with " 626 "data error injection\n"); 627 usage(); 628 return (2); 629 } 630 631 if (argc != 1) { 632 (void) fprintf(stderr, "device (-d) injection requires " 633 "a single pool name\n"); 634 usage(); 635 return (2); 636 } 637 638 (void) strcpy(pool, argv[0]); 639 dataset[0] = '\0'; 640 641 if (error == ECKSUM) { 642 (void) fprintf(stderr, "device error type must be " 643 "'io' or 'nxio'\n"); 644 return (1); 645 } 646 647 if (translate_device(pool, device, &record) != 0) 648 return (1); 649 if (!error) 650 error = ENXIO; 651 } else if (raw != NULL) { 652 if (range != NULL || type != TYPE_INVAL || level != 0) { 653 (void) fprintf(stderr, "raw (-b) format with " 654 "any other options\n"); 655 usage(); 656 return (2); 657 } 658 659 if (argc != 1) { 660 (void) fprintf(stderr, "raw (-b) format expects a " 661 "single pool name\n"); 662 usage(); 663 return (2); 664 } 665 666 (void) strcpy(pool, argv[0]); 667 dataset[0] = '\0'; 668 669 if (error == ENXIO) { 670 (void) fprintf(stderr, "data error type must be " 671 "'checksum' or 'io'\n"); 672 return (1); 673 } 674 675 if (translate_raw(raw, &record) != 0) 676 return (1); 677 if (!error) 678 error = EIO; 679 } else if (type == TYPE_INVAL) { 680 if (flags == 0) { 681 (void) fprintf(stderr, "at least one of '-b', '-d', " 682 "'-t', '-a', or '-u' must be specified\n"); 683 usage(); 684 return (2); 685 } 686 687 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 688 (void) strcpy(pool, argv[0]); 689 dataset[0] = '\0'; 690 } else if (argc != 0) { 691 (void) fprintf(stderr, "extraneous argument for " 692 "'-f'\n"); 693 usage(); 694 return (2); 695 } 696 697 flags |= ZINJECT_NULL; 698 } else { 699 if (argc != 1) { 700 (void) fprintf(stderr, "missing object\n"); 701 usage(); 702 return (2); 703 } 704 705 if (error == ENXIO) { 706 (void) fprintf(stderr, "data error type must be " 707 "'checksum' 
or 'io'\n"); 708 return (1); 709 } 710 711 if (translate_record(type, argv[0], range, level, &record, pool, 712 dataset) != 0) 713 return (1); 714 if (!error) 715 error = EIO; 716 } 717 718 /* 719 * If this is pool-wide metadata, unmount everything. The ioctl() will 720 * unload the pool, so that we trigger spa-wide reopen of metadata next 721 * time we access the pool. 722 */ 723 if (dataset[0] != '\0' && domount) { 724 if ((zhp = zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) 725 return (1); 726 727 if (zfs_unmount(zhp, NULL, 0) != 0) 728 return (1); 729 } 730 731 record.zi_error = error; 732 733 ret = register_handler(pool, flags, &record, quiet); 734 735 if (dataset[0] != '\0' && domount) 736 ret = (zfs_mount(zhp, NULL, 0) != 0); 737 738 return (ret); 739 } 740