/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 */

/*
 * ZFS fault injection
 *
 * To handle fault injection, we keep track of a series of zinject_record_t
 * structures which describe which logical block(s) should be injected with a
 * fault. These are kept in a global list. Each record corresponds to a given
 * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
 * or exported while the injection record exists.
 *
 * Device level injection is done using the 'zi_guid' field. If this is set, it
 * means that the error is destined for a particular device, not a piece of
 * data.
 *
 * This is a rather poor data structure and algorithm, but we don't expect more
 * than a few faults at any one time, so it should be sufficient for our needs.
 */
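
/*
 * For orientation, the life cycle of a handler as implemented in this file:
 * zio_inject_fault() registers a record and takes the spa_t hold,
 * zio_inject_list_next() enumerates the registered handlers, and
 * zio_clear_fault() removes a handler and drops its hold. The
 * zio_handle_*() routines below are the hooks consulted from the I/O,
 * label, device, and delay paths to decide whether a matching operation
 * should fail, be delayed, or have its writes ignored.
 */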

#include <sys/arc.h>
#include <sys/zio_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/vdev_impl.h>
#include <sys/dmu_objset.h>
#include <sys/fs/zfs.h>

uint32_t zio_injection_enabled;

/*
 * Data describing each zinject handler registered on the system; this
 * also contains the list node linking the handler into the global zinject
 * handler list.
 */
typedef struct inject_handler {
        int zi_id;
        spa_t *zi_spa;
        zinject_record_t zi_record;
        uint64_t *zi_lanes;
        int zi_next_lane;
        list_node_t zi_link;
} inject_handler_t;

/*
 * List of all zinject handlers registered on the system, protected by
 * the inject_lock defined below.
 */
static list_t inject_handlers;

/*
 * This protects insertion into, and traversal of, the inject handler
 * list defined above, as well as inject_delay_count. Any time a
 * handler is inserted or removed from the list, this lock should be
 * taken as a RW_WRITER; and any time traversal is done over the list
 * (without modification to it) this lock should be taken as a RW_READER.
 */
static krwlock_t inject_lock;

/*
 * This holds the number of zinject delay handlers that have been
 * registered on the system. It is protected by the inject_lock defined
 * above. Thus modifications to this count must be made as a RW_WRITER
 * of the inject_lock, and reads of this count must hold (at least) a
 * RW_READER of the lock.
 */
static int inject_delay_count = 0;

/*
 * This lock is used only in zio_handle_io_delay(); refer to the comment
 * in that function for more details.
 */
static kmutex_t inject_delay_mtx;

/*
 * Used to assign unique identifying numbers to each new zinject handler.
 */
static int inject_next_id = 1;

/*
 * Returns true if the given record matches the I/O in progress.
 */
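/*
 * Note on zi_freq (illustrative, derived from the checks below): a record
 * with zi_freq == 0 always matches, while a nonzero zi_freq injects the
 * fault on roughly that percentage of otherwise-matching I/Os. For
 * example, zi_freq == 25 makes spa_get_random(100) < 25 true about one
 * time in four.
 */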
static boolean_t
zio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
    zinject_record_t *record, int error)
{
        /*
         * Check for a match against the MOS, which is based on type
         */
        if (zb->zb_objset == DMU_META_OBJSET &&
            record->zi_objset == DMU_META_OBJSET &&
            record->zi_object == DMU_META_DNODE_OBJECT) {
                if (record->zi_type == DMU_OT_NONE ||
                    type == record->zi_type)
                        return (record->zi_freq == 0 ||
                            spa_get_random(100) < record->zi_freq);
                else
                        return (B_FALSE);
        }

        /*
         * Check for an exact match.
         */
        if (zb->zb_objset == record->zi_objset &&
            zb->zb_object == record->zi_object &&
            zb->zb_level == record->zi_level &&
            zb->zb_blkid >= record->zi_start &&
            zb->zb_blkid <= record->zi_end &&
            (record->zi_dvas == 0 || (record->zi_dvas & (1ULL << dva))) &&
            error == record->zi_error) {
                return (record->zi_freq == 0 ||
                    spa_get_random(100) < record->zi_freq);
        }

        return (B_FALSE);
}

/*
 * Panic the system when a config change happens in the function
 * specified by tag.
 */
void
zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
{
        inject_handler_t *handler;

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                if (spa != handler->zi_spa)
                        continue;

                if (handler->zi_record.zi_type == type &&
                    strcmp(tag, handler->zi_record.zi_func) == 0)
                        panic("Panic requested in function %s\n", tag);
        }

        rw_exit(&inject_lock);
}


/*
 * If this is a physical I/O for a vdev child, determine which DVA it is
 * for. We iterate backwards through the DVAs matching on the offset so
 * that we end up with ZI_NO_DVA (-1) if we don't find a match.
 */
static int
zio_match_dva(zio_t *zio)
{
        int i = ZI_NO_DVA;

        if (zio->io_bp != NULL && zio->io_vd != NULL &&
            zio->io_child_type == ZIO_CHILD_VDEV) {
                for (i = BP_GET_NDVAS(zio->io_bp) - 1; i >= 0; i--) {
                        dva_t *dva = &zio->io_bp->blk_dva[i];
                        uint64_t off = DVA_GET_OFFSET(dva);
                        vdev_t *vd = vdev_lookup_top(zio->io_spa,
                            DVA_GET_VDEV(dva));

                        /* Compensate for vdev label added to leaves */
                        if (zio->io_vd->vdev_ops->vdev_op_leaf)
                                off += VDEV_LABEL_START_SIZE;

                        if (zio->io_vd == vd && zio->io_offset == off)
                                break;
                }
        }

        return (i);
}


/*
 * Inject a decryption failure. Decryption failures can occur in
 * both the ARC and the ZIO layers.
 */
int
zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb,
    uint64_t type, int error)
{
        int ret = 0;
        inject_handler_t *handler;

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                if (spa != handler->zi_spa ||
                    handler->zi_record.zi_cmd != ZINJECT_DECRYPT_FAULT)
                        continue;

                if (zio_match_handler((zbookmark_phys_t *)zb, type, ZI_NO_DVA,
                    &handler->zi_record, error)) {
                        ret = error;
                        break;
                }
        }

        rw_exit(&inject_lock);
        return (ret);
}

/*
 * Determine if the I/O in question should return failure. Returns the errno
 * to be returned to the caller.
 */
int
zio_handle_fault_injection(zio_t *zio, int error)
{
        int ret = 0;
        inject_handler_t *handler;

        /*
         * Ignore I/O not associated with any logical data.
         */
        if (zio->io_logical == NULL)
                return (0);

        /*
         * Currently, we only support fault injection on reads.
         */
        if (zio->io_type != ZIO_TYPE_READ)
                return (0);

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                if (zio->io_spa != handler->zi_spa ||
                    handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
                        continue;

                /* If this handler matches, return the specified error */
                if (zio_match_handler(&zio->io_logical->io_bookmark,
                    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
                    zio_match_dva(zio), &handler->zi_record, error)) {
                        ret = error;
                        break;
                }
        }

        rw_exit(&inject_lock);

        return (ret);
}

/*
 * Determine if the zio is part of a label update and has an injection
 * handler associated with that portion of the label. Currently, we
 * allow error injection in either the nvlist or the uberblock region of
 * the vdev label.
 */
int
zio_handle_label_injection(zio_t *zio, int error)
{
        inject_handler_t *handler;
        vdev_t *vd = zio->io_vd;
        uint64_t offset = zio->io_offset;
        int label;
        int ret = 0;

        if (offset >= VDEV_LABEL_START_SIZE &&
            offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
                return (0);

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {
                uint64_t start = handler->zi_record.zi_start;
                uint64_t end = handler->zi_record.zi_end;

                if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
                        continue;

                /*
                 * The injection region is the relative offsets within a
                 * vdev label. We must determine the label which is being
                 * updated and adjust our region accordingly.
                 */
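                /*
                 * For example (illustrative): an injection range of
                 * [0, 4096) applied while label 2 is being written maps,
                 * via vdev_label_offset(), to the first 4K of the third
                 * label, which lives near the end of the device.
                 */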
                label = vdev_label_number(vd->vdev_psize, offset);
                start = vdev_label_offset(vd->vdev_psize, label, start);
                end = vdev_label_offset(vd->vdev_psize, label, end);

                if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
                    (offset >= start && offset <= end)) {
                        ret = error;
                        break;
                }
        }
        rw_exit(&inject_lock);
        return (ret);
}


int
zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
{
        inject_handler_t *handler;
        int ret = 0;

        /*
         * We skip over faults in the labels unless it's during
         * device open (i.e. zio == NULL).
         */
        if (zio != NULL) {
                uint64_t offset = zio->io_offset;

                if (offset < VDEV_LABEL_START_SIZE ||
                    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
                        return (0);
        }

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT)
                        continue;

                if (vd->vdev_guid == handler->zi_record.zi_guid) {
                        if (handler->zi_record.zi_failfast &&
                            (zio == NULL || (zio->io_flags &
                            (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
                                continue;
                        }

                        /* Handle type specific I/O failures */
                        if (zio != NULL &&
                            handler->zi_record.zi_iotype != ZIO_TYPES &&
                            handler->zi_record.zi_iotype != zio->io_type)
                                continue;

                        if (handler->zi_record.zi_error == error) {
                                /*
                                 * For a failed open, pretend like the device
                                 * has gone away.
                                 */
                                if (error == ENXIO)
                                        vd->vdev_stat.vs_aux =
                                            VDEV_AUX_OPEN_FAILED;

                                /*
                                 * Treat these errors as if they had been
                                 * retried so that all the appropriate stats
                                 * and FMA events are generated.
                                 */
                                if (!handler->zi_record.zi_failfast &&
                                    zio != NULL)
                                        zio->io_flags |= ZIO_FLAG_IO_RETRY;

                                ret = error;
                                break;
                        }
                        if (handler->zi_record.zi_error == ENXIO) {
                                ret = SET_ERROR(EIO);
                                break;
                        }
                }
        }

        rw_exit(&inject_lock);

        return (ret);
}

/*
 * Simulate hardware that ignores cache flushes. For the requested number
 * of seconds, nix the actual writing to disk.
 */
void
zio_handle_ignored_writes(zio_t *zio)
{
        inject_handler_t *handler;

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                /* Ignore errors not destined for this pool */
                if (zio->io_spa != handler->zi_spa ||
                    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
                        continue;

                /*
                 * Positive duration implies # of seconds, negative
                 * a number of txgs
                 */
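                /*
                 * For example (units derived from the code below):
                 * zi_duration == 30 means thirty seconds of lbolt time,
                 * while zi_duration == -5 means five txgs past the txg
                 * recorded in zi_timer when the first write was seen.
                 */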
                if (handler->zi_record.zi_timer == 0) {
                        if (handler->zi_record.zi_duration > 0)
                                handler->zi_record.zi_timer = ddi_get_lbolt64();
                        else
                                handler->zi_record.zi_timer = zio->io_txg;
                }

                /* Have a "problem" writing 60% of the time */
                if (spa_get_random(100) < 60)
                        zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
                break;
        }

        rw_exit(&inject_lock);
}

void
spa_handle_ignored_writes(spa_t *spa)
{
        inject_handler_t *handler;

        if (zio_injection_enabled == 0)
                return;

        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler)) {

                if (spa != handler->zi_spa ||
                    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
                        continue;

                if (handler->zi_record.zi_duration > 0) {
                        VERIFY(handler->zi_record.zi_timer == 0 ||
                            handler->zi_record.zi_timer +
                            handler->zi_record.zi_duration * hz >
                            ddi_get_lbolt64());
                } else {
                        /* duration is negative so the subtraction here adds */
                        VERIFY(handler->zi_record.zi_timer == 0 ||
                            handler->zi_record.zi_timer -
                            handler->zi_record.zi_duration >=
                            spa_syncing_txg(spa));
                }
        }

        rw_exit(&inject_lock);
}

hrtime_t
zio_handle_io_delay(zio_t *zio)
{
        vdev_t *vd = zio->io_vd;
        inject_handler_t *min_handler = NULL;
        hrtime_t min_target = 0;

        rw_enter(&inject_lock, RW_READER);

        /*
         * inject_delay_count is a subset of zio_injection_enabled that
         * is only incremented for delay handlers. These checks are
         * mainly added to remind the reader why we're not explicitly
         * checking zio_injection_enabled like the other functions.
         */
        IMPLY(inject_delay_count > 0, zio_injection_enabled > 0);
        IMPLY(zio_injection_enabled == 0, inject_delay_count == 0);

        /*
         * If there aren't any inject delay handlers registered, then we
         * can short circuit and simply return 0 here. A value of zero
         * informs zio_delay_interrupt() that this request should not be
         * delayed. This short circuit keeps us from acquiring the
         * inject_delay_mtx unnecessarily.
         */
        if (inject_delay_count == 0) {
                rw_exit(&inject_lock);
                return (0);
        }

        /*
         * Each inject handler has a number of "lanes" associated with
         * it. Each lane is able to handle requests independently of one
         * another, and at a latency defined by the inject handler
         * record's zi_timer field. Thus if a handler is configured with
         * a single lane with a 10ms latency, it will delay requests
         * such that only a single request is completed every 10ms. So,
         * if more than one request is attempted in each 10ms interval,
         * the average latency of the requests will be greater than
         * 10ms; but if only a single request is submitted in each 10ms
         * interval the average latency will be 10ms.
         *
         * We need to acquire this mutex to prevent multiple concurrent
         * threads being assigned to the same lane of a given inject
         * handler. The mutex allows us to perform the following two
         * operations atomically:
         *
         *      1. determine the minimum handler and minimum target
         *         value of all the possible handlers
         *      2. update that minimum handler's lane array
         *
         * Without atomicity, two (or more) threads could pick the same
         * lane in step (1), and then conflict with each other in step
         * (2). This could allow a single lane handler to process
         * multiple requests simultaneously, which shouldn't be possible.
         */
        mutex_enter(&inject_delay_mtx);

        for (inject_handler_t *handler = list_head(&inject_handlers);
            handler != NULL; handler = list_next(&inject_handlers, handler)) {
                if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
                        continue;

                if (vd->vdev_guid != handler->zi_record.zi_guid)
                        continue;

                /*
                 * Defensive; should never happen as the array allocation
                 * occurs prior to inserting this handler on the list.
                 */
                ASSERT3P(handler->zi_lanes, !=, NULL);

                /*
                 * This should never happen, the zinject command should
                 * prevent a user from setting an IO delay with zero lanes.
                 */
                ASSERT3U(handler->zi_record.zi_nlanes, !=, 0);

                ASSERT3U(handler->zi_record.zi_nlanes, >,
                    handler->zi_next_lane);

                /*
                 * We want to issue this IO to the lane that will become
                 * idle the soonest, so we compare the soonest this
                 * specific handler can complete the IO with all other
                 * handlers, to find the lowest value of all possible
                 * lanes. We then use this lane to submit the request.
                 *
                 * Since each handler has a constant value for its
                 * delay, we can just use the "next" lane for that
                 * handler; as it will always be the lane with the
                 * lowest value for that particular handler (i.e. the
                 * lane that will become idle the soonest). This saves a
                 * scan of each handler's lanes array.
                 *
                 * There are two cases to consider when determining when
                 * this specific IO request should complete. If this
                 * lane is idle, we want to "submit" the request now so
                 * it will complete after zi_timer nanoseconds. Thus,
                 * we set the target to now + zi_timer.
                 *
                 * If the lane is busy, we want this request to complete
                 * zi_timer nanoseconds after the lane becomes idle.
                 * Since the 'zi_lanes' array holds the time at which
                 * each lane will become idle, we use that value to
                 * determine when this request should complete.
                 */
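                /*
                 * Worked example (illustrative): with two lanes and a
                 * configured latency T, three requests arriving together
                 * at time "now" are assigned targets of now + T (lane 0),
                 * now + T (lane 1), and now + 2T (lane 0 reused), so the
                 * third request observes twice the configured latency.
                 */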
                hrtime_t idle = handler->zi_record.zi_timer + gethrtime();
                hrtime_t busy = handler->zi_record.zi_timer +
                    handler->zi_lanes[handler->zi_next_lane];
                hrtime_t target = MAX(idle, busy);

                if (min_handler == NULL) {
                        min_handler = handler;
                        min_target = target;
                        continue;
                }

                ASSERT3P(min_handler, !=, NULL);
                ASSERT3U(min_target, !=, 0);

                /*
                 * We don't yet increment the "next lane" variable since
                 * we still might find a lower value lane in another
                 * handler during any remaining iterations. Once we're
                 * sure we've selected the absolute minimum, we'll claim
                 * the lane and increment the handler's "next lane"
                 * field below.
                 */

                if (target < min_target) {
                        min_handler = handler;
                        min_target = target;
                }
        }

        /*
         * 'min_handler' will be NULL if no IO delays are registered for
         * this vdev, otherwise it will point to the handler containing
         * the lane that will become idle the soonest.
         */
        if (min_handler != NULL) {
                ASSERT3U(min_target, !=, 0);
                min_handler->zi_lanes[min_handler->zi_next_lane] = min_target;

                /*
                 * If we've used all possible lanes for this handler,
                 * loop back and start using the first lane again;
                 * otherwise, just increment the lane index.
                 */
                min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) %
                    min_handler->zi_record.zi_nlanes;
        }

        mutex_exit(&inject_delay_mtx);
        rw_exit(&inject_lock);

        return (min_target);
}

/*
 * Create a new handler for the given record. We add it to the list, adding
 * a reference to the spa_t in the process. We increment zio_injection_enabled,
 * which is the switch to trigger all fault injection.
 */
int
zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
{
        inject_handler_t *handler;
        int error;
        spa_t *spa;

        /*
         * If this is pool-wide metadata, make sure we unload the corresponding
         * spa_t, so that the next attempt to load it will trigger the fault.
         * We call spa_reset() to unload the pool appropriately.
         */
        if (flags & ZINJECT_UNLOAD_SPA)
                if ((error = spa_reset(name)) != 0)
                        return (error);

        if (record->zi_cmd == ZINJECT_DELAY_IO) {
                /*
                 * A value of zero for the number of lanes or for the
                 * delay time doesn't make sense.
                 */
                if (record->zi_timer == 0 || record->zi_nlanes == 0)
                        return (SET_ERROR(EINVAL));

                /*
                 * The number of lanes is directly mapped to the size of
                 * an array used by the handler. Thus, to ensure the
                 * user doesn't trigger an allocation that's "too large"
                 * we cap the number of lanes here.
                 */
                if (record->zi_nlanes >= UINT16_MAX)
                        return (SET_ERROR(EINVAL));
        }

        if (!(flags & ZINJECT_NULL)) {
                /*
                 * spa_inject_addref() will add an injection reference,
                 * which will prevent the pool from being removed from the
                 * namespace while still allowing it to be unloaded.
                 */
                if ((spa = spa_inject_addref(name)) == NULL)
                        return (SET_ERROR(ENOENT));

                handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);

                handler->zi_spa = spa;
                handler->zi_record = *record;

                if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
                        handler->zi_lanes = kmem_zalloc(
                            sizeof (*handler->zi_lanes) *
                            handler->zi_record.zi_nlanes, KM_SLEEP);
                        handler->zi_next_lane = 0;
                } else {
                        handler->zi_lanes = NULL;
                        handler->zi_next_lane = 0;
                }

                rw_enter(&inject_lock, RW_WRITER);

                /*
                 * We can't move this increment into the conditional
                 * above because we need to hold the RW_WRITER lock of
                 * inject_lock, and we don't want to hold that while
                 * allocating the handler's zi_lanes array.
                 */
                if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
                        ASSERT3S(inject_delay_count, >=, 0);
                        inject_delay_count++;
                        ASSERT3S(inject_delay_count, >, 0);
                }

                *id = handler->zi_id = inject_next_id++;
                list_insert_tail(&inject_handlers, handler);
                atomic_inc_32(&zio_injection_enabled);

                rw_exit(&inject_lock);
        }

        /*
         * Flush the ARC, so that any attempts to read this data will end up
         * going to the ZIO layer. Note that this is a little overkill, but
         * we don't have the necessary ARC interfaces to do anything else, and
         * fault injection isn't a performance critical path.
         */
        if (flags & ZINJECT_FLUSH_ARC)
                /*
                 * We must use FALSE to ensure arc_flush returns, since
                 * we're not preventing concurrent ARC insertions.
                 */
                arc_flush(NULL, FALSE);

        return (0);
}

/*
 * Returns the next record with an ID greater than that supplied to the
 * function. Used to iterate over all handlers in the system.
 */
int
zio_inject_list_next(int *id, char *name, size_t buflen,
    zinject_record_t *record)
{
        inject_handler_t *handler;
        int ret;

        mutex_enter(&spa_namespace_lock);
        rw_enter(&inject_lock, RW_READER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler))
                if (handler->zi_id > *id)
                        break;

        if (handler) {
                *record = handler->zi_record;
                *id = handler->zi_id;
                (void) strncpy(name, spa_name(handler->zi_spa), buflen);
                ret = 0;
        } else {
                ret = SET_ERROR(ENOENT);
        }

        rw_exit(&inject_lock);
        mutex_exit(&spa_namespace_lock);

        return (ret);
}

/*
 * Clear the fault handler with the given identifier, or return ENOENT if none
 * exists.
 */
int
zio_clear_fault(int id)
{
        inject_handler_t *handler;

        rw_enter(&inject_lock, RW_WRITER);

        for (handler = list_head(&inject_handlers); handler != NULL;
            handler = list_next(&inject_handlers, handler))
                if (handler->zi_id == id)
                        break;

        if (handler == NULL) {
                rw_exit(&inject_lock);
                return (SET_ERROR(ENOENT));
        }

        if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
                ASSERT3S(inject_delay_count, >, 0);
                inject_delay_count--;
                ASSERT3S(inject_delay_count, >=, 0);
        }

        list_remove(&inject_handlers, handler);
        rw_exit(&inject_lock);

        if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
                ASSERT3P(handler->zi_lanes, !=, NULL);
                kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) *
                    handler->zi_record.zi_nlanes);
        } else {
                ASSERT3P(handler->zi_lanes, ==, NULL);
        }

        spa_inject_delref(handler->zi_spa);
        kmem_free(handler, sizeof (inject_handler_t));
        atomic_dec_32(&zio_injection_enabled);

        return (0);
}

void
zio_inject_init(void)
{
        rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
        mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL);
        list_create(&inject_handlers, sizeof (inject_handler_t),
            offsetof(inject_handler_t, zi_link));
}

void
zio_inject_fini(void)
{
        list_destroy(&inject_handlers);
        mutex_destroy(&inject_delay_mtx);
        rw_destroy(&inject_lock);
}