/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 *
 * Copyright (c) 2018, Intel Corporation.
 * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
 */

#include <sys/vdev_impl.h>
#include <sys/vdev_draid.h>
#include <sys/dsl_scan.h>
#include <sys/spa_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/vdev_rebuild.h>
#include <sys/zio.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zap.h>

/*
 * This file contains the sequential reconstruction implementation for
 * resilvering.  This form of resilvering is internally referred to as device
 * rebuild to avoid conflating it with the traditional healing reconstruction
 * performed by the dsl scan code.
 *
 * When replacing a device, or scrubbing the pool, ZFS has historically used
 * a process called resilvering which is a form of healing reconstruction.
 * This approach has the advantage that as blocks are read from disk their
 * checksums can be immediately verified and the data repaired.  Unfortunately,
 * it also results in a random IO pattern to the disk even when extra care
 * is taken to sequentialize the IO as much as possible.  This substantially
 * increases the time required to resilver the pool and restore redundancy.
 *
 * For mirrored devices it's possible to implement an alternate sequential
 * reconstruction strategy when resilvering.  Sequential reconstruction
 * behaves like a traditional RAID rebuild and reconstructs a device in LBA
 * order without verifying the checksum.  After this phase completes a second
 * scrub phase is started to verify all of the checksums.  This two phase
 * process will take longer than the healing reconstruction described above.
 * However, it has the advantage that after the first phase of reconstruction
 * completes, redundancy has been restored.  At this point the pool can incur
 * another device failure without risking data loss.
 *
 * There are a few noteworthy limitations and other advantages of resilvering
 * using sequential reconstruction vs healing reconstruction.
 *
 * Limitations:
 *
 *   - Sequential reconstruction is not possible on RAIDZ due to its
 *     variable stripe width.  Note dRAID uses a fixed stripe width which
 *     avoids this issue, but comes at the expense of some usable capacity.
 *
 *   - Block checksums are not verified during sequential reconstruction.
 *     Similar to traditional RAID the parity/mirror data is reconstructed
 *     but cannot be immediately double checked.  For this reason when the
 *     last active resilver completes the pool is automatically scrubbed
 *     by default.
 *
 *   - Deferred resilvers using sequential reconstruction are not currently
 *     supported.  When adding another vdev to an active top-level resilver
 *     it must be restarted.
 *
 * Advantages:
 *
 *   - Sequential reconstruction is performed in LBA order which may be faster
 *     than healing reconstruction, particularly when using HDDs (or
 *     especially with SMR devices).  Only allocated capacity is resilvered.
 *
 *   - Sequential reconstruction is not constrained by ZFS block boundaries.
 *     This allows it to issue larger IOs to disk which span multiple blocks
 *     allowing all of these logical blocks to be repaired with a single IO.
 *
 *   - Unlike a healing resilver or scrub which are pool wide operations,
 *     sequential reconstruction is handled by the top-level vdevs.  This
 *     allows for it to be started or canceled on a top-level vdev without
 *     impacting any other top-level vdevs in the pool.
 *
 *   - Data only referenced by a pool checkpoint will be repaired because
 *     that space is reflected in the space maps.  This differs from a
 *     healing resilver or scrub which will not repair that data.
 */

/*
 * Size of rebuild reads; defaults to 1MiB per data disk and is capped at
 * SPA_MAXBLOCKSIZE.
 */
unsigned long zfs_rebuild_max_segment = 1024 * 1024;

/*
 * Maximum number of bytes allowed to be in flight per leaf vdev by a
 * sequential resilver.  We attempt to strike a balance here between keeping
 * the vdev queues full of I/Os at all times and not overflowing the queues
 * to cause long latency, which would cause long txg sync times.
 *
 * A large default value can be safely used here because the default target
 * segment size is also large (zfs_rebuild_max_segment=1M).  This helps keep
 * the queue depth short.
 *
 * 32MB was selected as the default value to achieve good performance with
 * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s).  A sequential
 * rebuild was unable to saturate all of the drives using smaller values.
 * With a value of 32MB the sequential resilver write rate was measured at
 * 800MB/s sustained while rebuilding to a distributed spare.
 */
unsigned long zfs_rebuild_vdev_limit = 32 << 20;

/*
 * Automatically start a pool scrub when the last active sequential resilver
 * completes in order to verify the checksums of all blocks which have been
 * resilvered.  This option is enabled by default and is strongly recommended.
 */
int zfs_rebuild_scrub_enabled = 1;

/*
 * For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
 */
static void vdev_rebuild_thread(void *arg);

/*
 * Clear the per-vdev rebuild bytes value for a vdev tree.
 */
static void
clear_rebuild_bytes(vdev_t *vd)
{
	vdev_stat_t *vs = &vd->vdev_stat;

	for (uint64_t i = 0; i < vd->vdev_children; i++)
		clear_rebuild_bytes(vd->vdev_child[i]);

	mutex_enter(&vd->vdev_stat_lock);
	vs->vs_rebuild_processed = 0;
	mutex_exit(&vd->vdev_stat_lock);
}

/*
 * Determines whether a vdev_rebuild_thread() should be stopped.
 */
static boolean_t
vdev_rebuild_should_stop(vdev_t *vd)
{
	return (!vdev_writeable(vd) || vd->vdev_removing ||
	    vd->vdev_rebuild_exit_wanted ||
	    vd->vdev_rebuild_cancel_wanted ||
	    vd->vdev_rebuild_reset_wanted);
}

/*
 * Determine if the rebuild should be canceled.  This may happen when all
 * vdevs with MISSING DTLs are detached.
 */
static boolean_t
vdev_rebuild_should_cancel(vdev_t *vd)
{
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

	if (!vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg))
		return (B_TRUE);

	return (B_FALSE);
}

/*
 * The sync task for updating the on-disk state of a rebuild.  This is
 * scheduled by vdev_rebuild_range().
 */
static void
vdev_rebuild_update_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	uint64_t txg = dmu_tx_get_txg(tx);

	mutex_enter(&vd->vdev_rebuild_lock);

	if (vr->vr_scan_offset[txg & TXG_MASK] > 0) {
		vrp->vrp_last_offset = vr->vr_scan_offset[txg & TXG_MASK];
		vr->vr_scan_offset[txg & TXG_MASK] = 0;
	}

	vrp->vrp_scan_time_ms = vr->vr_prev_scan_time_ms +
	    NSEC2MSEC(gethrtime() - vr->vr_pass_start_time);

	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp, tx));

	mutex_exit(&vd->vdev_rebuild_lock);
}

/*
 * Initialize the on-disk state for a new rebuild and start the rebuild
 * thread.
 */
static void
vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

	ASSERT(vd->vdev_rebuilding);

	spa_feature_incr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);

	mutex_enter(&vd->vdev_rebuild_lock);
	bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
	vrp->vrp_rebuild_state = VDEV_REBUILD_ACTIVE;
	vrp->vrp_min_txg = 0;
	vrp->vrp_max_txg = dmu_tx_get_txg(tx);
	vrp->vrp_start_time = gethrestime_sec();
	vrp->vrp_scan_time_ms = 0;
	vr->vr_prev_scan_time_ms = 0;

	/*
	 * Rebuilds are currently only used when replacing a device, in which
	 * case there must be DTL_MISSING entries.  In the future, we could
	 * allow rebuilds to be used in a way similar to a scrub.  This would
	 * be useful because it would allow us to rebuild the space used by
	 * pool checkpoints.
	 */
	VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg));

	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp, tx));

	spa_history_log_internal(spa, "rebuild", tx,
	    "vdev_id=%llu vdev_guid=%llu started",
	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);

	ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
	vd->vdev_rebuild_thread = thread_create(NULL, 0,
	    vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);

	mutex_exit(&vd->vdev_rebuild_lock);
}

static void
vdev_rebuild_log_notify(spa_t *spa, vdev_t *vd, char *name)
{
	nvlist_t *aux = fnvlist_alloc();

	fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE, "sequential");
	spa_event_notify(spa, vd, aux, name);
	nvlist_free(aux);
}

/*
 * Called to request that a new rebuild be started.  The feature will remain
 * active for the duration of the rebuild, then revert to the enabled state.
 */
static void
vdev_rebuild_initiate(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;

	ASSERT(vd->vdev_top == vd);
	ASSERT(MUTEX_HELD(&vd->vdev_rebuild_lock));
	ASSERT(!vd->vdev_rebuilding);

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));

	vd->vdev_rebuilding = B_TRUE;

	dsl_sync_task_nowait(spa_get_dsl(spa), vdev_rebuild_initiate_sync,
	    (void *)(uintptr_t)vd->vdev_id, tx);
	dmu_tx_commit(tx);

	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_START);
}

/*
 * Update the on-disk state to completed when a rebuild finishes.
 */
static void
vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

	mutex_enter(&vd->vdev_rebuild_lock);
	vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
	vrp->vrp_end_time = gethrestime_sec();

	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp, tx));

	vdev_dtl_reassess(vd, tx->tx_txg, vrp->vrp_max_txg, B_TRUE, B_TRUE);
	spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);

	spa_history_log_internal(spa, "rebuild", tx,
	    "vdev_id=%llu vdev_guid=%llu complete",
	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH);

	/* Handles detaching of spares */
	spa_async_request(spa, SPA_ASYNC_REBUILD_DONE);
	vd->vdev_rebuilding = B_FALSE;
	mutex_exit(&vd->vdev_rebuild_lock);

	/*
	 * While we're in syncing context take the opportunity to
	 * setup the scrub when there are no more active rebuilds.
	 */
	if (!vdev_rebuild_active(spa->spa_root_vdev) &&
	    zfs_rebuild_scrub_enabled) {
		pool_scan_func_t func = POOL_SCAN_SCRUB;
		dsl_scan_setup_sync(&func, tx);
	}

	cv_broadcast(&vd->vdev_rebuild_cv);
}

/*
 * Update the on-disk state to canceled when a rebuild finishes.
 */
static void
vdev_rebuild_cancel_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

	mutex_enter(&vd->vdev_rebuild_lock);
	vrp->vrp_rebuild_state = VDEV_REBUILD_CANCELED;
	vrp->vrp_end_time = gethrestime_sec();

	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp, tx));

	spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);

	spa_history_log_internal(spa, "rebuild", tx,
	    "vdev_id=%llu vdev_guid=%llu canceled",
	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH);

	vd->vdev_rebuild_cancel_wanted = B_FALSE;
	vd->vdev_rebuilding = B_FALSE;
	mutex_exit(&vd->vdev_rebuild_lock);

	spa_notify_waiters(spa);
	cv_broadcast(&vd->vdev_rebuild_cv);
}

/*
 * Resets the progress of a running rebuild.  This will occur when a new
 * vdev is added to rebuild.
 */
static void
vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

	mutex_enter(&vd->vdev_rebuild_lock);

	ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
	ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);

	vrp->vrp_last_offset = 0;
	vrp->vrp_min_txg = 0;
	vrp->vrp_max_txg = dmu_tx_get_txg(tx);
	vrp->vrp_bytes_scanned = 0;
	vrp->vrp_bytes_issued = 0;
	vrp->vrp_bytes_rebuilt = 0;
	vrp->vrp_bytes_est = 0;
	vrp->vrp_scan_time_ms = 0;
	vr->vr_prev_scan_time_ms = 0;

	/* See vdev_rebuild_initiate_sync comment */
	VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg));

	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp, tx));

	spa_history_log_internal(spa, "rebuild", tx,
	    "vdev_id=%llu vdev_guid=%llu reset",
	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);

	vd->vdev_rebuild_reset_wanted = B_FALSE;
	ASSERT(vd->vdev_rebuilding);

	vd->vdev_rebuild_thread = thread_create(NULL, 0,
	    vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);

	mutex_exit(&vd->vdev_rebuild_lock);
}

/*
 * Clear the last rebuild status.
 */
void
vdev_rebuild_clear_sync(void *arg, dmu_tx_t *tx)
{
	int vdev_id = (uintptr_t)arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	objset_t *mos = spa_meta_objset(spa);

	mutex_enter(&vd->vdev_rebuild_lock);

	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD) ||
	    vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE) {
		mutex_exit(&vd->vdev_rebuild_lock);
		return;
	}

	clear_rebuild_bytes(vd);
	bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);

	if (vd->vdev_top_zap != 0 && zap_contains(mos, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS) == 0) {
		VERIFY0(zap_update(mos, vd->vdev_top_zap,
		    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
		    REBUILD_PHYS_ENTRIES, vrp, tx));
	}

	mutex_exit(&vd->vdev_rebuild_lock);
}

/*
 * The zio_done_func_t callback for each rebuild I/O issued.  It's responsible
 * for updating the rebuild stats and limiting the number of in flight I/Os.
 */
static void
vdev_rebuild_cb(zio_t *zio)
{
	vdev_rebuild_t *vr = zio->io_private;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	vdev_t *vd = vr->vr_top_vdev;

	mutex_enter(&vr->vr_io_lock);
	if (zio->io_error == ENXIO && !vdev_writeable(vd)) {
		/*
		 * The I/O failed because the top-level vdev was unavailable.
		 * Attempt to roll back to the last completed offset, in order
		 * to resume from the correct location if the pool is resumed.
		 * (This works because spa_sync waits on spa_txg_zio before
		 * it runs sync tasks.)
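		 *
		 * Rolling the offset back can cause ranges which were already
		 * rebuilt to be rebuilt again after the pool resumes, which
		 * is harmless; it guarantees the failed range is not skipped.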
		 */
		uint64_t *off = &vr->vr_scan_offset[zio->io_txg & TXG_MASK];
		*off = MIN(*off, zio->io_offset);
	} else if (zio->io_error) {
		vrp->vrp_errors++;
	}

	abd_free(zio->io_abd);

	ASSERT3U(vr->vr_bytes_inflight, >, 0);
	vr->vr_bytes_inflight -= zio->io_size;
	cv_broadcast(&vr->vr_io_cv);
	mutex_exit(&vr->vr_io_lock);

	spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
}

/*
 * Initialize a block pointer that can be used to read the given segment
 * for sequential rebuild.
 */
static void
vdev_rebuild_blkptr_init(blkptr_t *bp, vdev_t *vd, uint64_t start,
    uint64_t asize)
{
	ASSERT(vd->vdev_ops == &vdev_draid_ops ||
	    vd->vdev_ops == &vdev_mirror_ops ||
	    vd->vdev_ops == &vdev_replacing_ops ||
	    vd->vdev_ops == &vdev_spare_ops);

	uint64_t psize = vd->vdev_ops == &vdev_draid_ops ?
	    vdev_draid_asize_to_psize(vd, asize) : asize;

	BP_ZERO(bp);

	DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
	DVA_SET_OFFSET(&bp->blk_dva[0], start);
	DVA_SET_GANG(&bp->blk_dva[0], 0);
	DVA_SET_ASIZE(&bp->blk_dva[0], asize);

	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
	BP_SET_LSIZE(bp, psize);
	BP_SET_PSIZE(bp, psize);
	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
	BP_SET_TYPE(bp, DMU_OT_NONE);
	BP_SET_LEVEL(bp, 0);
	BP_SET_DEDUP(bp, 0);
	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
}

/*
 * Issues a rebuild I/O and takes care of rate limiting the number of queued
 * rebuild I/Os.  The provided start and size must be properly aligned for the
 * top-level vdev type being rebuilt.
 */
static int
vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
{
	uint64_t ms_id __maybe_unused = vr->vr_scan_msp->ms_id;
	vdev_t *vd = vr->vr_top_vdev;
	spa_t *spa = vd->vdev_spa;
	blkptr_t blk;

	ASSERT3U(ms_id, ==, start >> vd->vdev_ms_shift);
	ASSERT3U(ms_id, ==, (start + size - 1) >> vd->vdev_ms_shift);

	vr->vr_pass_bytes_scanned += size;
	vr->vr_rebuild_phys.vrp_bytes_scanned += size;

	/*
	 * Rebuild the data in this range by constructing a special block
	 * pointer.  It has no relation to any existing blocks in the pool.
	 * However, by disabling checksum verification and issuing a scrub IO
	 * we can reconstruct and repair any children with missing data.
	 */
	vdev_rebuild_blkptr_init(&blk, vd, start, size);
	uint64_t psize = BP_GET_PSIZE(&blk);

	if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
		return (0);

	mutex_enter(&vr->vr_io_lock);

	/* Limit in flight rebuild I/Os */
	while (vr->vr_bytes_inflight >= vr->vr_bytes_inflight_max)
		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);

	vr->vr_bytes_inflight += psize;
	mutex_exit(&vr->vr_io_lock);

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
	uint64_t txg = dmu_tx_get_txg(tx);

	spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
	mutex_enter(&vd->vdev_rebuild_lock);

	/* This is the first I/O for this txg. */
	if (vr->vr_scan_offset[txg & TXG_MASK] == 0) {
		vr->vr_scan_offset[txg & TXG_MASK] = start;
		dsl_sync_task_nowait(spa_get_dsl(spa),
		    vdev_rebuild_update_sync,
		    (void *)(uintptr_t)vd->vdev_id, tx);
	}

	/* When exiting, write out our progress. */
	if (vdev_rebuild_should_stop(vd)) {
		mutex_enter(&vr->vr_io_lock);
		vr->vr_bytes_inflight -= psize;
		mutex_exit(&vr->vr_io_lock);
		spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
		mutex_exit(&vd->vdev_rebuild_lock);
		dmu_tx_commit(tx);
		return (SET_ERROR(EINTR));
	}
	mutex_exit(&vd->vdev_rebuild_lock);
	dmu_tx_commit(tx);

	vr->vr_scan_offset[txg & TXG_MASK] = start + size;
	vr->vr_pass_bytes_issued += size;
	vr->vr_rebuild_phys.vrp_bytes_issued += size;

	zio_nowait(zio_read(spa->spa_txg_zio[txg & TXG_MASK], spa, &blk,
	    abd_alloc(psize, B_FALSE), psize, vdev_rebuild_cb, vr,
	    ZIO_PRIORITY_REBUILD, ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL |
	    ZIO_FLAG_RESILVER, NULL));

	return (0);
}

/*
 * Issues rebuild I/Os for all ranges in the provided vr->vr_scan_tree
 * range tree.
 */
static int
vdev_rebuild_ranges(vdev_rebuild_t *vr)
{
	vdev_t *vd = vr->vr_top_vdev;
	zfs_btree_t *t = &vr->vr_scan_tree->rt_root;
	zfs_btree_index_t idx;
	int error;

	for (range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL;
	    rs = zfs_btree_next(t, &idx, &idx)) {
		uint64_t start = rs_get_start(rs, vr->vr_scan_tree);
		uint64_t size = rs_get_end(rs, vr->vr_scan_tree) - start;

		/*
		 * zfs_scan_suspend_progress can be set to disable rebuild
		 * progress for testing.  See comment in dsl_scan_sync().
		 */
		while (zfs_scan_suspend_progress &&
		    !vdev_rebuild_should_stop(vd)) {
			delay(hz);
		}

		while (size > 0) {
			uint64_t chunk_size;

			/*
			 * Split range into legally-sized logical chunks
			 * given the constraints of the top-level vdev
			 * being rebuilt (dRAID or mirror).
			 */
			ASSERT3P(vd->vdev_ops, !=, NULL);
			chunk_size = vd->vdev_ops->vdev_op_rebuild_asize(vd,
			    start, size, zfs_rebuild_max_segment);

			error = vdev_rebuild_range(vr, start, chunk_size);
			if (error != 0)
				return (error);

			size -= chunk_size;
			start += chunk_size;
		}
	}

	return (0);
}

/*
 * Calculates the estimated capacity which remains to be scanned.  Since
 * we traverse the pool in metaslab order only allocated capacity beyond
 * the vrp_last_offset need be considered.  All lower offsets must have
 * already been rebuilt and are thus already included in vrp_bytes_scanned.
 */
static void
vdev_rebuild_update_bytes_est(vdev_t *vd, uint64_t ms_id)
{
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	uint64_t bytes_est = vrp->vrp_bytes_scanned;

	if (vrp->vrp_last_offset < vd->vdev_ms[ms_id]->ms_start)
		return;

	for (uint64_t i = ms_id; i < vd->vdev_ms_count; i++) {
		metaslab_t *msp = vd->vdev_ms[i];

		mutex_enter(&msp->ms_lock);
		bytes_est += metaslab_allocated_space(msp);
		mutex_exit(&msp->ms_lock);
	}

	vrp->vrp_bytes_est = bytes_est;
}

/*
 * Load from disk the top-level vdev's rebuild information.
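 * Invoked when the top-level vdev is loaded (e.g. at pool import); a
 * missing or damaged on-disk entry is tolerated, see below.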
 */
int
vdev_rebuild_load(vdev_t *vd)
{
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	spa_t *spa = vd->vdev_spa;
	int err = 0;

	mutex_enter(&vd->vdev_rebuild_lock);
	vd->vdev_rebuilding = B_FALSE;

	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD)) {
		bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
		mutex_exit(&vd->vdev_rebuild_lock);
		return (SET_ERROR(ENOTSUP));
	}

	ASSERT(vd->vdev_top == vd);

	err = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
	    REBUILD_PHYS_ENTRIES, vrp);

	/*
	 * A missing or damaged VDEV_TOP_ZAP_VDEV_REBUILD_PHYS should
	 * not prevent a pool from being imported.  Clear the rebuild
	 * status allowing a new resilver/rebuild to be started.
	 */
	if (err == ENOENT || err == EOVERFLOW || err == ECKSUM) {
		bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
	} else if (err) {
		mutex_exit(&vd->vdev_rebuild_lock);
		return (err);
	}

	vr->vr_prev_scan_time_ms = vrp->vrp_scan_time_ms;
	vr->vr_top_vdev = vd;

	mutex_exit(&vd->vdev_rebuild_lock);

	return (0);
}

/*
 * Each scan thread is responsible for rebuilding a top-level vdev.  The
 * rebuild progress is tracked on-disk in VDEV_TOP_ZAP_VDEV_REBUILD_PHYS.
 */
static void
vdev_rebuild_thread(void *arg)
{
	vdev_t *vd = arg;
	spa_t *spa = vd->vdev_spa;
	int error = 0;

	/*
	 * If there's a scrub in process, request that it be stopped.  This
	 * is not required for a correct rebuild, but we do want rebuilds to
	 * emulate the resilver behavior as much as possible.
	 */
	dsl_pool_t *dsl = spa_get_dsl(spa);
	if (dsl_scan_scrubbing(dsl))
		dsl_scan_cancel(dsl);

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	mutex_enter(&vd->vdev_rebuild_lock);

	ASSERT3P(vd->vdev_top, ==, vd);
	ASSERT3P(vd->vdev_rebuild_thread, !=, NULL);
	ASSERT(vd->vdev_rebuilding);
	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
	ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
	ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);

	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
	vr->vr_top_vdev = vd;
	vr->vr_scan_msp = NULL;
	vr->vr_scan_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
	mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);

	vr->vr_pass_start_time = gethrtime();
	vr->vr_pass_bytes_scanned = 0;
	vr->vr_pass_bytes_issued = 0;

	vr->vr_bytes_inflight_max = MAX(1ULL << 20,
	    zfs_rebuild_vdev_limit * vd->vdev_children);

	uint64_t update_est_time = gethrtime();
	vdev_rebuild_update_bytes_est(vd, 0);

	clear_rebuild_bytes(vr->vr_top_vdev);

	mutex_exit(&vd->vdev_rebuild_lock);

	/*
	 * Systematically walk the metaslabs and issue rebuild I/Os for
	 * all ranges in the allocated space map.
	 */
	for (uint64_t i = 0; i < vd->vdev_ms_count; i++) {
		metaslab_t *msp = vd->vdev_ms[i];
		vr->vr_scan_msp = msp;

		/*
		 * Removal of vdevs from the vdev tree may eliminate the need
		 * for the rebuild, in which case it should be canceled.  The
		 * vdev_rebuild_cancel_wanted flag is set until the sync task
		 * completes.  This may be after the rebuild thread exits.
		 */
		if (vdev_rebuild_should_cancel(vd)) {
			vd->vdev_rebuild_cancel_wanted = B_TRUE;
			error = EINTR;
			break;
		}

		ASSERT0(range_tree_space(vr->vr_scan_tree));

		/* Disable any new allocations to this metaslab */
		metaslab_disable(msp);
		spa_config_exit(spa, SCL_CONFIG, FTAG);

		mutex_enter(&msp->ms_sync_lock);
		mutex_enter(&msp->ms_lock);

		/*
		 * If there are outstanding allocations wait for them to be
		 * synced.  This is needed to ensure all allocated ranges are
		 * on disk and therefore will be rebuilt.
		 */
		for (int j = 0; j < TXG_SIZE; j++) {
			if (range_tree_space(msp->ms_allocating[j])) {
				mutex_exit(&msp->ms_lock);
				mutex_exit(&msp->ms_sync_lock);
				txg_wait_synced(dsl, 0);
				mutex_enter(&msp->ms_sync_lock);
				mutex_enter(&msp->ms_lock);
				break;
			}
		}

		/*
		 * When a metaslab has been allocated from, read its allocated
		 * ranges from the space map object into the vr_scan_tree.
		 * Then add inflight / unflushed ranges and remove inflight /
		 * unflushed frees.  This is the minimum range to be rebuilt.
		 */
		if (msp->ms_sm != NULL) {
			VERIFY0(space_map_load(msp->ms_sm,
			    vr->vr_scan_tree, SM_ALLOC));

			for (int i = 0; i < TXG_SIZE; i++) {
				ASSERT0(range_tree_space(
				    msp->ms_allocating[i]));
			}

			range_tree_walk(msp->ms_unflushed_allocs,
			    range_tree_add, vr->vr_scan_tree);
			range_tree_walk(msp->ms_unflushed_frees,
			    range_tree_remove, vr->vr_scan_tree);

			/*
			 * Remove ranges which have already been rebuilt based
			 * on the last offset.  This can happen when restarting
			 * a scan after exporting and re-importing the pool.
			 */
			range_tree_clear(vr->vr_scan_tree, 0,
			    vrp->vrp_last_offset);
		}

		mutex_exit(&msp->ms_lock);
		mutex_exit(&msp->ms_sync_lock);

		/*
		 * To provide an accurate estimate, re-calculate the estimated
		 * size every 5 minutes to account for recent allocations and
		 * frees made to space maps which have not yet been rebuilt.
		 */
		if (gethrtime() > update_est_time + SEC2NSEC(300)) {
			update_est_time = gethrtime();
			vdev_rebuild_update_bytes_est(vd, i);
		}

		/*
		 * Walk the allocated space map and issue the rebuild I/O.
		 */
		error = vdev_rebuild_ranges(vr);
		range_tree_vacate(vr->vr_scan_tree, NULL, NULL);

		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		metaslab_enable(msp, B_FALSE, B_FALSE);

		if (error != 0)
			break;
	}

	range_tree_destroy(vr->vr_scan_tree);
	spa_config_exit(spa, SCL_CONFIG, FTAG);

	/* Wait for any remaining rebuild I/O to complete */
	mutex_enter(&vr->vr_io_lock);
	while (vr->vr_bytes_inflight > 0)
		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);

	mutex_exit(&vr->vr_io_lock);

	mutex_destroy(&vr->vr_io_lock);
	cv_destroy(&vr->vr_io_cv);

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	dsl_pool_t *dp = spa_get_dsl(spa);
	dmu_tx_t *tx = dmu_tx_create_dd(dp->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));

	mutex_enter(&vd->vdev_rebuild_lock);
	if (error == 0) {
		/*
		 * After a successful rebuild clear the DTLs of all ranges
		 * which were missing when the rebuild was started.  These
		 * ranges must have been rebuilt as a consequence of rebuilding
		 * all allocated space.  Note that unlike a scrub or resilver
		 * the rebuild operation will reconstruct data only referenced
		 * by a pool checkpoint.  See the dsl_scan_done() comments.
		 */
		dsl_sync_task_nowait(dp, vdev_rebuild_complete_sync,
		    (void *)(uintptr_t)vd->vdev_id, tx);
	} else if (vd->vdev_rebuild_cancel_wanted) {
		/*
		 * The rebuild operation was canceled.  This will occur when
		 * a device participating in the rebuild is detached.
		 */
		dsl_sync_task_nowait(dp, vdev_rebuild_cancel_sync,
		    (void *)(uintptr_t)vd->vdev_id, tx);
	} else if (vd->vdev_rebuild_reset_wanted) {
		/*
		 * Reset the running rebuild without canceling and restarting
		 * it.  This will occur when a new device is attached and must
		 * participate in the rebuild.
		 */
		dsl_sync_task_nowait(dp, vdev_rebuild_reset_sync,
		    (void *)(uintptr_t)vd->vdev_id, tx);
	} else {
		/*
		 * The rebuild operation should be suspended.  This may occur
		 * when detaching a child vdev or when exporting the pool.  The
		 * rebuild is left in the active state so it will be resumed.
		 */
		ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
		vd->vdev_rebuilding = B_FALSE;
	}

	dmu_tx_commit(tx);

	vd->vdev_rebuild_thread = NULL;
	mutex_exit(&vd->vdev_rebuild_lock);
	spa_config_exit(spa, SCL_CONFIG, FTAG);

	cv_broadcast(&vd->vdev_rebuild_cv);

	thread_exit();
}

/*
 * Returns B_TRUE if any top-level vdev is rebuilding.
 */
boolean_t
vdev_rebuild_active(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;
	boolean_t ret = B_FALSE;

	if (vd == spa->spa_root_vdev) {
		for (uint64_t i = 0; i < vd->vdev_children; i++) {
			ret = vdev_rebuild_active(vd->vdev_child[i]);
			if (ret)
				return (ret);
		}
	} else if (vd->vdev_top_zap != 0) {
		vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
		vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

		mutex_enter(&vd->vdev_rebuild_lock);
		ret = (vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
		mutex_exit(&vd->vdev_rebuild_lock);
	}

	return (ret);
}

/*
 * Start a rebuild operation.  The rebuild will be restarted if the
 * top-level vdev is already actively rebuilding.
 */
void
vdev_rebuild(vdev_t *vd)
{
	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
	vdev_rebuild_phys_t *vrp __maybe_unused = &vr->vr_rebuild_phys;

	ASSERT(vd->vdev_top == vd);
	ASSERT(vdev_is_concrete(vd));
	ASSERT(!vd->vdev_removing);
	ASSERT(spa_feature_is_enabled(vd->vdev_spa,
	    SPA_FEATURE_DEVICE_REBUILD));

	mutex_enter(&vd->vdev_rebuild_lock);
	if (vd->vdev_rebuilding) {
		ASSERT3U(vrp->vrp_rebuild_state, ==, VDEV_REBUILD_ACTIVE);

		/*
		 * Signal a running rebuild operation that it should restart
		 * from the beginning because a new device was attached.  The
		 * vdev_rebuild_reset_wanted flag is set until the sync task
		 * completes.  This may be after the rebuild thread exits.
		 */
		if (!vd->vdev_rebuild_reset_wanted)
			vd->vdev_rebuild_reset_wanted = B_TRUE;
	} else {
		vdev_rebuild_initiate(vd);
	}
	mutex_exit(&vd->vdev_rebuild_lock);
}

static void
vdev_rebuild_restart_impl(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;

	if (vd == spa->spa_root_vdev) {
		for (uint64_t i = 0; i < vd->vdev_children; i++)
			vdev_rebuild_restart_impl(vd->vdev_child[i]);

	} else if (vd->vdev_top_zap != 0) {
		vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
		vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

		mutex_enter(&vd->vdev_rebuild_lock);
		if (vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE &&
		    vdev_writeable(vd) && !vd->vdev_rebuilding) {
			ASSERT(spa_feature_is_active(spa,
			    SPA_FEATURE_DEVICE_REBUILD));
			vd->vdev_rebuilding = B_TRUE;
			vd->vdev_rebuild_thread = thread_create(NULL, 0,
			    vdev_rebuild_thread, vd, 0, &p0, TS_RUN,
			    maxclsyspri);
		}
		mutex_exit(&vd->vdev_rebuild_lock);
	}
}

/*
 * Conditionally restart all of the vdev_rebuild_thread's for a pool.  The
 * feature flag must be active and the rebuild in the active state.  This
 * cannot be used to start a new rebuild.
 */
void
vdev_rebuild_restart(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	vdev_rebuild_restart_impl(spa->spa_root_vdev);
}

/*
 * Stop and wait for all of the vdev_rebuild_thread's associated with the
 * provided vdev tree to be terminated (canceled or stopped).
 */
void
vdev_rebuild_stop_wait(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	if (vd == spa->spa_root_vdev) {
		for (uint64_t i = 0; i < vd->vdev_children; i++)
			vdev_rebuild_stop_wait(vd->vdev_child[i]);

	} else if (vd->vdev_top_zap != 0) {
		ASSERT(vd == vd->vdev_top);

		mutex_enter(&vd->vdev_rebuild_lock);
		if (vd->vdev_rebuild_thread != NULL) {
			vd->vdev_rebuild_exit_wanted = B_TRUE;
			while (vd->vdev_rebuilding) {
				cv_wait(&vd->vdev_rebuild_cv,
				    &vd->vdev_rebuild_lock);
			}
			vd->vdev_rebuild_exit_wanted = B_FALSE;
		}
		mutex_exit(&vd->vdev_rebuild_lock);
	}
}

/*
 * Stop all rebuild operations but leave them in the active state so they
 * will be resumed when importing the pool.
 */
void
vdev_rebuild_stop_all(spa_t *spa)
{
	vdev_rebuild_stop_wait(spa->spa_root_vdev);
}

/*
 * Rebuild statistics reported per top-level vdev.
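 * Returns ENOTSUP when the device_rebuild feature is not enabled, EINVAL
 * when the vdev is not a top-level vdev (or has no top-level ZAP), and
 * reports VDEV_REBUILD_NONE when no rebuild state has ever been written.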
 */
int
vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
{
	spa_t *spa = tvd->vdev_spa;

	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
		return (SET_ERROR(ENOTSUP));

	if (tvd != tvd->vdev_top || tvd->vdev_top_zap == 0)
		return (SET_ERROR(EINVAL));

	int error = zap_contains(spa_meta_objset(spa),
	    tvd->vdev_top_zap, VDEV_TOP_ZAP_VDEV_REBUILD_PHYS);

	if (error == ENOENT) {
		bzero(vrs, sizeof (vdev_rebuild_stat_t));
		vrs->vrs_state = VDEV_REBUILD_NONE;
		error = 0;
	} else if (error == 0) {
		vdev_rebuild_t *vr = &tvd->vdev_rebuild_config;
		vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

		mutex_enter(&tvd->vdev_rebuild_lock);
		vrs->vrs_state = vrp->vrp_rebuild_state;
		vrs->vrs_start_time = vrp->vrp_start_time;
		vrs->vrs_end_time = vrp->vrp_end_time;
		vrs->vrs_scan_time_ms = vrp->vrp_scan_time_ms;
		vrs->vrs_bytes_scanned = vrp->vrp_bytes_scanned;
		vrs->vrs_bytes_issued = vrp->vrp_bytes_issued;
		vrs->vrs_bytes_rebuilt = vrp->vrp_bytes_rebuilt;
		vrs->vrs_bytes_est = vrp->vrp_bytes_est;
		vrs->vrs_errors = vrp->vrp_errors;
		vrs->vrs_pass_time_ms = NSEC2MSEC(gethrtime() -
		    vr->vr_pass_start_time);
		vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned;
		vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued;
		mutex_exit(&tvd->vdev_rebuild_lock);
	}

	return (error);
}

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW,
	"Max segment size in bytes of rebuild reads");

ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW,
	"Max bytes in flight per leaf vdev for sequential resilvers");

ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW,
	"Automatically scrub after sequential resilver completes");
/* END CSTYLED */
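
/*
 * For illustration only: on Linux builds the tunables above are exposed as
 * module parameters and may be adjusted at runtime, e.g.:
 *
 *   echo $((64 << 20)) > /sys/module/zfs/parameters/zfs_rebuild_vdev_limit
 *
 * The value shown is an arbitrary example, not a recommendation.
 */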