1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * 23 * Copyright (c) 2018, Intel Corporation. 24 * Copyright (c) 2020 by Lawrence Livermore National Security, LLC. 25 */ 26 27 #include <sys/vdev_impl.h> 28 #include <sys/vdev_draid.h> 29 #include <sys/dsl_scan.h> 30 #include <sys/spa_impl.h> 31 #include <sys/metaslab_impl.h> 32 #include <sys/vdev_rebuild.h> 33 #include <sys/zio.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zap.h> 37 38 /* 39 * This file contains the sequential reconstruction implementation for 40 * resilvering. This form of resilvering is internally referred to as device 41 * rebuild to avoid conflating it with the traditional healing reconstruction 42 * performed by the dsl scan code. 43 * 44 * When replacing a device, or scrubbing the pool, ZFS has historically used 45 * a process called resilvering which is a form of healing reconstruction. 46 * This approach has the advantage that as blocks are read from disk their 47 * checksums can be immediately verified and the data repaired. 
Unfortunately, 48 * it also results in a random IO pattern to the disk even when extra care 49 * is taken to sequentialize the IO as much as possible. This substantially 50 * increases the time required to resilver the pool and restore redundancy. 51 * 52 * For mirrored devices it's possible to implement an alternate sequential 53 * reconstruction strategy when resilvering. Sequential reconstruction 54 * behaves like a traditional RAID rebuild and reconstructs a device in LBA 55 * order without verifying the checksum. After this phase completes a second 56 * scrub phase is started to verify all of the checksums. This two phase 57 * process will take longer than the healing reconstruction described above. 58 * However, it has the advantage that after the first reconstruction phase 59 * completes redundancy has been restored. At this point the pool can incur 60 * another device failure without risking data loss. 61 * 62 * There are a few noteworthy limitations and other advantages of resilvering 63 * using sequential reconstruction vs healing reconstruction. 64 * 65 * Limitations: 66 * 67 * - Sequential reconstruction is not possible on RAIDZ due to its 68 * variable stripe width. Note dRAID uses a fixed stripe width which 69 * avoids this issue, but comes at the expense of some usable capacity. 70 * 71 * - Block checksums are not verified during sequential reconstruction. 72 * Similar to traditional RAID the parity/mirror data is reconstructed 73 * but cannot be immediately double checked. For this reason when the 74 * last active resilver completes the pool is automatically scrubbed 75 * by default. 76 * 77 * - Deferred resilvers using sequential reconstruction are not currently 78 * supported. When adding another vdev to an active top-level resilver 79 * it must be restarted. 
80 * 81 * Advantages: 82 * 83 * - Sequential reconstruction is performed in LBA order which may be faster 84 * than healing reconstruction particularly when using HDDs (or 85 * especially with SMR devices). Only allocated capacity is resilvered. 86 * 87 * - Sequential reconstruction is not constrained by ZFS block boundaries. 88 * This allows it to issue larger IOs to disk which span multiple blocks 89 * allowing all of these logical blocks to be repaired with a single IO. 90 * 91 * - Unlike a healing resilver or scrub which are pool wide operations, 92 * sequential reconstruction is handled by the top-level vdevs. This 93 * allows for it to be started or canceled on a top-level vdev without 94 * impacting any other top-level vdevs in the pool. 95 * 96 * - Data only referenced by a pool checkpoint will be repaired because 97 * that space is reflected in the space maps. This differs for a 98 * healing resilver or scrub which will not repair that data. 99 */ 100 101 102 /* 103 * Size of rebuild reads; defaults to 1MiB per data disk and is capped at 104 * SPA_MAXBLOCKSIZE. 105 */ 106 static unsigned long zfs_rebuild_max_segment = 1024 * 1024; 107 108 /* 109 * Maximum number of parallelly executed bytes per leaf vdev caused by a 110 * sequential resilver. We attempt to strike a balance here between keeping 111 * the vdev queues full of I/Os at all times and not overflowing the queues 112 * to cause long latency, which would cause long txg sync times. 113 * 114 * A large default value can be safely used here because the default target 115 * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep 116 * the queue depth short. 117 * 118 * 32MB was selected as the default value to achieve good performance with 119 * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential 120 * rebuild was unable to saturate all of the drives using smaller values. 
121 * With a value of 32MB the sequential resilver write rate was measured at 122 * 800MB/s sustained while rebuilding to a distributed spare. 123 */ 124 static unsigned long zfs_rebuild_vdev_limit = 32 << 20; 125 126 /* 127 * Automatically start a pool scrub when the last active sequential resilver 128 * completes in order to verify the checksums of all blocks which have been 129 * resilvered. This option is enabled by default and is strongly recommended. 130 */ 131 static int zfs_rebuild_scrub_enabled = 1; 132 133 /* 134 * For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync(). 135 */ 136 static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg); 137 138 /* 139 * Clear the per-vdev rebuild bytes value for a vdev tree. 140 */ 141 static void 142 clear_rebuild_bytes(vdev_t *vd) 143 { 144 vdev_stat_t *vs = &vd->vdev_stat; 145 146 for (uint64_t i = 0; i < vd->vdev_children; i++) 147 clear_rebuild_bytes(vd->vdev_child[i]); 148 149 mutex_enter(&vd->vdev_stat_lock); 150 vs->vs_rebuild_processed = 0; 151 mutex_exit(&vd->vdev_stat_lock); 152 } 153 154 /* 155 * Determines whether a vdev_rebuild_thread() should be stopped. 156 */ 157 static boolean_t 158 vdev_rebuild_should_stop(vdev_t *vd) 159 { 160 return (!vdev_writeable(vd) || vd->vdev_removing || 161 vd->vdev_rebuild_exit_wanted || 162 vd->vdev_rebuild_cancel_wanted || 163 vd->vdev_rebuild_reset_wanted); 164 } 165 166 /* 167 * Determine if the rebuild should be canceled. This may happen when all 168 * vdevs with MISSING DTLs are detached. 169 */ 170 static boolean_t 171 vdev_rebuild_should_cancel(vdev_t *vd) 172 { 173 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 174 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 175 176 if (!vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg)) 177 return (B_TRUE); 178 179 return (B_FALSE); 180 } 181 182 /* 183 * The sync task for updating the on-disk state of a rebuild. This is 184 * scheduled by vdev_rebuild_range(). 
185 */ 186 static void 187 vdev_rebuild_update_sync(void *arg, dmu_tx_t *tx) 188 { 189 int vdev_id = (uintptr_t)arg; 190 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 191 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 192 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 193 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 194 uint64_t txg = dmu_tx_get_txg(tx); 195 196 mutex_enter(&vd->vdev_rebuild_lock); 197 198 if (vr->vr_scan_offset[txg & TXG_MASK] > 0) { 199 vrp->vrp_last_offset = vr->vr_scan_offset[txg & TXG_MASK]; 200 vr->vr_scan_offset[txg & TXG_MASK] = 0; 201 } 202 203 vrp->vrp_scan_time_ms = vr->vr_prev_scan_time_ms + 204 NSEC2MSEC(gethrtime() - vr->vr_pass_start_time); 205 206 VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, 207 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 208 REBUILD_PHYS_ENTRIES, vrp, tx)); 209 210 mutex_exit(&vd->vdev_rebuild_lock); 211 } 212 213 /* 214 * Initialize the on-disk state for a new rebuild, start the rebuild thread. 215 */ 216 static void 217 vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx) 218 { 219 int vdev_id = (uintptr_t)arg; 220 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 221 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 222 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 223 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 224 225 ASSERT(vd->vdev_rebuilding); 226 227 spa_feature_incr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx); 228 229 mutex_enter(&vd->vdev_rebuild_lock); 230 memset(vrp, 0, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES); 231 vrp->vrp_rebuild_state = VDEV_REBUILD_ACTIVE; 232 vrp->vrp_min_txg = 0; 233 vrp->vrp_max_txg = dmu_tx_get_txg(tx); 234 vrp->vrp_start_time = gethrestime_sec(); 235 vrp->vrp_scan_time_ms = 0; 236 vr->vr_prev_scan_time_ms = 0; 237 238 /* 239 * Rebuilds are currently only used when replacing a device, in which 240 * case there must be DTL_MISSING entries. In the future, we could 241 * allow rebuilds to be used in a way similar to a scrub. 
This would 242 * be useful because it would allow us to rebuild the space used by 243 * pool checkpoints. 244 */ 245 VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg)); 246 247 VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, 248 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 249 REBUILD_PHYS_ENTRIES, vrp, tx)); 250 251 spa_history_log_internal(spa, "rebuild", tx, 252 "vdev_id=%llu vdev_guid=%llu started", 253 (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid); 254 255 ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); 256 vd->vdev_rebuild_thread = thread_create(NULL, 0, 257 vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri); 258 259 mutex_exit(&vd->vdev_rebuild_lock); 260 } 261 262 static void 263 vdev_rebuild_log_notify(spa_t *spa, vdev_t *vd, char *name) 264 { 265 nvlist_t *aux = fnvlist_alloc(); 266 267 fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE, "sequential"); 268 spa_event_notify(spa, vd, aux, name); 269 nvlist_free(aux); 270 } 271 272 /* 273 * Called to request that a new rebuild be started. The feature will remain 274 * active for the duration of the rebuild, then revert to the enabled state. 275 */ 276 static void 277 vdev_rebuild_initiate(vdev_t *vd) 278 { 279 spa_t *spa = vd->vdev_spa; 280 281 ASSERT(vd->vdev_top == vd); 282 ASSERT(MUTEX_HELD(&vd->vdev_rebuild_lock)); 283 ASSERT(!vd->vdev_rebuilding); 284 285 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 286 VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); 287 288 vd->vdev_rebuilding = B_TRUE; 289 290 dsl_sync_task_nowait(spa_get_dsl(spa), vdev_rebuild_initiate_sync, 291 (void *)(uintptr_t)vd->vdev_id, tx); 292 dmu_tx_commit(tx); 293 294 vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_START); 295 } 296 297 /* 298 * Update the on-disk state to completed when a rebuild finishes. 
299 */ 300 static void 301 vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) 302 { 303 int vdev_id = (uintptr_t)arg; 304 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 305 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 306 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 307 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 308 309 mutex_enter(&vd->vdev_rebuild_lock); 310 vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE; 311 vrp->vrp_end_time = gethrestime_sec(); 312 313 VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, 314 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 315 REBUILD_PHYS_ENTRIES, vrp, tx)); 316 317 vdev_dtl_reassess(vd, tx->tx_txg, vrp->vrp_max_txg, B_TRUE, B_TRUE); 318 spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx); 319 320 spa_history_log_internal(spa, "rebuild", tx, 321 "vdev_id=%llu vdev_guid=%llu complete", 322 (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid); 323 vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH); 324 325 /* Handles detaching of spares */ 326 spa_async_request(spa, SPA_ASYNC_REBUILD_DONE); 327 vd->vdev_rebuilding = B_FALSE; 328 mutex_exit(&vd->vdev_rebuild_lock); 329 330 /* 331 * While we're in syncing context take the opportunity to 332 * setup the scrub when there are no more active rebuilds. 333 */ 334 pool_scan_func_t func = POOL_SCAN_SCRUB; 335 if (dsl_scan_setup_check(&func, tx) == 0 && 336 zfs_rebuild_scrub_enabled) { 337 dsl_scan_setup_sync(&func, tx); 338 } 339 340 cv_broadcast(&vd->vdev_rebuild_cv); 341 342 /* Clear recent error events (i.e. duplicate events tracking) */ 343 zfs_ereport_clear(spa, NULL); 344 } 345 346 /* 347 * Update the on-disk state to canceled when a rebuild finishes. 
348 */ 349 static void 350 vdev_rebuild_cancel_sync(void *arg, dmu_tx_t *tx) 351 { 352 int vdev_id = (uintptr_t)arg; 353 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 354 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 355 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 356 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 357 358 mutex_enter(&vd->vdev_rebuild_lock); 359 vrp->vrp_rebuild_state = VDEV_REBUILD_CANCELED; 360 vrp->vrp_end_time = gethrestime_sec(); 361 362 VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, 363 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 364 REBUILD_PHYS_ENTRIES, vrp, tx)); 365 366 spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx); 367 368 spa_history_log_internal(spa, "rebuild", tx, 369 "vdev_id=%llu vdev_guid=%llu canceled", 370 (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid); 371 vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH); 372 373 vd->vdev_rebuild_cancel_wanted = B_FALSE; 374 vd->vdev_rebuilding = B_FALSE; 375 mutex_exit(&vd->vdev_rebuild_lock); 376 377 spa_notify_waiters(spa); 378 cv_broadcast(&vd->vdev_rebuild_cv); 379 } 380 381 /* 382 * Resets the progress of a running rebuild. This will occur when a new 383 * vdev is added to rebuild. 
384 */ 385 static void 386 vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx) 387 { 388 int vdev_id = (uintptr_t)arg; 389 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 390 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 391 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 392 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 393 394 mutex_enter(&vd->vdev_rebuild_lock); 395 396 ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE); 397 ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); 398 399 vrp->vrp_last_offset = 0; 400 vrp->vrp_min_txg = 0; 401 vrp->vrp_max_txg = dmu_tx_get_txg(tx); 402 vrp->vrp_bytes_scanned = 0; 403 vrp->vrp_bytes_issued = 0; 404 vrp->vrp_bytes_rebuilt = 0; 405 vrp->vrp_bytes_est = 0; 406 vrp->vrp_scan_time_ms = 0; 407 vr->vr_prev_scan_time_ms = 0; 408 409 /* See vdev_rebuild_initiate_sync comment */ 410 VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg)); 411 412 VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, 413 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 414 REBUILD_PHYS_ENTRIES, vrp, tx)); 415 416 spa_history_log_internal(spa, "rebuild", tx, 417 "vdev_id=%llu vdev_guid=%llu reset", 418 (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid); 419 420 vd->vdev_rebuild_reset_wanted = B_FALSE; 421 ASSERT(vd->vdev_rebuilding); 422 423 vd->vdev_rebuild_thread = thread_create(NULL, 0, 424 vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri); 425 426 mutex_exit(&vd->vdev_rebuild_lock); 427 } 428 429 /* 430 * Clear the last rebuild status. 
431 */ 432 void 433 vdev_rebuild_clear_sync(void *arg, dmu_tx_t *tx) 434 { 435 int vdev_id = (uintptr_t)arg; 436 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 437 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 438 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 439 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 440 objset_t *mos = spa_meta_objset(spa); 441 442 mutex_enter(&vd->vdev_rebuild_lock); 443 444 if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD) || 445 vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE) { 446 mutex_exit(&vd->vdev_rebuild_lock); 447 return; 448 } 449 450 clear_rebuild_bytes(vd); 451 memset(vrp, 0, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES); 452 453 if (vd->vdev_top_zap != 0 && zap_contains(mos, vd->vdev_top_zap, 454 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS) == 0) { 455 VERIFY0(zap_update(mos, vd->vdev_top_zap, 456 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 457 REBUILD_PHYS_ENTRIES, vrp, tx)); 458 } 459 460 mutex_exit(&vd->vdev_rebuild_lock); 461 } 462 463 /* 464 * The zio_done_func_t callback for each rebuild I/O issued. It's responsible 465 * for updating the rebuild stats and limiting the number of in flight I/Os. 466 */ 467 static void 468 vdev_rebuild_cb(zio_t *zio) 469 { 470 vdev_rebuild_t *vr = zio->io_private; 471 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 472 vdev_t *vd = vr->vr_top_vdev; 473 474 mutex_enter(&vr->vr_io_lock); 475 if (zio->io_error == ENXIO && !vdev_writeable(vd)) { 476 /* 477 * The I/O failed because the top-level vdev was unavailable. 478 * Attempt to roll back to the last completed offset, in order 479 * resume from the correct location if the pool is resumed. 480 * (This works because spa_sync waits on spa_txg_zio before 481 * it runs sync tasks.) 
482 */ 483 uint64_t *off = &vr->vr_scan_offset[zio->io_txg & TXG_MASK]; 484 *off = MIN(*off, zio->io_offset); 485 } else if (zio->io_error) { 486 vrp->vrp_errors++; 487 } 488 489 abd_free(zio->io_abd); 490 491 ASSERT3U(vr->vr_bytes_inflight, >, 0); 492 vr->vr_bytes_inflight -= zio->io_size; 493 cv_broadcast(&vr->vr_io_cv); 494 mutex_exit(&vr->vr_io_lock); 495 496 spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd); 497 } 498 499 /* 500 * Initialize a block pointer that can be used to read the given segment 501 * for sequential rebuild. 502 */ 503 static void 504 vdev_rebuild_blkptr_init(blkptr_t *bp, vdev_t *vd, uint64_t start, 505 uint64_t asize) 506 { 507 ASSERT(vd->vdev_ops == &vdev_draid_ops || 508 vd->vdev_ops == &vdev_mirror_ops || 509 vd->vdev_ops == &vdev_replacing_ops || 510 vd->vdev_ops == &vdev_spare_ops); 511 512 uint64_t psize = vd->vdev_ops == &vdev_draid_ops ? 513 vdev_draid_asize_to_psize(vd, asize) : asize; 514 515 BP_ZERO(bp); 516 517 DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id); 518 DVA_SET_OFFSET(&bp->blk_dva[0], start); 519 DVA_SET_GANG(&bp->blk_dva[0], 0); 520 DVA_SET_ASIZE(&bp->blk_dva[0], asize); 521 522 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); 523 BP_SET_LSIZE(bp, psize); 524 BP_SET_PSIZE(bp, psize); 525 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); 526 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); 527 BP_SET_TYPE(bp, DMU_OT_NONE); 528 BP_SET_LEVEL(bp, 0); 529 BP_SET_DEDUP(bp, 0); 530 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 531 } 532 533 /* 534 * Issues a rebuild I/O and takes care of rate limiting the number of queued 535 * rebuild I/Os. The provided start and size must be properly aligned for the 536 * top-level vdev type being rebuilt. 
537 */ 538 static int 539 vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size) 540 { 541 uint64_t ms_id __maybe_unused = vr->vr_scan_msp->ms_id; 542 vdev_t *vd = vr->vr_top_vdev; 543 spa_t *spa = vd->vdev_spa; 544 blkptr_t blk; 545 546 ASSERT3U(ms_id, ==, start >> vd->vdev_ms_shift); 547 ASSERT3U(ms_id, ==, (start + size - 1) >> vd->vdev_ms_shift); 548 549 vr->vr_pass_bytes_scanned += size; 550 vr->vr_rebuild_phys.vrp_bytes_scanned += size; 551 552 /* 553 * Rebuild the data in this range by constructing a special block 554 * pointer. It has no relation to any existing blocks in the pool. 555 * However, by disabling checksum verification and issuing a scrub IO 556 * we can reconstruct and repair any children with missing data. 557 */ 558 vdev_rebuild_blkptr_init(&blk, vd, start, size); 559 uint64_t psize = BP_GET_PSIZE(&blk); 560 561 if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN)) 562 return (0); 563 564 mutex_enter(&vr->vr_io_lock); 565 566 /* Limit in flight rebuild I/Os */ 567 while (vr->vr_bytes_inflight >= vr->vr_bytes_inflight_max) 568 cv_wait(&vr->vr_io_cv, &vr->vr_io_lock); 569 570 vr->vr_bytes_inflight += psize; 571 mutex_exit(&vr->vr_io_lock); 572 573 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 574 VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); 575 uint64_t txg = dmu_tx_get_txg(tx); 576 577 spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER); 578 mutex_enter(&vd->vdev_rebuild_lock); 579 580 /* This is the first I/O for this txg. */ 581 if (vr->vr_scan_offset[txg & TXG_MASK] == 0) { 582 vr->vr_scan_offset[txg & TXG_MASK] = start; 583 dsl_sync_task_nowait(spa_get_dsl(spa), 584 vdev_rebuild_update_sync, 585 (void *)(uintptr_t)vd->vdev_id, tx); 586 } 587 588 /* When exiting write out our progress. 
*/ 589 if (vdev_rebuild_should_stop(vd)) { 590 mutex_enter(&vr->vr_io_lock); 591 vr->vr_bytes_inflight -= psize; 592 mutex_exit(&vr->vr_io_lock); 593 spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd); 594 mutex_exit(&vd->vdev_rebuild_lock); 595 dmu_tx_commit(tx); 596 return (SET_ERROR(EINTR)); 597 } 598 mutex_exit(&vd->vdev_rebuild_lock); 599 dmu_tx_commit(tx); 600 601 vr->vr_scan_offset[txg & TXG_MASK] = start + size; 602 vr->vr_pass_bytes_issued += size; 603 vr->vr_rebuild_phys.vrp_bytes_issued += size; 604 605 zio_nowait(zio_read(spa->spa_txg_zio[txg & TXG_MASK], spa, &blk, 606 abd_alloc(psize, B_FALSE), psize, vdev_rebuild_cb, vr, 607 ZIO_PRIORITY_REBUILD, ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL | 608 ZIO_FLAG_RESILVER, NULL)); 609 610 return (0); 611 } 612 613 /* 614 * Issues rebuild I/Os for all ranges in the provided vr->vr_tree range tree. 615 */ 616 static int 617 vdev_rebuild_ranges(vdev_rebuild_t *vr) 618 { 619 vdev_t *vd = vr->vr_top_vdev; 620 zfs_btree_t *t = &vr->vr_scan_tree->rt_root; 621 zfs_btree_index_t idx; 622 int error; 623 624 for (range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL; 625 rs = zfs_btree_next(t, &idx, &idx)) { 626 uint64_t start = rs_get_start(rs, vr->vr_scan_tree); 627 uint64_t size = rs_get_end(rs, vr->vr_scan_tree) - start; 628 629 /* 630 * zfs_scan_suspend_progress can be set to disable rebuild 631 * progress for testing. See comment in dsl_scan_sync(). 632 */ 633 while (zfs_scan_suspend_progress && 634 !vdev_rebuild_should_stop(vd)) { 635 delay(hz); 636 } 637 638 while (size > 0) { 639 uint64_t chunk_size; 640 641 /* 642 * Split range into legally-sized logical chunks 643 * given the constraints of the top-level vdev 644 * being rebuilt (dRAID or mirror). 
645 */ 646 ASSERT3P(vd->vdev_ops, !=, NULL); 647 chunk_size = vd->vdev_ops->vdev_op_rebuild_asize(vd, 648 start, size, zfs_rebuild_max_segment); 649 650 error = vdev_rebuild_range(vr, start, chunk_size); 651 if (error != 0) 652 return (error); 653 654 size -= chunk_size; 655 start += chunk_size; 656 } 657 } 658 659 return (0); 660 } 661 662 /* 663 * Calculates the estimated capacity which remains to be scanned. Since 664 * we traverse the pool in metaslab order only allocated capacity beyond 665 * the vrp_last_offset need be considered. All lower offsets must have 666 * already been rebuilt and are thus already included in vrp_bytes_scanned. 667 */ 668 static void 669 vdev_rebuild_update_bytes_est(vdev_t *vd, uint64_t ms_id) 670 { 671 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 672 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 673 uint64_t bytes_est = vrp->vrp_bytes_scanned; 674 675 if (vrp->vrp_last_offset < vd->vdev_ms[ms_id]->ms_start) 676 return; 677 678 for (uint64_t i = ms_id; i < vd->vdev_ms_count; i++) { 679 metaslab_t *msp = vd->vdev_ms[i]; 680 681 mutex_enter(&msp->ms_lock); 682 bytes_est += metaslab_allocated_space(msp); 683 mutex_exit(&msp->ms_lock); 684 } 685 686 vrp->vrp_bytes_est = bytes_est; 687 } 688 689 /* 690 * Load from disk the top-level vdev's rebuild information. 
691 */ 692 int 693 vdev_rebuild_load(vdev_t *vd) 694 { 695 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 696 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 697 spa_t *spa = vd->vdev_spa; 698 int err = 0; 699 700 mutex_enter(&vd->vdev_rebuild_lock); 701 vd->vdev_rebuilding = B_FALSE; 702 703 if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD)) { 704 memset(vrp, 0, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES); 705 mutex_exit(&vd->vdev_rebuild_lock); 706 return (SET_ERROR(ENOTSUP)); 707 } 708 709 ASSERT(vd->vdev_top == vd); 710 711 err = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap, 712 VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t), 713 REBUILD_PHYS_ENTRIES, vrp); 714 715 /* 716 * A missing or damaged VDEV_TOP_ZAP_VDEV_REBUILD_PHYS should 717 * not prevent a pool from being imported. Clear the rebuild 718 * status allowing a new resilver/rebuild to be started. 719 */ 720 if (err == ENOENT || err == EOVERFLOW || err == ECKSUM) { 721 memset(vrp, 0, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES); 722 } else if (err) { 723 mutex_exit(&vd->vdev_rebuild_lock); 724 return (err); 725 } 726 727 vr->vr_prev_scan_time_ms = vrp->vrp_scan_time_ms; 728 vr->vr_top_vdev = vd; 729 730 mutex_exit(&vd->vdev_rebuild_lock); 731 732 return (0); 733 } 734 735 /* 736 * Each scan thread is responsible for rebuilding a top-level vdev. The 737 * rebuild progress in tracked on-disk in VDEV_TOP_ZAP_VDEV_REBUILD_PHYS. 738 */ 739 static __attribute__((noreturn)) void 740 vdev_rebuild_thread(void *arg) 741 { 742 vdev_t *vd = arg; 743 spa_t *spa = vd->vdev_spa; 744 int error = 0; 745 746 /* 747 * If there's a scrub in process request that it be stopped. This 748 * is not required for a correct rebuild, but we do want rebuilds to 749 * emulate the resilver behavior as much as possible. 
750 */ 751 dsl_pool_t *dsl = spa_get_dsl(spa); 752 if (dsl_scan_scrubbing(dsl)) 753 dsl_scan_cancel(dsl); 754 755 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 756 mutex_enter(&vd->vdev_rebuild_lock); 757 758 ASSERT3P(vd->vdev_top, ==, vd); 759 ASSERT3P(vd->vdev_rebuild_thread, !=, NULL); 760 ASSERT(vd->vdev_rebuilding); 761 ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD)); 762 ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE); 763 ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE); 764 765 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 766 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 767 vr->vr_top_vdev = vd; 768 vr->vr_scan_msp = NULL; 769 vr->vr_scan_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); 770 mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL); 771 cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL); 772 773 vr->vr_pass_start_time = gethrtime(); 774 vr->vr_pass_bytes_scanned = 0; 775 vr->vr_pass_bytes_issued = 0; 776 777 vr->vr_bytes_inflight_max = MAX(1ULL << 20, 778 zfs_rebuild_vdev_limit * vd->vdev_children); 779 780 uint64_t update_est_time = gethrtime(); 781 vdev_rebuild_update_bytes_est(vd, 0); 782 783 clear_rebuild_bytes(vr->vr_top_vdev); 784 785 mutex_exit(&vd->vdev_rebuild_lock); 786 787 /* 788 * Systematically walk the metaslabs and issue rebuild I/Os for 789 * all ranges in the allocated space map. 790 */ 791 for (uint64_t i = 0; i < vd->vdev_ms_count; i++) { 792 metaslab_t *msp = vd->vdev_ms[i]; 793 vr->vr_scan_msp = msp; 794 795 /* 796 * Removal of vdevs from the vdev tree may eliminate the need 797 * for the rebuild, in which case it should be canceled. The 798 * vdev_rebuild_cancel_wanted flag is set until the sync task 799 * completes. This may be after the rebuild thread exits. 
800 */ 801 if (vdev_rebuild_should_cancel(vd)) { 802 vd->vdev_rebuild_cancel_wanted = B_TRUE; 803 error = EINTR; 804 break; 805 } 806 807 ASSERT0(range_tree_space(vr->vr_scan_tree)); 808 809 /* Disable any new allocations to this metaslab */ 810 spa_config_exit(spa, SCL_CONFIG, FTAG); 811 metaslab_disable(msp); 812 813 mutex_enter(&msp->ms_sync_lock); 814 mutex_enter(&msp->ms_lock); 815 816 /* 817 * If there are outstanding allocations wait for them to be 818 * synced. This is needed to ensure all allocated ranges are 819 * on disk and therefore will be rebuilt. 820 */ 821 for (int j = 0; j < TXG_SIZE; j++) { 822 if (range_tree_space(msp->ms_allocating[j])) { 823 mutex_exit(&msp->ms_lock); 824 mutex_exit(&msp->ms_sync_lock); 825 txg_wait_synced(dsl, 0); 826 mutex_enter(&msp->ms_sync_lock); 827 mutex_enter(&msp->ms_lock); 828 break; 829 } 830 } 831 832 /* 833 * When a metaslab has been allocated from read its allocated 834 * ranges from the space map object into the vr_scan_tree. 835 * Then add inflight / unflushed ranges and remove inflight / 836 * unflushed frees. This is the minimum range to be rebuilt. 837 */ 838 if (msp->ms_sm != NULL) { 839 VERIFY0(space_map_load(msp->ms_sm, 840 vr->vr_scan_tree, SM_ALLOC)); 841 842 for (int i = 0; i < TXG_SIZE; i++) { 843 ASSERT0(range_tree_space( 844 msp->ms_allocating[i])); 845 } 846 847 range_tree_walk(msp->ms_unflushed_allocs, 848 range_tree_add, vr->vr_scan_tree); 849 range_tree_walk(msp->ms_unflushed_frees, 850 range_tree_remove, vr->vr_scan_tree); 851 852 /* 853 * Remove ranges which have already been rebuilt based 854 * on the last offset. This can happen when restarting 855 * a scan after exporting and re-importing the pool. 
856 */ 857 range_tree_clear(vr->vr_scan_tree, 0, 858 vrp->vrp_last_offset); 859 } 860 861 mutex_exit(&msp->ms_lock); 862 mutex_exit(&msp->ms_sync_lock); 863 864 /* 865 * To provide an accurate estimate re-calculate the estimated 866 * size every 5 minutes to account for recent allocations and 867 * frees made to space maps which have not yet been rebuilt. 868 */ 869 if (gethrtime() > update_est_time + SEC2NSEC(300)) { 870 update_est_time = gethrtime(); 871 vdev_rebuild_update_bytes_est(vd, i); 872 } 873 874 /* 875 * Walk the allocated space map and issue the rebuild I/O. 876 */ 877 error = vdev_rebuild_ranges(vr); 878 range_tree_vacate(vr->vr_scan_tree, NULL, NULL); 879 880 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 881 metaslab_enable(msp, B_FALSE, B_FALSE); 882 883 if (error != 0) 884 break; 885 } 886 887 range_tree_destroy(vr->vr_scan_tree); 888 spa_config_exit(spa, SCL_CONFIG, FTAG); 889 890 /* Wait for any remaining rebuild I/O to complete */ 891 mutex_enter(&vr->vr_io_lock); 892 while (vr->vr_bytes_inflight > 0) 893 cv_wait(&vr->vr_io_cv, &vr->vr_io_lock); 894 895 mutex_exit(&vr->vr_io_lock); 896 897 mutex_destroy(&vr->vr_io_lock); 898 cv_destroy(&vr->vr_io_cv); 899 900 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 901 902 dsl_pool_t *dp = spa_get_dsl(spa); 903 dmu_tx_t *tx = dmu_tx_create_dd(dp->dp_mos_dir); 904 VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); 905 906 mutex_enter(&vd->vdev_rebuild_lock); 907 if (error == 0) { 908 /* 909 * After a successful rebuild clear the DTLs of all ranges 910 * which were missing when the rebuild was started. These 911 * ranges must have been rebuilt as a consequence of rebuilding 912 * all allocated space. Note that unlike a scrub or resilver 913 * the rebuild operation will reconstruct data only referenced 914 * by a pool checkpoint. See the dsl_scan_done() comments. 
915 */ 916 dsl_sync_task_nowait(dp, vdev_rebuild_complete_sync, 917 (void *)(uintptr_t)vd->vdev_id, tx); 918 } else if (vd->vdev_rebuild_cancel_wanted) { 919 /* 920 * The rebuild operation was canceled. This will occur when 921 * a device participating in the rebuild is detached. 922 */ 923 dsl_sync_task_nowait(dp, vdev_rebuild_cancel_sync, 924 (void *)(uintptr_t)vd->vdev_id, tx); 925 } else if (vd->vdev_rebuild_reset_wanted) { 926 /* 927 * Reset the running rebuild without canceling and restarting 928 * it. This will occur when a new device is attached and must 929 * participate in the rebuild. 930 */ 931 dsl_sync_task_nowait(dp, vdev_rebuild_reset_sync, 932 (void *)(uintptr_t)vd->vdev_id, tx); 933 } else { 934 /* 935 * The rebuild operation should be suspended. This may occur 936 * when detaching a child vdev or when exporting the pool. The 937 * rebuild is left in the active state so it will be resumed. 938 */ 939 ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE); 940 vd->vdev_rebuilding = B_FALSE; 941 } 942 943 dmu_tx_commit(tx); 944 945 vd->vdev_rebuild_thread = NULL; 946 mutex_exit(&vd->vdev_rebuild_lock); 947 spa_config_exit(spa, SCL_CONFIG, FTAG); 948 949 cv_broadcast(&vd->vdev_rebuild_cv); 950 951 thread_exit(); 952 } 953 954 /* 955 * Returns B_TRUE if any top-level vdev are rebuilding. 956 */ 957 boolean_t 958 vdev_rebuild_active(vdev_t *vd) 959 { 960 spa_t *spa = vd->vdev_spa; 961 boolean_t ret = B_FALSE; 962 963 if (vd == spa->spa_root_vdev) { 964 for (uint64_t i = 0; i < vd->vdev_children; i++) { 965 ret = vdev_rebuild_active(vd->vdev_child[i]); 966 if (ret) 967 return (ret); 968 } 969 } else if (vd->vdev_top_zap != 0) { 970 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 971 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 972 973 mutex_enter(&vd->vdev_rebuild_lock); 974 ret = (vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE); 975 mutex_exit(&vd->vdev_rebuild_lock); 976 } 977 978 return (ret); 979 } 980 981 /* 982 * Start a rebuild operation. 
The rebuild may be restarted when the 983 * top-level vdev is currently actively rebuilding. 984 */ 985 void 986 vdev_rebuild(vdev_t *vd) 987 { 988 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 989 vdev_rebuild_phys_t *vrp __maybe_unused = &vr->vr_rebuild_phys; 990 991 ASSERT(vd->vdev_top == vd); 992 ASSERT(vdev_is_concrete(vd)); 993 ASSERT(!vd->vdev_removing); 994 ASSERT(spa_feature_is_enabled(vd->vdev_spa, 995 SPA_FEATURE_DEVICE_REBUILD)); 996 997 mutex_enter(&vd->vdev_rebuild_lock); 998 if (vd->vdev_rebuilding) { 999 ASSERT3U(vrp->vrp_rebuild_state, ==, VDEV_REBUILD_ACTIVE); 1000 1001 /* 1002 * Signal a running rebuild operation that it should restart 1003 * from the beginning because a new device was attached. The 1004 * vdev_rebuild_reset_wanted flag is set until the sync task 1005 * completes. This may be after the rebuild thread exits. 1006 */ 1007 if (!vd->vdev_rebuild_reset_wanted) 1008 vd->vdev_rebuild_reset_wanted = B_TRUE; 1009 } else { 1010 vdev_rebuild_initiate(vd); 1011 } 1012 mutex_exit(&vd->vdev_rebuild_lock); 1013 } 1014 1015 static void 1016 vdev_rebuild_restart_impl(vdev_t *vd) 1017 { 1018 spa_t *spa = vd->vdev_spa; 1019 1020 if (vd == spa->spa_root_vdev) { 1021 for (uint64_t i = 0; i < vd->vdev_children; i++) 1022 vdev_rebuild_restart_impl(vd->vdev_child[i]); 1023 1024 } else if (vd->vdev_top_zap != 0) { 1025 vdev_rebuild_t *vr = &vd->vdev_rebuild_config; 1026 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 1027 1028 mutex_enter(&vd->vdev_rebuild_lock); 1029 if (vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE && 1030 vdev_writeable(vd) && !vd->vdev_rebuilding) { 1031 ASSERT(spa_feature_is_active(spa, 1032 SPA_FEATURE_DEVICE_REBUILD)); 1033 vd->vdev_rebuilding = B_TRUE; 1034 vd->vdev_rebuild_thread = thread_create(NULL, 0, 1035 vdev_rebuild_thread, vd, 0, &p0, TS_RUN, 1036 maxclsyspri); 1037 } 1038 mutex_exit(&vd->vdev_rebuild_lock); 1039 } 1040 } 1041 1042 /* 1043 * Conditionally restart all of the vdev_rebuild_thread's for a pool. 
The 1044 * feature flag must be active and the rebuild in the active state. This 1045 * cannot be used to start a new rebuild. 1046 */ 1047 void 1048 vdev_rebuild_restart(spa_t *spa) 1049 { 1050 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1051 1052 vdev_rebuild_restart_impl(spa->spa_root_vdev); 1053 } 1054 1055 /* 1056 * Stop and wait for all of the vdev_rebuild_thread's associated with the 1057 * vdev tree provide to be terminated (canceled or stopped). 1058 */ 1059 void 1060 vdev_rebuild_stop_wait(vdev_t *vd) 1061 { 1062 spa_t *spa = vd->vdev_spa; 1063 1064 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1065 1066 if (vd == spa->spa_root_vdev) { 1067 for (uint64_t i = 0; i < vd->vdev_children; i++) 1068 vdev_rebuild_stop_wait(vd->vdev_child[i]); 1069 1070 } else if (vd->vdev_top_zap != 0) { 1071 ASSERT(vd == vd->vdev_top); 1072 1073 mutex_enter(&vd->vdev_rebuild_lock); 1074 if (vd->vdev_rebuild_thread != NULL) { 1075 vd->vdev_rebuild_exit_wanted = B_TRUE; 1076 while (vd->vdev_rebuilding) { 1077 cv_wait(&vd->vdev_rebuild_cv, 1078 &vd->vdev_rebuild_lock); 1079 } 1080 vd->vdev_rebuild_exit_wanted = B_FALSE; 1081 } 1082 mutex_exit(&vd->vdev_rebuild_lock); 1083 } 1084 } 1085 1086 /* 1087 * Stop all rebuild operations but leave them in the active state so they 1088 * will be resumed when importing the pool. 1089 */ 1090 void 1091 vdev_rebuild_stop_all(spa_t *spa) 1092 { 1093 vdev_rebuild_stop_wait(spa->spa_root_vdev); 1094 } 1095 1096 /* 1097 * Rebuild statistics reported per top-level vdev. 
1098 */ 1099 int 1100 vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs) 1101 { 1102 spa_t *spa = tvd->vdev_spa; 1103 1104 if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD)) 1105 return (SET_ERROR(ENOTSUP)); 1106 1107 if (tvd != tvd->vdev_top || tvd->vdev_top_zap == 0) 1108 return (SET_ERROR(EINVAL)); 1109 1110 int error = zap_contains(spa_meta_objset(spa), 1111 tvd->vdev_top_zap, VDEV_TOP_ZAP_VDEV_REBUILD_PHYS); 1112 1113 if (error == ENOENT) { 1114 memset(vrs, 0, sizeof (vdev_rebuild_stat_t)); 1115 vrs->vrs_state = VDEV_REBUILD_NONE; 1116 error = 0; 1117 } else if (error == 0) { 1118 vdev_rebuild_t *vr = &tvd->vdev_rebuild_config; 1119 vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; 1120 1121 mutex_enter(&tvd->vdev_rebuild_lock); 1122 vrs->vrs_state = vrp->vrp_rebuild_state; 1123 vrs->vrs_start_time = vrp->vrp_start_time; 1124 vrs->vrs_end_time = vrp->vrp_end_time; 1125 vrs->vrs_scan_time_ms = vrp->vrp_scan_time_ms; 1126 vrs->vrs_bytes_scanned = vrp->vrp_bytes_scanned; 1127 vrs->vrs_bytes_issued = vrp->vrp_bytes_issued; 1128 vrs->vrs_bytes_rebuilt = vrp->vrp_bytes_rebuilt; 1129 vrs->vrs_bytes_est = vrp->vrp_bytes_est; 1130 vrs->vrs_errors = vrp->vrp_errors; 1131 vrs->vrs_pass_time_ms = NSEC2MSEC(gethrtime() - 1132 vr->vr_pass_start_time); 1133 vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned; 1134 vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued; 1135 mutex_exit(&tvd->vdev_rebuild_lock); 1136 } 1137 1138 return (error); 1139 } 1140 1141 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW, 1142 "Max segment size in bytes of rebuild reads"); 1143 1144 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW, 1145 "Max bytes in flight per leaf vdev for sequential resilvers"); 1146 1147 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW, 1148 "Automatically scrub after sequential resilver completes"); 1149