#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2016 by Delphix. All rights reserved.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
#	It should be possible to rewind a pool beyond a device replacement.
#
# STRATEGY:
#	1. Create a pool.
#	2. Generate files and remember their hashsum.
#	3. Sync a few times and note last synced txg.
#	4. Take a snapshot to make sure old blocks are not overwritten.
#	5. Initiate device replacement and export the pool. Special care must
#	   be taken so that resilvering doesn't complete before the export.
#	6. Test 1: Rewind pool to noted txg and then verify data checksums.
#	   Import it read-only so that we do not overwrite blocks in later txgs.
#	7. Re-import pool at latest txg and let the replacement finish.
#	8. Export the pool and remove the new device - we shouldn't need it.
#	9. Test 2: Rewind pool to noted txg and then verify data checksums.
#
# STRATEGY TO SLOW DOWN RESILVERING:
#	1. Reduce zfs_txg_timeout, which controls how long can we resilver for
#	   each sync.
#	2. Add data to pool
#	3. Re-import the pool so that data isn't cached
#	4. Set the SCAN_SUSPEND_PROGRESS tunable so that resilvering doesn't
#	   complete
#	5. Trigger the resilvering
#	6. Export the pool
#	7. Clear the SCAN_SUSPEND_PROGRESS tunable
#
# DISCLAIMER:
#	This test can fail since nothing guarantees that old MOS blocks aren't
#	overwritten. Snapshots protect datasets and data files but not the MOS.
#	sync_some_data_a_few_times interleaves file data and MOS data for a few
#	txgs, thus increasing the odds that some txgs will have their MOS data
#	left untouched.
#

verify_runnable "global"

# Original zfs_txg_timeout value; stays empty until the test has read it,
# which tells custom_cleanup whether there is anything to restore.
ZFS_TXG_TIMEOUT=""

#
# Exit handler registered through log_onexit: restores the tunables this
# test modifies, removes the backup device directory and then runs the
# shared cleanup routine.
#
function custom_cleanup
{
	# Revert zfs_txg_timeout to defaults
	[[ -n $ZFS_TXG_TIMEOUT ]] &&
	    log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
	# Reset the global scan tunable before touching the filesystem so
	# that a failure while removing the backup directory cannot leave
	# scans suspended (assumes log_must aborts the handler on failure).
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	log_must rm -rf $BACKUP_DEVICE_DIR
	cleanup
}

log_onexit custom_cleanup

#
# Run the rewind scenario once for a given pool layout.
#
# Arguments:
#	$1 - vdev specification handed to 'zpool create'; also the
#	     configuration expected after each rewind
#	$2 - device to be replaced
#	$3 - device that replaces $2
#	$4 - pool configuration expected once the replacement has finished
#	$5 - accepted for compatibility with existing callers but ignored
#	     (formerly stored in an unused 'zinjectdevices' local)
#	$6 - second argument passed to write_some_data
#
function test_replace_vdev
{
	typeset poolcreate="$1"
	typeset replacevdev="$2"
	typeset replaceby="$3"
	typeset poolfinalstate="$4"
	typeset writedata="$6"

	log_note "$0: pool '$poolcreate', replace $replacevdev by $replaceby."

	# $poolcreate is intentionally unquoted: it expands to several words.
	log_must zpool create $TESTPOOL1 $poolcreate

	# generate data and checksum it
	log_must generate_data $TESTPOOL1 $MD5FILE

	# add more data so that resilver takes longer
	log_must write_some_data $TESTPOOL1 $writedata

	# Syncing a few times while writing new data increases the odds that
	# MOS metadata for some of the txgs will survive.
	log_must sync_some_data_a_few_times $TESTPOOL1
	typeset txg
	txg=$(get_last_txg_synced $TESTPOOL1)
	log_must zfs snapshot -r $TESTPOOL1@snap1

	# This should not free original data.
	log_must overwrite_data $TESTPOOL1 ""

	log_must zpool export $TESTPOOL1
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1

	# Ensure resilvering doesn't complete.
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby

	# Confirm pool is still replacing
	log_must pool_is_replacing $TESTPOOL1
	log_must zpool export $TESTPOOL1
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0

	############################################################
	# Test 1: rewind while device is resilvering.
	# Import read only to avoid overwriting more recent blocks.
	############################################################
	log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	log_must zpool export $TESTPOOL1

	# Import pool at latest txg to finish the resilvering
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	log_must overwrite_data $TESTPOOL1 ""
	log_must wait_for_pool_config $TESTPOOL1 "$poolfinalstate"
	log_must zpool export $TESTPOOL1

	# Move out the new device
	log_must mv $replaceby $BACKUP_DEVICE_DIR/

	############################################################
	# Test 2: rewind after device has been replaced.
	# Import read-write since we won't need the pool anymore.
	############################################################
	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	# Cleanup
	log_must zpool destroy $TESTPOOL1
	# Restore the device we moved out
	log_must mv "$BACKUP_DEVICE_DIR/$(basename $replaceby)" $DEVICE_DIR/
	# Fast way to clear vdev labels
	log_must zpool create -f $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4
	log_must zpool destroy $TESTPOOL2

	log_note ""
}

# Record txg history
is_linux && log_must set_tunable32 TXG_HISTORY 100

log_must mkdir -p $BACKUP_DEVICE_DIR
# Make the devices bigger to reduce chances of overwriting MOS metadata.
increase_device_sizes $(( FILE_SIZE * 4 ))

# We set zfs_txg_timeout to 1 to reduce resilvering time at each sync.
ZFS_TXG_TIMEOUT=$(get_zfs_txg_timeout)
set_zfs_txg_timeout 1

test_replace_vdev "$VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"$VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1" 15

test_replace_vdev "mirror $VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"mirror $VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1" 10

test_replace_vdev "raidz $VDEV0 $VDEV1 $VDEV2" \
	"$VDEV1" "$VDEV3" \
	"raidz $VDEV0 $VDEV3 $VDEV2" \
	"$VDEV0 $VDEV1 $VDEV2" 10

test_replace_vdev "draid $VDEV0 $VDEV1 $VDEV2 $VDEV3" \
	"$VDEV1" "$VDEV4" \
	"draid $VDEV0 $VDEV4 $VDEV2 $VDEV3 spares draid1-0-0" \
	"$VDEV0 $VDEV1 $VDEV2 $VDEV3" 10

set_zfs_txg_timeout $ZFS_TXG_TIMEOUT

log_pass "zpool import rewind after device replacement passed."