#!/bin/ksh -p

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2016 by Delphix. All rights reserved.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
#	It should be possible to rewind a pool beyond a device replacement.
#
# STRATEGY:
#	1. Create a pool.
#	2. Generate files and remember their hashsum.
#	3. Sync a few times and note last synced txg.
#	4. Take a snapshot to make sure old blocks are not overwritten.
#	5. Initiate device replacement and export the pool. Special care must
#	   be taken so that resilvering doesn't complete before the export.
#	6. Test 1: Rewind pool to noted txg and then verify data checksums.
#	   Import it read-only so that we do not overwrite blocks in later txgs.
#	7. Re-import pool at latest txg and let the replacement finish.
#	8. Export the pool and remove the new device - we shouldn't need it.
#	9. Test 2: Rewind pool to noted txg and then verify data checksums.
#
# STRATEGY TO KEEP RESILVERING FROM COMPLETING BEFORE THE EXPORT:
#	1. Reduce zfs_txg_timeout, which controls how long we can resilver
#	   for each sync.
#	2. Add data to pool
#	3. Re-import the pool so that data isn't cached
#	4. Set the SCAN_SUSPEND_PROGRESS tunable
#	5. Trigger the resilvering (zpool replace)
#	6. Confirm that the pool is still replacing
#	7. Export the pool
#	8. Clear the SCAN_SUSPEND_PROGRESS tunable
#
# DISCLAIMER:
#	This test can fail since nothing guarantees that old MOS blocks aren't
#	overwritten. Snapshots protect datasets and data files but not the MOS.
#	sync_some_data_a_few_times interleaves file data and MOS data for a few
#	txgs, thus increasing the odds that some txgs will have their MOS data
#	left untouched.
#

verify_runnable "global"

# Original zfs_txg_timeout value; stays empty until it has been saved, so
# that custom_cleanup only restores a value that was actually recorded.
ZFS_TXG_TIMEOUT=""

#
# Exit handler (registered with log_onexit below).
#
# Restores zfs_txg_timeout if its original value was saved, removes the
# backup device directory, clears SCAN_SUSPEND_PROGRESS in case the test
# stopped while it was still set, and then runs the shared cleanup routine.
#
function custom_cleanup
{
	# Revert zfs_txg_timeout to defaults
	[[ -n $ZFS_TXG_TIMEOUT ]] &&
	    log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
	log_must rm -rf $BACKUP_DEVICE_DIR
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	cleanup
}

log_onexit custom_cleanup

#
# Create a pool, start replacing one of its devices, and verify that the
# pool can be rewound to a txg that predates the replacement, both while
# the replacement is still in progress and after it has finished.
#
# Arguments:
#	$1 poolcreate     - vdev specification handed to 'zpool create'; also
#	                    the configuration expected after each rewind
#	$2 replacevdev    - device to be replaced
#	$3 replaceby      - device that takes its place
#	$4 poolfinalstate - configuration expected once the replacement is done
#	$5 zinjectdevices - not referenced by this function; retained so that
#	                    existing callers keep the same argument positions
#	$6 writedata      - second argument passed to write_some_data
#	                    (presumably the amount of extra data - confirm
#	                    against the kshlib)
#
function test_replace_vdev
{
	typeset poolcreate="$1"
	typeset replacevdev="$2"
	typeset replaceby="$3"
	typeset poolfinalstate="$4"
	typeset zinjectdevices="$5"
	typeset writedata="$6"

	log_note "$0: pool '$poolcreate', replace $replacevdev by $replaceby."

	# $poolcreate is intentionally unquoted: it holds several words.
	log_must zpool create $TESTPOOL1 $poolcreate

	# generate data and checksum it
	log_must generate_data $TESTPOOL1 $MD5FILE

	# add more data so that resilver takes longer
	log_must write_some_data $TESTPOOL1 $writedata

	# Syncing a few times while writing new data increases the odds that
	# MOS metadata for some of the txgs will survive.
	log_must sync_some_data_a_few_times $TESTPOOL1
	typeset txg
	txg=$(get_last_txg_synced $TESTPOOL1)
	log_must zfs snapshot -r $TESTPOOL1@snap1

	# This should not free original data.
	log_must overwrite_data $TESTPOOL1 ""

	# Re-import the pool so that data isn't cached.
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1

	# Ensure resilvering doesn't complete.
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby

	# Confirm pool is still replacing
	log_must pool_is_replacing $TESTPOOL1
	log_must zpool export $TESTPOOL1
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0

	############################################################
	# Test 1: rewind while device is resilvering.
	# Import read only to avoid overwriting more recent blocks.
	############################################################
	log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	log_must zpool export $TESTPOOL1

	# Import pool at latest txg to finish the resilvering
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	log_must overwrite_data $TESTPOOL1 ""
	log_must wait_for_pool_config $TESTPOOL1 "$poolfinalstate"
	log_must zpool export $TESTPOOL1

	# Move out the new device
	log_must mv $replaceby $BACKUP_DEVICE_DIR/

	############################################################
	# Test 2: rewind after device has been replaced.
	# Import read-write since we won't need the pool anymore.
	############################################################
	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	# Cleanup
	log_must zpool destroy $TESTPOOL1
	# Restore the device we moved out
	log_must mv "$BACKUP_DEVICE_DIR/$(basename $replaceby)" $DEVICE_DIR/
	# Fast way to clear vdev labels
	log_must zpool create -f $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4
	log_must zpool destroy $TESTPOOL2

	log_note ""
}

# Record txg history
is_linux && log_must set_tunable32 TXG_HISTORY 100

log_must mkdir -p $BACKUP_DEVICE_DIR
# Make the devices bigger to reduce chances of overwriting MOS metadata.
increase_device_sizes $(( FILE_SIZE * 4 ))

# We set zfs_txg_timeout to 1 to reduce resilvering time at each sync.
# The change is checked with log_must, as in custom_cleanup, so that the
# test does not silently run with the default timeout.
ZFS_TXG_TIMEOUT=$(get_zfs_txg_timeout)
log_must set_zfs_txg_timeout 1

test_replace_vdev "$VDEV0 $VDEV1" \
    "$VDEV1" "$VDEV2" \
    "$VDEV0 $VDEV2" \
    "$VDEV0 $VDEV1" 15

test_replace_vdev "mirror $VDEV0 $VDEV1" \
    "$VDEV1" "$VDEV2" \
    "mirror $VDEV0 $VDEV2" \
    "$VDEV0 $VDEV1" 10

test_replace_vdev "raidz $VDEV0 $VDEV1 $VDEV2" \
    "$VDEV1" "$VDEV3" \
    "raidz $VDEV0 $VDEV3 $VDEV2" \
    "$VDEV0 $VDEV1 $VDEV2" 10

test_replace_vdev "draid $VDEV0 $VDEV1 $VDEV2 $VDEV3" \
    "$VDEV1" "$VDEV4" \
    "draid $VDEV0 $VDEV4 $VDEV2 $VDEV3 spares draid1-0-0" \
    "$VDEV0 $VDEV1 $VDEV2 $VDEV3" 10

# Restore the saved timeout; skipped when no value could be recorded,
# mirroring the guard in custom_cleanup.
[[ -n $ZFS_TXG_TIMEOUT ]] &&
    log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT

log_pass "zpool import rewind after device replacement passed."