1#!/bin/ksh -p
2# SPDX-License-Identifier: CDDL-1.0
3
4#
5# This file and its contents are supplied under the terms of the
6# Common Development and Distribution License ("CDDL"), version 1.0.
7# You may only use this file in accordance with the terms of version
8# 1.0 of the CDDL.
9#
10# A full copy of the text of the CDDL should have accompanied this
11# source.  A copy of the CDDL is also available via the Internet at
12# http://www.illumos.org/license/CDDL.
13#
14
15#
16# Copyright (c) 2016 by Delphix. All rights reserved.
17#
18
19. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
20
21#
22# DESCRIPTION:
23#	It should be possible to rewind a pool beyond a device replacement.
24#
25# STRATEGY:
26#	1. Create a pool.
27#	2. Generate files and remember their hashsum.
28#	3. Sync a few times and note last synced txg.
29#	4. Take a snapshot to make sure old blocks are not overwritten.
30#	5. Initiate device replacement and export the pool. Special care must
31#	   be taken so that resilvering doesn't complete before the export.
32#	6. Test 1: Rewind pool to noted txg and then verify data checksums.
33#	   Import it read-only so that we do not overwrite blocks in later txgs.
34#	7. Re-import pool at latest txg and let the replacement finish.
#	8. Export the pool and remove the new device - we shouldn't need it.
36#	9. Test 2: Rewind pool to noted txg and then verify data checksums.
37#
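# STRATEGY TO SLOW DOWN RESILVERING:
#	1. Reduce zfs_txg_timeout, which controls how long we can resilver
#	   for in each sync.
#	2. Add data to pool
#	3. Re-import the pool so that data isn't cached
#	4. Use zinject to slow down device I/O
#	5. Trigger the resilvering
#	6. Use spa freeze to stop writing to the pool.
#	7. Clear zinject events (needed to export the pool)
#	8. Export the pool
#	NOTE: test_replace_vdev below does not invoke zinject or spa freeze
#	itself; it keeps the resilver from completing by setting the
#	SCAN_SUSPEND_PROGRESS tunable to 1 before the replace and back to 0
#	after the export.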
48#
49# DISCLAIMER:
50#	This test can fail since nothing guarantees that old MOS blocks aren't
51#	overwritten. Snapshots protect datasets and data files but not the MOS.
52#	sync_some_data_a_few_times interleaves file data and MOS data for a few
53#	txgs, thus increasing the odds that some txgs will have their MOS data
54#	left untouched.
55#
56
# Runnability check, invoked with the "global" argument (the helper itself
# is defined outside this file).
verify_runnable global

# Holds the original zfs_txg_timeout. It stays empty until the value is
# captured further down; custom_cleanup only restores it when non-empty.
ZFS_TXG_TIMEOUT=
60
#
# Exit handler for this test: puts back the tunables the test modifies,
# deletes the backup device directory and finally calls cleanup (which is
# not defined in this file).
#
function custom_cleanup
{
	# Restore zfs_txg_timeout only if an original value was captured.
	if [[ -n $ZFS_TXG_TIMEOUT ]]; then
		log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
	fi

	log_must rm -rf $BACKUP_DEVICE_DIR

	# test_replace_vdev sets this tunable to 1 around the replace; make
	# sure it is back at 0 even if the test stopped part-way through.
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0

	cleanup
}
70
71log_onexit custom_cleanup
72
#
# Run the "rewind beyond a device replacement" scenario on one pool layout.
#
# $1 poolcreate      vdev specification handed to 'zpool create'; it is also
#                    the configuration expected after each rewind
# $2 replacevdev     device that gets replaced
# $3 replaceby       device that takes its place
# $4 poolfinalstate  configuration expected once the replacement finished
# $5                 still accepted so existing callers keep working, but
#                    not used by this function
# $6 writedata       forwarded to write_some_data as its second argument
#
# Every step goes through log_must; additionally the function fails right
# away (log_fail) when no usable txg could be recorded for the rewind.
#
function test_replace_vdev
{
	typeset poolcreate="$1"
	typeset replacevdev="$2"
	typeset replaceby="$3"
	typeset poolfinalstate="$4"
	typeset writedata="$6"

	log_note "$0: pool '$poolcreate', replace $replacevdev by $replaceby."

	# $poolcreate is deliberately unquoted: it holds several words.
	log_must zpool create $TESTPOOL1 $poolcreate

	# generate data and checksum it
	log_must generate_data $TESTPOOL1 $MD5FILE

	# add more data so that resilver takes longer
	log_must write_some_data $TESTPOOL1 $writedata

	# Syncing a few times while writing new data increases the odds that
	# MOS metadata for some of the txgs will survive.
	log_must sync_some_data_a_few_times $TESTPOOL1
	typeset txg
	txg=$(get_last_txg_synced $TESTPOOL1)
	# Both rewinds below pass "-T $txg". With an empty value the pool name
	# would be taken as the txg, so stop here with a clear message rather
	# than after the replacement has already been started.
	[[ -n $txg && $txg != *[!0-9]* ]] ||
	    log_fail "Could not determine last synced txg of $TESTPOOL1: '$txg'"
	log_must zfs snapshot -r $TESTPOOL1@snap1

	# This should not free original data.
	log_must overwrite_data $TESTPOOL1 ""

	log_must zpool export $TESTPOOL1
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1

	# Ensure resilvering doesn't complete.
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby

	# Confirm pool is still replacing
	log_must pool_is_replacing $TESTPOOL1
	log_must zpool export $TESTPOOL1
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0

	############################################################
	# Test 1: rewind while device is resilvering.
	# Import read only to avoid overwriting more recent blocks.
	############################################################
	log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	log_must zpool export $TESTPOOL1

	# Import pool at latest txg to finish the resilvering
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	log_must overwrite_data $TESTPOOL1 ""
	log_must wait_for_pool_config $TESTPOOL1 "$poolfinalstate"
	log_must zpool export $TESTPOOL1

	# Move out the new device
	log_must mv $replaceby $BACKUP_DEVICE_DIR/

	############################################################
	# Test 2: rewind after device has been replaced.
	# Import read-write since we won't need the pool anymore.
	############################################################
	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_hashsums $MD5FILE

	# Cleanup
	log_must zpool destroy $TESTPOOL1
	# Restore the device we moved out
	log_must mv "$BACKUP_DEVICE_DIR/$(basename $replaceby)" $DEVICE_DIR/
	# Fast way to clear vdev labels
	log_must zpool create -f $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4
	log_must zpool destroy $TESTPOOL2

	log_note ""
}
153
# Record txg history
is_linux && log_must set_tunable32 TXG_HISTORY 100

log_must mkdir -p $BACKUP_DEVICE_DIR
# Make the devices bigger to reduce chances of overwriting MOS metadata.
increase_device_sizes $(( FILE_SIZE * 4 ))

# We set zfs_txg_timeout to 1 to reduce resilvering time at each sync.
# The previous value is saved first so it can be put back afterwards.
ZFS_TXG_TIMEOUT=$(get_zfs_txg_timeout)
set_zfs_txg_timeout 1

# Arguments of each test_replace_vdev call, in order:
#   pool layout to create, vdev to replace, replacement vdev,
#   layout expected after the replacement, a device list (not used by
#   test_replace_vdev), value forwarded to write_some_data.
test_replace_vdev "$VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"$VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1" 15

test_replace_vdev "mirror $VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"mirror $VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1" 10

test_replace_vdev "raidz $VDEV0 $VDEV1 $VDEV2" \
	"$VDEV1" "$VDEV3" \
	"raidz $VDEV0 $VDEV3 $VDEV2" \
	"$VDEV0 $VDEV1 $VDEV2" 10

test_replace_vdev "draid $VDEV0 $VDEV1 $VDEV2 $VDEV3" \
	"$VDEV1" "$VDEV4" \
	"draid $VDEV0 $VDEV4 $VDEV2 $VDEV3 spares draid1-0-0" \
	"$VDEV0 $VDEV1 $VDEV2 $VDEV3" 10

# Put the saved timeout back. Same guard as in custom_cleanup: if no value
# could be captured above, do not call the setter with an empty argument.
if [[ -n $ZFS_TXG_TIMEOUT ]]; then
	set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
fi

log_pass "zpool import rewind after device replacement passed."
188