#!/bin/ksh -p

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2019, Datto Inc. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/resilver/resilver.cfg

SYSEVENT=$STF_SUITE/tests/functional/resilver/sysevent

#
# DESCRIPTION:
#	Testing resilver restart logic both with and without the deferred
#	resilver feature enabled, verifying that resilver is not restarted
#	when it is unnecessary.
#
# STRATEGY:
#	1. Create a pool
#	2. Create four filesystems with the primary cache disabled to force
#	   reads
#	3. Write four files simultaneously, one to each filesystem
#	4. Do with and without deferred resilvers enabled
#	   a. Replace a vdev with a spare & suspend resilver immediately
#	   b. Verify resilver starts properly
#	   c. Offline / online another vdev to introduce a new DTL range
#	   d. Verify that the resilver restarts or is deferred
#	   e. Inject read errors on the vdev that was offlined / onlined
#	   f. Verify that resilver did not restart
#	   g. Unsuspend resilver and wait for it to finish
#	   h. Verify that there are two resilvers and nothing is deferred
#

#
# Restore the tunables saved at startup, clear injected faults, destroy the
# pool and its backing files, and stop the event monitor.
#
# EVTFILE / EVTPID are only set once the monitor has been started, so both
# are guarded: cleanup is registered (log_onexit) before they exist.
#
function cleanup
{
	log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME
	log_must set_tunable32 zfs_scan_suspend_progress \
	    $ORIG_SCAN_SUSPEND_PROGRESS
	log_must zinject -c all
	destroy_pool $TESTPOOL
	rm -f "${VDEV_FILES[@]}" "$SPARE_VDEV_FILE"
	if [[ -n "$EVTFILE" ]]
	then
		rm -f "$EVTFILE"
	fi
	if [[ -n "$EVTPID" ]]
	then
		kill "$EVTPID"
	fi
}

#
# Count resilver start events recorded in $EVTFILE and, optionally, check
# which vdev carries the resilver defer flag.
#
# $1 msg   - suffix appended to the log / failure message (may be empty)
# $2 cnt   - expected number of lines (resilver start events) in $EVTFILE
# $3 defer - ''  : skip the deferred-vdev check entirely
#            '-' : no vdev may have the defer flag set
#            N   : the defer flag must be reported for child N, and only N
#
# Fails the test via log_fail on any mismatch.  RESILVERS and VDEV_DEFERS
# are left set as globals, as before.
#
function verify_restarts # <msg> <cnt> <defer>
{
	typeset msg=$1
	typeset cnt=$2
	typeset defer=$3

	# check the number of resilver starts in the events log
	RESILVERS=$(wc -l "$EVTFILE" | awk '{ print $1 }')
	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
	if [[ "$RESILVERS" -ne "$cnt" ]]
	then
		log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
	fi

	if [[ -z "$defer" ]]
	then
		return 0
	fi

	# use zdb to find which vdevs have the resilver defer flag: remember
	# the digits of the most recent "children" line and print them when a
	# line ending in com.datto:resilver_defer is seen
	VDEV_DEFERS=$(zdb -C $TESTPOOL | awk '
	    /children/ { gsub(/[^0-9]/, ""); child = $0 }
	    /com\.datto:resilver_defer$/ { print child }
	')

	if [[ "$defer" == "-" ]]
	then
		if [[ -n "$VDEV_DEFERS" ]]
		then
			log_fail "didn't expect any vdevs to have resilver deferred"
		fi
		return 0
	fi

	if [[ -z "$VDEV_DEFERS" ]]
	then
		log_fail "expected resilver deferred on vdev $defer, found none"
	fi

	# Compare as strings: an arithmetic -eq would treat empty or
	# otherwise non-numeric zdb output as 0.
	if [[ "$VDEV_DEFERS" != "$defer" ]]
	then
		log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
	fi
	return 0
}

log_assert "Check for unnecessary resilver restarts"

ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms)
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress)

#
# Expected results at each of the four checkpoints in the loop below:
#   [0] after the replace, [1] after offline/online,
#   [2] after zinject,     [3] after the resilver has finished.
# RESTARTS / VDEVS are used without deferred resilvers, DEFER_* with them.
# The *VDEVS entries are the <defer> argument of verify_restarts.
#
set -A RESTARTS -- '1' '2' '2' '2'
set -A VDEVS -- '' '' '' ''
set -A DEFER_RESTARTS -- '1' '1' '1' '2'
set -A DEFER_VDEVS -- '-' '2' '2' '-'

# Two words on purpose: expanded unquoted as "<old vdev> <new vdev>"
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"

log_onexit cleanup

# Monitor for resilver start events and log them to $EVTFILE as they occur
EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX)
log_must test -n "$EVTFILE"
EVTPID=$($SYSEVENT -o "$EVTFILE" ESC_ZFS_resilver_start)
log_must test -n "$EVTPID"

log_must truncate -s $VDEV_FILE_SIZE "${VDEV_FILES[@]}" "$SPARE_VDEV_FILE"

log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
    raidz "${VDEV_FILES[@]}"

# create 4 filesystems
for fs in fs{0..3}
do
	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
done

# simultaneously write 16M to each of them
set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0
log_note "Writing data files"
dd_pids=""
for datafile in "${DATAPATHS[@]}"
do
	dd if=/dev/urandom of="$datafile" bs=1M count=16 > /dev/null 2>&1 &
	dd_pids="$dd_pids $!"
done
# wait for each writer individually so that a failed write is not ignored
for pid in $dd_pids
do
	wait $pid || log_fail "failed to write a data file (dd pid $pid)"
done

# test without and with the deferred resilver feature enabled
for mode in "without" "with"
do
	log_note "Testing $mode deferred resilvers"

	if [[ $mode == "with" ]]
	then
		log_must zpool set feature@resilver_defer=enabled $TESTPOOL
		RESTARTS=( "${DEFER_RESTARTS[@]}" )
		VDEVS=( "${DEFER_VDEVS[@]}" )
		# second pass: replace the spare with the original vdev again
		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
	fi

	# clear the events
	cp /dev/null "$EVTFILE"

	# limit scanning time
	log_must set_tunable32 zfs_resilver_min_time_ms 50

	# initiate a resilver and suspend the scan as soon as possible
	log_must zpool replace $TESTPOOL $VDEV_REPLACE
	log_must set_tunable32 zfs_scan_suspend_progress 1

	# there should only be 1 resilver start
	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"

	# offline then online a vdev to introduce a new DTL range after current
	# scan, which should restart (or defer) the resilver
	log_must zpool offline $TESTPOOL "${VDEV_FILES[2]}"
	log_must zpool sync $TESTPOOL
	log_must zpool online $TESTPOOL "${VDEV_FILES[2]}"
	log_must zpool sync $TESTPOOL

	# there should now be 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"

	# inject read io errors on vdev and verify resilver does not restart
	log_must zinject -a -d "${VDEV_FILES[2]}" -e io -T read -f 0.25 $TESTPOOL
	log_must cat "${DATAPATHS[1]}" > /dev/null
	log_must zinject -c all

	# there should still be 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"

	# unsuspend resilver
	log_must set_tunable32 zfs_scan_suspend_progress 0
	log_must set_tunable32 zfs_resilver_min_time_ms 3000

	# wait (up to 60 one-second polls) for resilver to finish
	for iter in {0..59}
	do
		is_pool_resilvered $TESTPOOL && break
		sleep 1
	done
	is_pool_resilvered $TESTPOOL ||
	    log_fail "resilver timed out"

	# wait for a few txg's to see if a resilver happens
	log_must zpool sync $TESTPOOL
	log_must zpool sync $TESTPOOL

	# there should now be 2 resilver starts
	verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
done

log_pass "Resilver did not restart unnecessarily"