#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - a physically detached device is
# marked REMOVED, and is brought back ONLINE when it is reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is removed when detached
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
#    zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
#    the detached data device should be marked REMOVED
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
# conditions caused by mixing creation/removal events from partitioning the
# disk (zpool create) and events from physically removing it (remove_disk).
#
# NOTE: the test relies on ZED to transition the vdev to the REMOVED state on
# a device removal event. The ZED does receive a removal notification but only
# relies on it to activate a hot spare. Additional work is planned to extend an
# existing ioctl interface to allow the ZED to transition the vdev into a
# removed state.
#

verify_runnable "both"

if ! is_linux; then
	log_unsupported "scsi debug module unsupported"
else
	# The removable disk is a single 512b scsi_debug device (4Kn would
	# generate IO errors).
	# NOTE: it must be larger than the other "file" vdevs and the minimum
	# SPA devsize, hence the extra 32m of fudge.
	load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
fi

#
# Exit handler (registered with log_onexit below): destroy the test pool,
# delete the file-backed vdevs and unload the scsi_debug device.
#
function cleanup
{
	destroy_pool $TESTPOOL
	rm -f $filedev1 $filedev2 $filedev3 $sparedev
	unload_scsi_debug
}

#
# Detach $removedev, then write a 1m file under $mountdir and sync the pool.
# Reads the globals removedev, mountdir and TESTPOOL.
#
function detach_and_write
{
	remove_disk $removedev
	log_must mkfile 1m $mountdir/file
	sync_pool $TESTPOOL
}

#
# Reattach $removedev and wait for the pool to report it as ONLINE.
#
function reattach_and_verify_online
{
	insert_disk $removedev
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
}

#
# Per-iteration teardown: destroy the pool, write a fresh msdos label to
# $removedev and wait for the block device before the next pool is created.
#
function recycle_removedev
{
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
}

log_assert "ZED detects physically removed devices"

log_onexit cleanup

filedev1="$TEST_BASE_DIR/file-vdev-1"
filedev2="$TEST_BASE_DIR/file-vdev-2"
filedev3="$TEST_BASE_DIR/file-vdev-3"
sparedev="$TEST_BASE_DIR/file-vdev-spare"
removedev=$(get_debug_device)

# Pool layouts under test; each one places $removedev in a redundant vdev.
# Entries are expanded unquoted on purpose so they split into zpool arguments.
typeset layouts=(
	"mirror $filedev1 $removedev"
	"raidz3 $filedev1 $filedev2 $filedev3 $removedev"
	"mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
)

# Back the file vdevs and the spare with sparse files of the minimum size.
for vdevfile in $filedev1 $filedev2 $filedev3 $sparedev; do
	log_must truncate -s $MINVDEVSIZE $vdevfile
done

# Steps 1-5: no spare in the pool.
for layout in "${layouts[@]}"; do
	# 1. Create a pool
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait ${DEV_DSKDIR}/${removedev}

	mountdir=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device
	detach_and_write

	# 3. Verify the device is removed.
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	# 5. Verify the device is onlined
	reattach_and_verify_online

	# cleanup
	recycle_removedev
done

# 6. Repeat the same tests with a spare device: zed will use the spare to
# handle the removed data device
for layout in "${layouts[@]}"; do
	# special vdev can not be replaced by a hot spare
	case "$layout" in
	*"special mirror"*)
		continue
		;;
	esac

	# 1. Create a pool with a spare
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait ${DEV_DSKDIR}/${removedev}
	log_must zpool add $TESTPOOL spare $sparedev

	mountdir=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device
	detach_and_write

	# 3. Verify the device is handled by the spare.
	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	# 5. Verify the device is onlined
	reattach_and_verify_online

	# cleanup
	recycle_removedev
done

# 7. Repeat the same tests again with a faulted spare device: with the spare
# unavailable, the detached data device itself is expected to reach REMOVED
for layout in "${layouts[@]}"; do
	# 1. Create a pool with a spare
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait ${DEV_DSKDIR}/${removedev}
	log_must zpool add $TESTPOOL spare $sparedev

	mountdir=$(get_prop mountpoint /$TESTPOOL)

	# 2. Fault the spare device making it unavailable
	log_must zpool offline -f $TESTPOOL $sparedev
	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"

	# 3. Simulate physical removal of one device
	detach_and_write

	# 4. Verify the device is removed
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 5. Reattach the device
	# 6. Verify the device is onlined
	reattach_and_verify_online

	# cleanup
	recycle_removedev
done

log_pass "ZED detects physically removed devices"