#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Physically detached device is
# made removed and onlined when reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is removed when detached
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
#    zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
#    the removed data device should be removed
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
#       conditions caused by mixing creation/removal events from partitioning
#       the disk (zpool create) and events from physically removing it
#       (remove_disk).
#
# NOTE: the test relies on ZED to transition the vdev to the removed state on
#       a device removal event. The ZED does receive a removal notification
#       but only relies on it to activate a hot spare. Additional work is
#       planned to extend an existing ioctl interface to allow the ZED to
#       transition the vdev into a removed state.
#
verify_runnable "both"

if is_linux; then
	# Add one 512b scsi_debug device (4Kn would generate IO errors)
	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
	# add 32m of fudge
	load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
else
	log_unsupported "scsi debug module unsupported"
fi

#
# Exit handler (registered with log_onexit below): destroy the test pool,
# delete the file-backed vdevs and unload the scsi_debug device.
#
function cleanup
{
	destroy_pool $TESTPOOL
	rm -f $filedev1
	rm -f $filedev2
	rm -f $filedev3
	rm -f $sparedev
	unload_scsi_debug
}

#
# Create $TESTPOOL from a vdev specification and wait for the device node of
# $removedev.
#
# $1 - vdev specification, one entry of the poolconfs array
#
function rmtest_create_pool
{
	typeset conf="$1"

	# $conf is intentionally unquoted: it carries several zpool arguments
	# that must be split into separate words.
	log_must zpool create -f $TESTPOOL $conf
	block_device_wait ${DEV_DSKDIR}/${removedev}
}

#
# Simulate the physical removal of $removedev, then write a file to the pool
# and sync it.
#
# $1 - mountpoint of $TESTPOOL
#
function rmtest_detach_device
{
	typeset mntpnt="$1"

	remove_disk $removedev
	log_must mkfile 1m $mntpnt/file
	sync_pool $TESTPOOL
}

#
# Reattach $removedev, verify that it comes back ONLINE, then destroy the
# pool and relabel the device so the next iteration starts from a clean disk.
#
function rmtest_reattach_and_reset
{
	# Reattach the device
	insert_disk $removedev

	# Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# cleanup
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
}

log_assert "ZED detects physically removed devices"

log_onexit cleanup

filedev1="$TEST_BASE_DIR/file-vdev-1"
filedev2="$TEST_BASE_DIR/file-vdev-2"
filedev3="$TEST_BASE_DIR/file-vdev-3"
sparedev="$TEST_BASE_DIR/file-vdev-spare"
removedev=$(get_debug_device)

# Pool layouts under test; $removedev is a member of each of them.
typeset poolconfs=(
    "mirror $filedev1 $removedev"
    "raidz3 $filedev1 $filedev2 $filedev3 $removedev"
    "mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
)

log_must truncate -s $MINVDEVSIZE $filedev1
log_must truncate -s $MINVDEVSIZE $filedev2
log_must truncate -s $MINVDEVSIZE $filedev3
log_must truncate -s $MINVDEVSIZE $sparedev

for conf in "${poolconfs[@]}"
do
	# 1. Create a pool
	rmtest_create_pool "$conf"

	mntpnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device
	rmtest_detach_device "$mntpnt"

	# 3. Verify the device is removed.
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	# 5. Verify the device is onlined (followed by per-iteration cleanup)
	rmtest_reattach_and_reset
done

# 6. Repeat the same tests with a spare device: zed will use the spare to handle
#    the removed data device
for conf in "${poolconfs[@]}"
do
	# special vdev can not be replaced by a hot spare
	if [[ $conf = *"special mirror"* ]]; then
		continue
	fi

	# 1. Create a pool with a spare
	rmtest_create_pool "$conf"
	log_must zpool add $TESTPOOL spare $sparedev

	mntpnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device
	rmtest_detach_device "$mntpnt"

	# 3. Verify the device is handled by the spare.
	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	# 5. Verify the device is onlined (followed by per-iteration cleanup)
	rmtest_reattach_and_reset
done

# 7. Repeat the same tests again with a faulted spare device: the removed data
#    device should still be marked REMOVED when no spare is available
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool with a spare
	rmtest_create_pool "$conf"
	log_must zpool add $TESTPOOL spare $sparedev

	mntpnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Fault the spare device making it unavailable
	log_must zpool offline -f $TESTPOOL $sparedev
	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"

	# 3. Simulate physical removal of one device
	rmtest_detach_device "$mntpnt"

	# 4. Verify the device is removed
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 5. Reattach the device
	# 6. Verify the device is onlined (followed by per-iteration cleanup)
	rmtest_reattach_and_reset
done

log_pass "ZED detects physically removed devices"