xref: /freebsd/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1#!/bin/ksh -p
2# SPDX-License-Identifier: CDDL-1.0
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
20#
21
22. $STF_SUITE/include/libtest.shlib
23. $STF_SUITE/tests/functional/events/events_common.kshlib
24. $STF_SUITE/tests/functional/fault/fault.cfg
25
#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - A physically detached device is
# marked REMOVED, and is brought back ONLINE when reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is removed when detached
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
#    zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
#    the removed data device should be marked REMOVED
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
# conditions caused by mixing creation/removal events from partitioning the
# disk (zpool create) and events from physically removing it (remove_disk).
#
# NOTE: the test relies on ZED to transition the vdev to the REMOVED state on
# a device removal event.  The ZED does receive a removal notification but only
# relies on it to activate a hot spare.  Additional work is planned to extend an
# existing ioctl interface to allow the ZED to transition the vdev into a
# removed state.
#
verify_runnable "both"

# The removable disk used by this test comes from the scsi_debug module, so
# the test is reported as unsupported on any platform other than Linux.
if ! is_linux; then
	log_unsupported "scsi debug module unsupported"
else
	# Add one 512b scsi_debug device (4Kn would generate IO errors)
	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
	# add 32m of fudge
	load_scsi_debug $(($MINVDEVSIZE / 1024 / 1024 + 32)) 1 1 1 '512b'
fi
61
#
# Exit handler: destroy the test pool, delete the file-backed vdevs created
# by this test and unload the scsi_debug module.
#
function cleanup
{
	destroy_pool $TESTPOOL
	rm -f $filedev1 $filedev2 $filedev3 $sparedev
	unload_scsi_debug
}
71
log_assert "ZED detects physically removed devices"

log_onexit cleanup

# File-backed vdevs that share the pools with the removable device.
filedev1="$TEST_BASE_DIR/file-vdev-1"
filedev2="$TEST_BASE_DIR/file-vdev-2"
filedev3="$TEST_BASE_DIR/file-vdev-3"
sparedev="$TEST_BASE_DIR/file-vdev-spare"
# Device that gets removed and reinserted during the test.
removedev=$(get_debug_device)

# Pool layouts under test; every layout includes the removable device.
typeset poolconfs=(
	"mirror $filedev1 $removedev"
	"raidz3 $filedev1 $filedev2 $filedev3 $removedev"
	"mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
)

# Size every backing file to the minimum vdev size.
for vdev_file in $filedev1 $filedev2 $filedev3 $sparedev; do
	log_must truncate -s $MINVDEVSIZE $vdev_file
done
92
# Scenario without a spare: the removed device must be reported REMOVED and
# must return to ONLINE once it is reinserted.
scsi_disk=${DEV_DSKDIR}/${removedev}
for layout in "${poolconfs[@]}"; do
	# 1. Create a pool ($layout is left unquoted on purpose: it has to be
	#    split into the individual vdev arguments)
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait $scsi_disk

	pool_mnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device, then write to the pool
	#    and sync it
	remove_disk $removedev
	log_must mkfile 1m $pool_mnt/file
	sync_pool $TESTPOOL

	# 3. Verify the device is removed.
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	insert_disk $removedev

	# 5. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# Tear the pool down and relabel the disk before the next layout
	destroy_pool $TESTPOOL
	log_must parted "$scsi_disk" -s -- mklabel msdos
	block_device_wait $scsi_disk
done
120
# 6. Repeat the same tests with a spare device: zed will use the spare to handle
#    the removed data device
scsi_disk=${DEV_DSKDIR}/${removedev}
for layout in "${poolconfs[@]}"; do
	# special vdev can not be replaced by a hot spare
	case "$layout" in
	*"special mirror"*)
		continue
		;;
	esac

	# 1. Create a pool with a spare ($layout is left unquoted on purpose:
	#    it has to be split into the individual vdev arguments)
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait $scsi_disk
	log_must zpool add $TESTPOOL spare $sparedev

	pool_mnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Simulate physical removal of one device, then write to the pool
	#    and sync it
	remove_disk $removedev
	log_must mkfile 1m $pool_mnt/file
	sync_pool $TESTPOOL

	# 3. Verify the device is handled by the spare.
	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	insert_disk $removedev

	# 5. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# Tear the pool down and relabel the disk before the next layout
	destroy_pool $TESTPOOL
	log_must parted "$scsi_disk" -s -- mklabel msdos
	block_device_wait $scsi_disk
done
157
# 7. Repeat the same tests again with a faulted spare device: the spare is
#    made unavailable first, and the removed data device is then expected to
#    reach the REMOVED state
scsi_disk=${DEV_DSKDIR}/${removedev}
for layout in "${poolconfs[@]}"; do
	# 1. Create a pool with a spare ($layout is left unquoted on purpose:
	#    it has to be split into the individual vdev arguments)
	log_must zpool create -f $TESTPOOL $layout
	block_device_wait $scsi_disk
	log_must zpool add $TESTPOOL spare $sparedev

	pool_mnt=$(get_prop mountpoint /$TESTPOOL)

	# 2. Fault the spare device making it unavailable
	log_must zpool offline -f $TESTPOOL $sparedev
	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"

	# 3. Simulate physical removal of one device, then write to the pool
	#    and sync it
	remove_disk $removedev
	log_must mkfile 1m $pool_mnt/file
	sync_pool $TESTPOOL

	# 4. Verify the device is removed
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 5. Reattach the device
	insert_disk $removedev

	# 6. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# Tear the pool down and relabel the disk before the next layout
	destroy_pool $TESTPOOL
	log_must parted "$scsi_disk" -s -- mklabel msdos
	block_device_wait $scsi_disk
done

log_pass "ZED detects physically removed devices"
194