xref: /freebsd/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh (revision 113e60742ef6ba5c069aa737ee57ba3c2f88b248)
1#!/bin/ksh -p
2# SPDX-License-Identifier: CDDL-1.0
3#
4# CDDL HEADER START
5#
6# The contents of this file are subject to the terms of the
7# Common Development and Distribution License (the "License").
8# You may not use this file except in compliance with the License.
9#
10# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11# or https://opensource.org/licenses/CDDL-1.0.
12# See the License for the specific language governing permissions
13# and limitations under the License.
14#
15# When distributing Covered Code, include this CDDL HEADER in each
16# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17# If applicable, add the following below this CDDL HEADER, with the
18# fields enclosed by brackets "[]" replaced with your own identifying
19# information: Portions Copyright [yyyy] [name of copyright owner]
20#
21# CDDL HEADER END
22#
23#
24# Copyright (c) 2017 by Intel Corporation. All rights reserved.
25# Copyright (c) 2023 by Klara, Inc. All rights reserved.
26#
27
28. $STF_SUITE/include/libtest.shlib
29. $STF_SUITE/tests/functional/fault/fault.cfg
30
31#
32# DESCRIPTION:
33# Testing Fault Management Agent ZED Logic - Automated Auto-Replace Test.
# Verifies that auto-replace works with by-id paths.
35#
36# STRATEGY:
37# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
#    This creates keys ID_VDEV and ID_VDEV_PATH and sets phys_path="scsidebug".
39# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
40# 3. Export the pool
41# 4. Wipe and offline the scsi_debug disk
42# 5. Import the pool with missing disk
43# 6. Re-online the wiped scsi_debug disk with a new serial number
44# 7. Verify ZED detects the new blank disk and replaces the missing vdev
45# 8. Verify that the scsi_debug disk was re-partitioned
46#
47# Creates a raidz1 zpool using persistent /dev/disk/by-id path names
48#
49# Auto-replace is opt in, and matches by phys_path.
50#
51
verify_runnable "both"

# This test only makes sense when $DISKS are physical devices; otherwise
# report the test as unsupported.
is_physical_device $DISKS || \
	log_unsupported "Unsupported disks for this test."
57
#
# Exit handler for this test (registered with log_onexit).
#
# Prints the final pool status into the log, destroys the test pool,
# deletes every "alias scsidebug" line from the vdev_id configuration
# file and finally calls unload_scsi_debug.
#
function cleanup
{
	# Record the pool's last known state before it is torn down
	zpool status ${TESTPOOL}
	destroy_pool ${TESTPOOL}

	# Drop the alias lines this test appended to the vdev_id config
	sed -i '/alias scsidebug/d' ${VDEVID_CONF}

	unload_scsi_debug
}
65
#
# Wait until a vdev transitions to its replacement vdev
#
# Polls the guid list of all vdevs in the pool once per second and
# compares the guid found at the given child index with the old guid.
#
# $1 pool     - pool to inspect
# $2 index    - index of the child vdev being replaced
# $3 oldguid  - guid of the vdev before the replacement
# $4 timeout  - maximum number of polls (default 60)
#
# Return 0 once a different, non-empty guid is reported at that index,
# 1 on timeout.
#
# Note: index +2 is to skip over root and raidz-0 vdevs
#
function wait_vdev_online # pool index oldguid timeout
{
	typeset pool=$1
	typeset -i index=$2+2
	typeset guid=$3
	typeset timeout=${4:-60}
	typeset -i i=0
	# Keep the scratch variables local instead of leaking them globally
	typeset -a vdev_guids
	typeset new_guid

	while [[ $i -lt $timeout ]]; do
		vdev_guids=( $(zpool get -H -o value guid "$pool" all-vdevs) )
		new_guid=${vdev_guids[$index]}

		#
		# An empty value means the guid list could not be read (or
		# was shorter than expected); that must not be mistaken for
		# a replacement, so keep polling in that case.
		#
		if [[ -n "$new_guid" && "$new_guid" != "$guid" ]]; then
			log_note "new vdev[$((index-2))]: ${new_guid}, replacing ${guid}"
			return 0
		fi

		i=$((i+1))
		sleep 1
	done

	return 1
}
log_assert "automated auto-replace with by-id paths"
log_onexit cleanup

# Load the scsi_debug device and remember its device name, its persistent
# (by-id) name and its SCSI host; the host is needed to re-insert the disk.
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
SD=$(get_debug_device)
SD_DEVICE_ID=$(get_persistent_disk_name $SD)
SD_HOST=$(get_scsi_host $SD)

# Register vdev_id alias for scsi_debug device to create a persistent path
echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
block_device_wait

# The alias is in effect only if udev reports an ID_VDEV key for the device.
SD_DEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | \
    awk -F'=' '/ID_VDEV=/ {print $2; exit}')
# Quoted test: the unquoted form only worked because an empty expansion
# degenerates into the one-argument form of test.
[[ -z "$SD_DEVICE" ]] && log_fail "vdev rule was not registered properly"
110
# Clear previously recorded pool events, then create the raidz1 pool with
# the scsi_debug disk (referenced by its by-id name) as the first child.
log_must zpool events -c
log_must zpool create -f ${TESTPOOL} raidz1 ${SD_DEVICE_ID} ${DISK1} ${DISK2} ${DISK3}

# Remember the guid of the scsi_debug vdev so the replacement can be detected
vdev_guid=$(zpool get guid -H -o value ${TESTPOOL} ${SD_DEVICE_ID})
log_note original vdev guid ${vdev_guid}

# Auto-replace is opt-in so the property has to be enabled explicitly
log_must zpool set autoreplace=on ${TESTPOOL}

# Put some data into the pool, then export it
log_must zfs create ${TESTPOOL}/fs
log_must fill_fs /${TESTPOOL}/fs 4 100 4096 512 R
log_must zpool export ${TESTPOOL}

# Record the partition table UUID for later comparison; it stays empty
# when udevadm returns nothing for the query.
part_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
    --value /dev/disk/by-id/${SD_DEVICE_ID})
if [[ -n "$part_uuid" ]]; then
	log_note original disk GPT uuid ${part_uuid}
fi
129
#
# Wipe and offline the disk
#
# Zeroing the disk alone does not expunge the partitions; the kernel
# must also be told about the change (e.g., 'hdparm -z' or 'partprobe').
#
# partprobe is overkill and hdparm is not as common as wipefs, so wipefs
# is used here: it lets the kernel know the partition was removed from
# the device (i.e., calls BLKRRPART ioctl).
#
log_must dd if=/dev/zero of=/dev/disk/by-id/${SD_DEVICE_ID} bs=1M count=${SDSIZE}
log_must /usr/sbin/wipefs -a /dev/disk/by-id/${SD_DEVICE_ID}
remove_disk ${SD}
block_device_wait

# Re-import the pool while the drive is missing; it must come up DEGRADED
log_must zpool import ${TESTPOOL}
log_must check_state ${TESTPOOL} "" "DEGRADED"
block_device_wait
149
#
# Online an empty disk in the same physical location, with a different by-id
# symlink. vpd_use_hostno is used to make sure the underlying serial number
# changes for the new disk, which in turn gives us a different by-id path.
#
# The original names were something like:
# 	/dev/disk/by-id/scsi-SLinux_scsi_debug_16000-part1
# 	/dev/disk/by-id/wwn-0x33333330000007d0-part1
#
# The newly inserted disk will have different links like:
# 	/dev/disk/by-id/scsi-SLinux_scsi_debug_2000-part1
# 	/dev/disk/by-id/wwn-0x3333333000003e80-part1
#
echo '0' > /sys/bus/pseudo/drivers/scsi_debug/vpd_use_hostno

insert_disk ${SD} ${SD_HOST}

# Look up the new persistent name and point the scsidebug alias at it so
# the physical path still refers to the same scsi_debug device
SD_DEVICE_ID=$(get_persistent_disk_name ${SD})
echo "alias scsidebug /dev/disk/by-id/${SD_DEVICE_ID}" >>${VDEVID_CONF}
block_device_wait
171
# Wait for the new disk to take over vdev 0 and for the replace to finish
log_must wait_vdev_online ${TESTPOOL} 0 ${vdev_guid} 45
log_must wait_replacing ${TESTPOOL} 45

# Validate auto-replace was successful
log_must check_state ${TESTPOOL} "" "ONLINE"

#
# Confirm the partition UUID changed so we know the new disk was relabeled
#
# Note: some older versions of udevadm don't support the "--property"
# option; no original UUID was recorded in that case, so the comparison
# is skipped.
#
if [[ -n "$part_uuid" ]]; then
	new_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
	    --value /dev/disk/by-id/${SD_DEVICE_ID})
	log_note new disk GPT uuid ${new_uuid}
	if [[ "$part_uuid" = "$new_uuid" ]]; then
		log_fail "The new disk was not relabeled as expected"
	fi
fi

log_pass "automated auto-replace with by-id paths"
194