xref: /freebsd/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1#!/bin/ksh -p
2# SPDX-License-Identifier: CDDL-1.0
3
4#
5# CDDL HEADER START
6#
7# This file and its contents are supplied under the terms of the
8# Common Development and Distribution License ("CDDL"), version 1.0.
9# You may only use this file in accordance with the terms of version
10# 1.0 of the CDDL.
11#
12# A full copy of the text of the CDDL should have accompanied this
13# source.  A copy of the CDDL is also available via the Internet at
14# http://www.illumos.org/license/CDDL.
15#
16# CDDL HEADER END
17#
18
19#
20# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
21#
22
23. $STF_SUITE/include/libtest.shlib
24. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
25
26#
27# DESCRIPTION:
28# Verify dRAID resilver to traditional and distributed spares for
29# a variety of pool configurations and pool states.
30#
31# STRATEGY:
32# 1. For resilvers:
33#    a. Create a semi-random dRAID pool configuration which can
34#       sustain 1 failure and has 5 distributed spares.
35#    b. Fill the pool with data
36#    c. Systematically fault and replace vdevs in the pools with
#       spares to test resilvering in common pool states.
38#    d. Scrub the pool to verify no data was lost
39#    e. Verify the contents of files in the pool
40#
41
#
# Exit handler registered through log_onexit below.  It sets
# REBUILD_SCRUB_ENABLED back to 1 (this test sets it to 0 further down)
# and then calls cleanup, which is not defined in this file and is
# presumably supplied by the sourced redundancy.kshlib.
#
function cleanup_tunable
{
	log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
	cleanup
}

log_assert "Verify dRAID resilver"

# Register the handler before touching the tunable so that the value is
# put back on every exit path that follows.
log_onexit cleanup_tunable

#
# Turn off the scrub that follows a sequential resilver.  With it out of
# the way, the checks below confirm that the resilver by itself
# reconstructed the data, without a scrub repairing anything afterwards.
#
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
57
#
# Post-replacement check shared by every scenario in the loop below.
#
# $1 - name of the pool to examine
#
# Runs verify_pool on the pool and then requires the "scan" line of the
# pool status to contain both "repaired 0B" and "with 0 errors".  This
# test runs with REBUILD_SCRUB_ENABLED set to 0, so a clean result here
# means the preceding resilver left nothing to repair.  Every step goes
# through log_must.
#
function verify_resilver_clean
{
	typeset pool=$1

	log_must verify_pool $pool
	log_must check_pool_status $pool "scan" "repaired 0B"
	log_must check_pool_status $pool "scan" "with 0 errors"
}

# Run the whole scenario list twice: once with a plain "zpool replace"
# (healing) and once with "zpool replace -s" (sequential).
for replace_mode in "healing" "sequential"; do

	# $flags is expanded unquoted on purpose so that the empty value
	# used for the healing pass disappears from the command line.
	case "$replace_mode" in
	sequential)
		flags="-s"
		;;
	*)
		flags=""
		;;
	esac

	# Single parity, five distributed spares, ten children.  Only the
	# data width is randomized (1 to 4).
	parity=1
	spares=5
	data=$(random_int_between 1 4)
	children=10
	draid="draid${parity}:${data}d:${children}c:${spares}s"

	setup_test_env $TESTPOOL $draid $children

	#
	# Replace devices with both ordinary vdevs and distributed spares,
	# starting from a mix of ONLINE and FAULTED states, so that several
	# different resilver code paths are exercised.  When all scenarios
	# have run the pool is expected to look like this (the example shows
	# a data width of 1; the width in the vdev name follows $data):
	#
	# NAME                                  STATE     READ WRITE CKSUM
	# testpool                              DEGRADED     0     0     0
	#   draid1:1d:10c:5s-0                  DEGRADED     0     0     0
	#     /var/tmp/basedir.28683/new_vdev0  ONLINE       0     0     0
	#     /var/tmp/basedir.28683/new_vdev1  ONLINE       0     0     0
	#     spare-2                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev2    FAULTED      0     0     0
	#       draid1-0-3                      ONLINE       0     0     0
	#     spare-3                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev3    FAULTED      0     0     0
	#       draid1-0-4                      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev4      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev5      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev6      ONLINE       0     0     0
	#     draid1-0-0                        ONLINE       0     0     0
	#     spare-8                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev8    FAULTED      0     0     0
	#       draid1-0-1                      ONLINE       0     0     0
	#     spare-9                           ONLINE       0     0     0
	#       /var/tmp/basedir.28683/vdev9    ONLINE       0     0     0
	#       draid1-0-2                      ONLINE       0     0     0
	# spares
	#   draid1-0-0                          INUSE     currently in use
	#   draid1-0-1                          INUSE     currently in use
	#   draid1-0-2                          INUSE     currently in use
	#   draid1-0-3                          INUSE     currently in use
	#   draid1-0-4                          INUSE     currently in use
	#

	# Scenario 1: an ONLINE device is replaced by a distributed spare
	# and then detached, leaving the spare in its place.
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev7 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev7 draid1-0-0
	log_must zpool detach $TESTPOOL $BASEDIR/vdev7
	log_must check_vdev_state $TESTPOOL draid1-0-0 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-0 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Scenario 2: a FAULTED device is paired with a distributed spare;
	# the resulting spare-8 vdev stays DEGRADED.
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev8 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev8 draid1-0-1
	log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-1 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-1 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Scenario 3: an ONLINE device is paired with a distributed spare
	# and kept, so the resulting spare-9 vdev is ONLINE.
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev9 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev9 draid1-0-2
	log_must check_vdev_state $TESTPOOL spare-9 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-2 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-2 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Scenario 4: a FAULTED device is replaced by a new file vdev.
	new_vdev0="$BASEDIR/new_vdev0"
	log_must truncate -s $MINVDEVSIZE $new_vdev0
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev0
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0
	log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE"
	verify_resilver_clean $TESTPOOL

	# Scenario 5: another FAULTED device is paired with a distributed
	# spare (spare-2 stays DEGRADED).
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev2 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev2 draid1-0-3
	log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-3 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-3 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Scenario 6: an ONLINE device is replaced by a new file vdev.
	new_vdev1="$BASEDIR/new_vdev1"
	log_must truncate -s $MINVDEVSIZE $new_vdev1
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1
	log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE"
	verify_resilver_clean $TESTPOOL

	# Scenario 7: an ONLINE device is paired with a distributed spare
	# and only faulted afterwards, which moves spare-3 from ONLINE to
	# DEGRADED.  As in the other online-replacement scenarios, confirm
	# the starting state before issuing the replace.
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 draid1-0-4
	log_must check_vdev_state $TESTPOOL spare-3 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-4 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-4 "INUSE"
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED"
	log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED"
	verify_resilver_clean $TESTPOOL

	# Finally, check the file contents written during setup and that
	# the pool reports no data errors.
	log_must is_data_valid $TESTPOOL
	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	# Tear the pool down so the next replace mode starts from scratch.
	cleanup
done

log_pass "Verify resilver to dRAID distributed spares"
191