#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2025, Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

#
# Command line to run for each sync helper, keyed by helper name. The literal
# token DATAFILE is a placeholder; failmode_sync_test substitutes the path of
# the file to write before running the command.
#
typeset -A failmode_sync_helper_cmd=(
    ["fsync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 conv=fsync'
    ["msync"]='mmap_write_sync DATAFILE'
    ["osync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 oflag=sync'
    ["syncalways"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1'
)

#
# Extra "zfs create" options for the test dataset, keyed by helper name.
# Helpers without an entry here get no extra options.
#
typeset -A failmode_sync_helper_dsopts=(
    ["syncalways"]="-o sync=always"
)

#
# failmode_sync_cleanup
#
# Undo whatever failmode_sync_test left behind: remove all zinject handlers,
# clear $TESTPOOL, then destroy it. The first two steps are best effort
# (their failures are deliberately ignored) so that the destroy is always
# attempted; the function returns the status of destroy_pool.
#
function failmode_sync_cleanup
{
	zinject -c all || true
	zpool clear $TESTPOOL || true
	destroy_pool $TESTPOOL
}

#
# failmode_sync_test <failmode> <helper>
#
# run a failmode sync test:
# - failmode: wait|continue
# - helper: fsync|msync|osync|syncalways
#
# Creates $TESTPOOL from the first two disks in $DISKS (one data disk, one
# separate log), injects write and probe errors on both, runs the helper's
# sync write in the background, and checks how it behaved once the pool
# suspended. The pool is destroyed before the results are checked.
#
# Sets the global variables DISK1 and DISK2.
#
function failmode_sync_test
{
	typeset failmode=$1
	typeset helper=$2

	# reject bad arguments before touching any disk. this also keeps
	# failmode values that zpool accepts but this test can't handle
	# (e.g. "panic") from ever reaching "zpool create"
	case $failmode in
	wait|continue)
		;;
	*)
		log_fail "impossible failmode: $failmode"
		;;
	esac
	if [[ -z $helper || -z ${failmode_sync_helper_cmd[$helper]} ]] ; then
		log_fail "unknown failmode sync helper: $helper"
	fi

	# we'll need two disks, one for the main pool, one for the log
	read -r DISK1 DISK2 _ <<<"$DISKS"
	if [[ -z $DISK2 ]] ; then
		log_fail "failmode sync test needs two disks, have: $DISKS"
	fi

	# file to write to the pool
	typeset datafile="/$TESTPOOL/$TESTFS/datafile"

	# create a single-disk pool with a separate log and the wanted failmode
	log_must zpool create \
	    -f -o failmode=$failmode $TESTPOOL $DISK1 log $DISK2

	# create the test dataset. we bias the ZIL towards the log device to
	# try to ensure that the sync write never involves the main device.
	# the per-helper options are intentionally unquoted so that they split
	# into separate arguments (or vanish when the helper has none)
	log_must zfs create \
	    -o recordsize=128k -o logbias=latency \
	    ${failmode_sync_helper_dsopts[$helper]} \
	    $TESTPOOL/$TESTFS

	# create the target file. the ZIL head structure is created on first
	# use, and does a full txg wait to finish, which we want to avoid
	log_must dd if=/dev/zero of=$datafile bs=128k count=1 conv=fsync
	log_must zpool sync

	# inject errors. writes will fail, as will the followup probes. if an
	# injection can't be set up the pool will never suspend, so fail here
	# rather than after the suspend timeout below
	log_must zinject -d $DISK1 -e io -T write $TESTPOOL
	log_must zinject -d $DISK1 -e nxio -T probe $TESTPOOL
	log_must zinject -d $DISK2 -e io -T write $TESTPOOL
	log_must zinject -d $DISK2 -e nxio -T probe $TESTPOOL

	# run the helper program in the background. the pool should immediately
	# suspend, and the sync op block or fail based on the failmode.
	# $helper_cmd is intentionally unquoted: it is a whole command line
	typeset helper_cmd=${failmode_sync_helper_cmd[$helper]/DATAFILE/$datafile}
	log_note "running failmode sync helper: $helper_cmd"
	$helper_cmd &
	typeset -i pid=$!

	# should only take a moment, but give it a chance
	log_note "waiting for pool to suspend"
	typeset -i tries=10
	until [[ $(kstat_pool $TESTPOOL state) == "SUSPENDED" ]] ; do
		if ((tries-- == 0)); then
			log_fail "pool didn't suspend"
		fi
		sleep 1
	done

	# zil_commit() should have noticed the suspend by now
	typeset -i zilerr=$(kstat zil.zil_commit_error_count)

	# see if the helper program blocked
	typeset -i blocked
	if kill -0 $pid ; then
		blocked=1
		log_note "$helper: blocked in the kernel"
	else
		blocked=0
		log_note "$helper: exited while pool suspended"
	fi

	# bring the pool back online
	zinject -c all
	zpool clear $TESTPOOL

	# program definitely exited now, get its return code
	wait $pid
	typeset -i rc=$?

	failmode_sync_cleanup

	log_note "$helper: zilerr=$zilerr blocked=$blocked rc=$rc"

	# confirm expected results for the failmode. in both modes the ZIL
	# saw an error, and fell back to a txg sync
	log_must test $zilerr -ne 0
	case $failmode in
	wait)
		# - sync op blocked when the pool suspended
		# - after resume, sync op succeeded, helper returned success
		log_must test $blocked -eq 1
		log_must test $rc -eq 0
		;;
	continue)
		# - helper exited when the pool suspended
		# - sync op returned an error, so helper returned failure
		log_must test $blocked -eq 0
		log_must test $rc -ne 0
		;;
	esac
}