#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2025, Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

#
# Command line run for each sync helper. The literal token DATAFILE is a
# placeholder; failmode_sync_test substitutes the real file path before
# running the command. The command is executed with plain word splitting,
# so entries must not rely on shell quoting or redirections.
#
typeset -A failmode_sync_helper_cmd=(
	["fsync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 conv=fsync'
	["msync"]='mmap_write_sync DATAFILE'
	["osync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 oflag=sync'
	["syncalways"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1'
)

#
# Extra "zfs create" options needed by a helper. Helpers without an entry
# get no extra options. "syncalways" issues an ordinary write, so the
# dataset itself has to force it to be synchronous.
#
typeset -A failmode_sync_helper_dsopts=(
	["syncalways"]="-o sync=always"
)

#
# failmode_sync_cleanup
#
# Best-effort teardown: remove all injected faults, clear the pool's error
# state and destroy the test pool. The first two steps are allowed to fail
# (there may be nothing to clear) so that the destroy is always attempted.
#
function failmode_sync_cleanup
{
	zinject -c all || true
	zpool clear $TESTPOOL || true
	destroy_pool $TESTPOOL
}

#
# failmode_sync_test <failmode> <helper>
#
# run a failmode sync test:
# - failmode: wait|continue
# - helper:   fsync|msync|osync|syncalways
#
# Uses the first two entries of $DISKS (stored in the global variables
# DISK1 and DISK2) to build $TESTPOOL with a separate log device, makes all
# writes and probes to both disks fail, runs the helper in the background
# and checks that the sync operation blocks (failmode=wait) or returns an
# error (failmode=continue) once the pool has suspended.
#
# Any failed check ends the test through log_fail/log_must.
#
function failmode_sync_test
{
	typeset failmode=$1
	typeset helper=$2

	# validate the arguments before touching any disk. this matters most
	# for the failmode: "panic" is a valid pool property, so without this
	# check we would create the pool and then inject the very faults that
	# trigger it
	case $failmode in
	wait|continue)
		;;
	*)
		log_fail "impossible failmode: $failmode"
		;;
	esac

	# an unknown helper would expand to an empty command, nothing would
	# write to the pool, and we'd fail much later with a misleading
	# "pool didn't suspend"
	if [[ -z $helper || -z ${failmode_sync_helper_cmd[$helper]} ]] ; then
		log_fail "unknown sync helper: $helper"
	fi

	# we'll need two disks, one for the main pool, one for the log
	read -r DISK1 DISK2 _ <<<"$DISKS"
	if [[ -z $DISK1 || -z $DISK2 ]] ; then
		log_fail "need two disks, have: $DISKS"
	fi

	# file to write to the pool
	typeset datafile="/$TESTPOOL/$TESTFS/datafile"

	# create a single-disk pool with a separate log and the wanted failmode
	log_must zpool create \
	    -f -o failmode=$failmode $TESTPOOL $DISK1 log $DISK2

	# create the test dataset. we bias the ZIL towards the log device to
	# try to ensure that the sync write never involves the main device.
	# the helper's dataset options are deliberately unquoted so that they
	# split into separate arguments (or vanish when there are none)
	log_must zfs create \
	    -o recordsize=128k -o logbias=latency \
	    ${failmode_sync_helper_dsopts[$helper]} \
	    $TESTPOOL/$TESTFS

	# create the target file. the ZIL head structure is created on first
	# use, and does a full txg wait to finish, which we want to avoid
	log_must dd if=/dev/zero of=$datafile bs=128k count=1 conv=fsync
	log_must zpool sync

	# inject errors. writes will fail, as will the followup probes. if any
	# injection can't be set up the test is meaningless, so fail right
	# here rather than after timing out waiting for a suspend
	log_must zinject -d $DISK1 -e io -T write $TESTPOOL
	log_must zinject -d $DISK1 -e nxio -T probe $TESTPOOL
	log_must zinject -d $DISK2 -e io -T write $TESTPOOL
	log_must zinject -d $DISK2 -e nxio -T probe $TESTPOOL

	# run the helper program in the background. the pool should immediately
	# suspend, and the sync op block or fail based on the failmode.
	# $helper_cmd is unquoted on purpose: it is a command line to be split
	# into words
	typeset helper_cmd=${failmode_sync_helper_cmd[$helper]/DATAFILE/$datafile}
	log_note "running failmode sync helper: $helper_cmd"
	$helper_cmd &
	typeset -i pid=$!

	# should only take a moment, but give it a chance
	log_note "waiting for pool to suspend"
	typeset -i tries=10
	until [[ $(kstat_pool $TESTPOOL state) == "SUSPENDED" ]] ; do
		if ((tries-- == 0)); then
			log_fail "pool didn't suspend"
		fi
		sleep 1
	done

	# zil_commit() should have noticed the suspend by now
	typeset -i zilerr=$(kstat zil.zil_commit_error_count)

	# see if the helper program blocked. kill -0 only probes for the
	# process; its complaint about a missing pid is expected for
	# failmode=continue, so keep it out of the log
	typeset -i blocked
	if kill -0 $pid 2>/dev/null ; then
		blocked=1
		log_note "$helper: blocked in the kernel"
	else
		blocked=0
		log_note "$helper: exited while pool suspended"
	fi

	# bring the pool back online. these must succeed: if the pool stays
	# suspended a blocked helper never returns and the wait below hangs
	log_must zinject -c all
	log_must zpool clear $TESTPOOL

	# program definitely exited now, get its return code
	wait $pid
	typeset -i rc=$?

	failmode_sync_cleanup

	log_note "$helper: zilerr=$zilerr blocked=$blocked rc=$rc"

	# confirm expected results for the failmode. in both modes the ZIL
	# must have seen an error and fallen back to a txg sync
	log_must test $zilerr -ne 0
	if [[ $failmode = "wait" ]] ; then
		# - sync op blocked when the pool suspended
		# - after resume, sync op succeeded, helper returned success
		log_must test $blocked -eq 1
		log_must test $rc -eq 0
	else
		# failmode=continue (anything else was rejected on entry):
		# - helper exited when the pool suspended
		# - sync op returned an error, so helper returned failure
		log_must test $blocked -eq 0
		log_must test $rc -ne 0
	fi
}