#!/usr/bin/env bash
# xref: /freebsd/sys/contrib/openzfs/scripts/zloop.sh (revision 656d68a711952ac2b92ed258502978c5ba1dbc73)

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2015 by Delphix. All rights reserved.
# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017, Intel Corporation.
#

# Source the shared helper script that lives in the same directory as this
# script. This script reads ZTEST and ZDB without ever assigning them, so
# they are expected to come from that helper (or from the environment).
BASE_DIR=$(dirname "$0")
SCRIPT_COMMON=common.sh
if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	# Always exit here, even if writing the message itself fails.
	echo "Missing helper script ${SCRIPT_COMMON}" >&2
	exit 1
fi

# Script name. Not referenced anywhere else in this file, hence the
# "unused variable" exemption.
# shellcheck disable=SC2034
PROG=zloop.sh
# Debugger used to inspect core files; honours a GDB value from the
# environment and falls back to "gdb".
GDB=${GDB:-gdb}

# Defaults for the vdev working directory (-f) and the crash output
# directory (-c).
DEFAULTWORKDIR=/var/tmp
DEFAULTCOREDIR=/var/tmp/zloop

#
# Print the command line synopsis and option summary to stderr.
# Reads $0 and $DEFAULTCOREDIR; does not exit, callers decide the status.
#
function usage
{
	cat >&2 <<EOF

$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
  [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
  [-I <max iterations>] [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -c  Specify a core dump directory to use.
    -f  Specify working directory for ztest vdev files.
    -h  Print this help message.
    -l  Create 'ztest.core.N' symlink to core directory.
    -m  Max number of core dumps to allow before exiting.
    -s  Size of vdev devices.
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -I  Max number of iterations to loop before exiting.

EOF
}

#
# Run the given command line; if it fails, print "Command failed: ..."
# and terminate the script with status 1.
#
# The arguments are expanded unquoted on purpose: store_core hands this
# function a quoted glob ("$workdir/ztest*") and depends on the pattern
# being expanded here. The flip side is that arguments containing
# whitespace are split into several words.
#
function or_die
{
	# shellcheck disable=SC2068
	if ! $@; then
		echo "Command failed: $*"
		exit 1
	fi
}

#
# Work out $coreglob, the shell pattern used to find core files on this
# platform. On Linux the kernel core pattern may be rewritten to "core";
# the previous value is kept in $origcorepattern.
#
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
	# The glob is the leading part of the pattern up to the first '|',
	# '%' or whitespace character, followed by '*'.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	if [[ $coreglob = "*" ]]; then
		# The pattern has no usable leading part, so a bare "*" would
		# match every file. Switch to plain "core" files instead.
		echo "Setting core file pattern..."
		if ! echo "core" > /proc/sys/kernel/core_pattern; then
			echo "Unable to set the core file pattern" >&2
			exit 1
		fi
		# Put the previous pattern back when the shell exits, so
		# that early exits do not leave the system modified. Single
		# quotes defer the expansion until the trap runs.
		trap 'echo "$origcorepattern" > /proc/sys/kernel/core_pattern' EXIT
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac

#
# Print the name of the oldest file (by modification time) matching
# $coreglob, or nothing when no file matches.
#
function core_file
{
	# $coreglob is left unquoted so that the shell expands the pattern;
	# the ls error for an unmatched pattern is discarded.
	# shellcheck disable=SC2012,SC2086
	ls -tr1 $coreglob 2>/dev/null | head -n 1
}

#
# Print the program that produced the core file "$1": $ZDB when gdb's
# "Core was generated by" line mentions zdb, $ZTEST otherwise.
# No trailing newline is printed.
#
function core_prog
{
	local prog core_id

	# shellcheck disable=SC2154
	prog=$ZTEST
	# Quotes in gdb's line are turned into spaces so that a command
	# name directly followed by a closing quote still matches "zdb ".
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr  \' ' ')
	if [[ "$core_id" == *"zdb "* ]]; then
		# shellcheck disable=SC2154
		prog=$ZDB
	fi
	printf "%s" "$prog"
}

#
# Archive the evidence of a failed ztest run.
#
# A run counts as a crash when $ztrc is non-zero or core_file reports an
# existing core file. In that case zdb output is collected, the logs, vdev
# files, pool cache and core file are moved to "$coredir/zloop-<timestamp>",
# and a gdb report is written next to them when a core file exists. The
# script exits with status 1 once $coremax (if positive) crashes were seen.
#
# Globals read:    ztrc workdir coredir symlink coremax ZDB GDB
# Globals written: foundcrashes
#
function store_core
{
	local core coreid dest zdbdebug coreprog coredebug
	local -a zdbcmd

	core="$(core_file)"
	# Nothing to do for a clean run without a core file.
	if [[ $ztrc -eq 0 ]] && [[ ! -f "$core" ]]; then
		return 0
	fi

	df -h "$workdir" >>ztest.out
	coreid=$(date "+zloop-%y%m%d-%H%M%S")
	foundcrashes=$((foundcrashes + 1))

	# zdb debugging
	# $ZDB stays unquoted so a multi-word value still splits into words;
	# the cache path is quoted so it is always passed as one argument.
	# shellcheck disable=SC2154,SC2206
	zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
	zdbdebug=$("${zdbcmd[@]}" 2>&1)
	# Log the command line, a blank line, then the zdb output.
	printf '%s\n\n' "${zdbcmd[*]}" >>ztest.zdb
	echo "$zdbdebug" >>ztest.zdb

	dest=$coredir/$coreid
	or_die mkdir -p "$dest"
	or_die mkdir -p "$dest/vdev"

	if [[ $symlink -ne 0 ]]; then
		or_die ln -sf "$dest" "ztest.core.${foundcrashes}"
	fi

	echo "*** ztest crash found - moving logs to $dest"

	or_die mv ztest.history "$dest/"
	or_die mv ztest.zdb "$dest/"
	or_die mv ztest.out "$dest/"
	# The glob is expanded here rather than left to or_die.
	or_die mv "$workdir"/ztest* "$dest/vdev/"

	if [[ -e "$workdir/zpool.cache" ]]; then
		or_die mv "$workdir/zpool.cache" "$dest/vdev/"
	fi

	# check for core
	if [[ -f "$core" ]]; then
		coreprog=$(core_prog "$core")
		coredebug=$($GDB --batch --quiet \
		    -ex "set print thread-events off" \
		    -ex "printf \"*\n* Backtrace \n*\n\"" \
		    -ex "bt" \
		    -ex "printf \"*\n* Libraries \n*\n\"" \
		    -ex "info sharedlib" \
		    -ex "printf \"*\n* Threads (full) \n*\n\"" \
		    -ex "info threads" \
		    -ex "printf \"*\n* Backtraces \n*\n\"" \
		    -ex "thread apply all bt" \
		    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
		    -ex "thread apply all bt full" \
		    -ex "quit" "$coreprog" "$core" 2>&1 | \
		    grep -v "New LWP")

		# Dump core + logs to stored directory
		echo "$coredebug" >>"$dest/ztest.gdb"
		or_die mv "$core" "$dest/"

		# Record info in cores logfile
		echo "*** core @ $coredir/$coreid/$core:" | \
		    tee -a ztest.cores
	fi

	if [[ $coremax -gt 0 ]] &&
	   [[ $foundcrashes -ge $coremax ]]; then
		echo "exiting... max $coremax allowed cores"
		exit 1
	else
		echo "continuing..."
	fi
}

# parse arguments
# expected format: zloop [-hl] [-c coredir] [-f vdevdir] [-m maxcores]
#                        [-s size] [-t timeout] [-I iterations]
#                        [-- extra ztest args]
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR
rundir="zloop-run"
timeout=0
size="512m"
coremax=0
symlink=0
iterations=0
# The leading ':' selects silent error reporting: a missing option argument
# yields opt=":" and an unknown option yields opt="?", both with the
# offending option letter in OPTARG.
while getopts ":ht:m:I:s:c:f:l" opt; do
	case $opt in
		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
		I ) [[ -n $OPTARG ]] && iterations=$OPTARG ;;
		s ) [[ -n $OPTARG ]] && size=$OPTARG ;;
		c ) [[ -n $OPTARG ]] && coredir=$OPTARG ;;
		f ) if [[ -n $OPTARG ]]; then
			basedir=$(readlink -f "$OPTARG")
			# An empty result would turn the run directory
			# into "/$rundir", so refuse to continue.
			if [[ -z $basedir ]]; then
				echo "Invalid vdev directory: $OPTARG"
				exit 1
			fi
		    fi
		    ;;
		l ) symlink=1 ;;
		h ) usage
		    exit 2
		    ;;
		: ) echo "Option -$OPTARG requires an argument"
		    usage
		    exit 1
		    ;;
		* ) echo "Invalid argument: -$OPTARG";
		    usage
		    exit 1
		    ;;
	esac
done
# pass remaining arguments on to ztest
shift $((OPTIND - 1))

# enable core dumps
ulimit -c unlimited
# Sanitizer runtime options, exported so that child processes inherit them.
export ASAN_OPTIONS=abort_on_error=true:halt_on_error=true:allocator_may_return_null=true:disable_coredump=false:detect_stack_use_after_return=true
export UBSAN_OPTIONS=abort_on_error=true:halt_on_error=true:print_stacktrace=true

# Refuse to start while a core file matching $coreglob already exists in
# the current directory. core_file is called once and its result reused.
existing_core=$(core_file)
if [[ -f "$existing_core" ]]; then
	echo -n "There's a core dump here you might want to look at first... "
	echo "$existing_core"
	echo
	exit 1
fi

# Make sure the crash output directory exists and can be written to.
if [[ ! -d $coredir ]]; then
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
fi

if [[ ! -w $coredir ]]; then
	echo "core dump directory ($coredir) is not writable."
	exit 1
fi

# Start without log files left over from an earlier invocation.
or_die rm -f ztest.history
or_die rm -f ztest.zdb
or_die rm -f ztest.cores

# State shared between the main loop and store_core.
ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)	# seconds since the epoch at startup
curtime=$starttime	# refreshed at the end of every iteration
iteration=0	# compared against the -I limit, when one is given

# if no timeout was specified, loop forever.
while (( timeout == 0 )) || (( curtime <= (starttime + timeout) )); do
	# With -I, stop once that many iterations have been started.
	if (( iterations > 0 )) && (( iteration++ == iterations )); then
		break
	fi

	# ztest options are collected in an array so that values containing
	# whitespace (for example the working directory) stay single words.
	zopt=(-G -VVVVV)

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# switch between three types of configs
	# 1/3 basic, 1/3 raidz mix, and 1/3 draid mix
	choice=$((RANDOM % 3))

	# ashift is either 9 or 12
	align=$(((RANDOM % 2) * 3 + 9))

	# randomly use special classes
	class="special=random"

	if [[ $choice -eq 0 ]]; then
		# basic mirror only
		parity=1
		mirrors=2
		draid_data=0
		draid_spares=0
		raid_children=0
		vdevs=2
		raid_type="raidz"
	elif [[ $choice -eq 1 ]]; then
		# fully randomized mirror/raidz (sans dRAID)
		parity=$(((RANDOM % 3) + 1))
		mirrors=$(((RANDOM % 3) * 1))
		draid_data=0
		draid_spares=0
		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
		raid_type="raidz"
	else
		# fully randomized dRAID (sans mirror/raidz)
		parity=$(((RANDOM % 3) + 1))
		mirrors=0
		draid_data=$(((RANDOM % 8) + 3))
		draid_spares=$(((RANDOM % 2) + parity))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + (RANDOM % 4)))
		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
		vdevs=$((RANDOM % 3))
		raid_type="draid"
	fi

	zopt+=(-K "$raid_type")
	zopt+=(-m "$mirrors")
	zopt+=(-r "$raid_children")
	zopt+=(-D "$draid_data")
	zopt+=(-S "$draid_spares")
	zopt+=(-R "$parity")
	zopt+=(-v "$vdevs")
	zopt+=(-a "$align")
	zopt+=(-C "$class")
	zopt+=(-s "$size")
	zopt+=(-f "$workdir")

	# $ZTEST stays unquoted so a multi-word value still splits into
	# words; the extra arguments after "--" are passed on unchanged.
	# shellcheck disable=SC2154,SC2206
	cmd=($ZTEST "${zopt[@]}" "$@")
	desc="$(date '+%m/%d %T') ${cmd[*]}"
	echo "$desc" | tee -a ztest.history
	echo "$desc" >>ztest.out
	"${cmd[@]}" >>ztest.out 2>&1
	ztrc=$?
	# Copy the '===' and WARNING lines of the log into the history.
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done

echo "zloop finished, $foundcrashes crashes found"

# restore core pattern.
case $(uname) in
Linux)
	# Writing an empty value would clobber the system setting, so only
	# restore when the original pattern was actually captured.
	if [[ -n ${origcorepattern:-} ]]; then
		echo "$origcorepattern" > /proc/sys/kernel/core_pattern
	fi
	;;
*)
	;;
esac

uptime >>ztest.out

# Report failure to the caller when at least one crash was recorded.
if [[ $foundcrashes -gt 0 ]]; then
	exit 1
fi
