xref: /freebsd/sys/contrib/openzfs/scripts/zloop.sh (revision 9e5787d2284e187abb5b654d924394a65772e004)
1#!/usr/bin/env bash
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2015 by Delphix. All rights reserved.
20# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
21#
22
23BASE_DIR=$(dirname "$0")
24SCRIPT_COMMON=common.sh
25if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
26	. "${BASE_DIR}/${SCRIPT_COMMON}"
27else
28	echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
29fi
30
31# shellcheck disable=SC2034
32PROG=zloop.sh
33GDB=${GDB:-gdb}
34
35DEFAULTWORKDIR=/var/tmp
36DEFAULTCOREDIR=/var/tmp/zloop
37
38function usage
39{
40	echo -e "\n$0 [-t <timeout>] [ -s <vdev size> ] [-c <dump directory>]" \
41	    "[ -- [extra ztest parameters]]\n" \
42	    "\n" \
43	    "  This script runs ztest repeatedly with randomized arguments.\n" \
44	    "  If a crash is encountered, the ztest logs, any associated\n" \
45	    "  vdev files, and core file (if one exists) are moved to the\n" \
46	    "  output directory ($DEFAULTCOREDIR by default). Any options\n" \
47	    "  after the -- end-of-options marker will be passed to ztest.\n" \
48	    "\n" \
49	    "  Options:\n" \
50	    "    -t  Total time to loop for, in seconds. If not provided,\n" \
51	    "        zloop runs forever.\n" \
52	    "    -s  Size of vdev devices.\n" \
53	    "    -f  Specify working directory for ztest vdev files.\n" \
54	    "    -c  Specify a core dump directory to use.\n" \
55	    "    -m  Max number of core dumps to allow before exiting.\n" \
56	    "    -l  Create 'ztest.core.N' symlink to core directory.\n" \
57	    "    -h  Print this help message.\n" \
58	    "" >&2
59}
60
61function or_die
62{
63	# shellcheck disable=SC2068
64	$@
65	# shellcheck disable=SC2181
66	if [[ $? -ne 0 ]]; then
67		# shellcheck disable=SC2145
68		echo "Command failed: $@"
69		exit 1
70	fi
71}
72
73case $(uname) in
74FreeBSD)
75	coreglob="z*.core"
76	;;
77Linux)
78	# core file helpers
79	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
80	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
81
82	if [[ $coreglob = "*" ]]; then
83		echo "Setting core file pattern..."
84		echo "core" > /proc/sys/kernel/core_pattern
85		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
86		    /proc/sys/kernel/core_pattern)*"
87	fi
88	;;
89*)
90	exit 1
91	;;
92esac
93
94function core_file
95{
96	# shellcheck disable=SC2012 disable=2086
97        printf "%s" "$(ls -tr1 $coreglob 2> /dev/null | head -1)"
98}
99
100function core_prog
101{
102	prog=$ZTEST
103	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
104	    tr  \' ' ')
105	# shellcheck disable=SC2076
106	if [[ "$core_id" =~ "zdb "  ]]; then
107		prog=$ZDB
108	fi
109	printf "%s" "$prog"
110}
111
112function store_core
113{
114	core="$(core_file)"
115	if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
116		df -h "$workdir" >>ztest.out
117		coreid=$(date "+zloop-%y%m%d-%H%M%S")
118		foundcrashes=$((foundcrashes + 1))
119
120		# zdb debugging
121		zdbcmd="$ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest"
122		zdbdebug=$($zdbcmd 2>&1)
123		echo -e "$zdbcmd\n" >>ztest.zdb
124		echo "$zdbdebug" >>ztest.zdb
125
126		dest=$coredir/$coreid
127		or_die mkdir -p "$dest"
128		or_die mkdir -p "$dest/vdev"
129
130		if [[ $symlink -ne 0 ]]; then
131			or_die ln -sf "$dest" ztest.core.$foundcrashes
132		fi
133
134		echo "*** ztest crash found - moving logs to $dest"
135
136		or_die mv ztest.history "$dest/"
137		or_die mv ztest.zdb "$dest/"
138		or_die mv ztest.out "$dest/"
139		or_die mv "$workdir/ztest*" "$dest/vdev/"
140
141		if [[ -e "$workdir/zpool.cache" ]]; then
142			or_die mv "$workdir/zpool.cache" "$dest/vdev/"
143		fi
144
145		# check for core
146		if [[ -f "$core" ]]; then
147			coreprog=$(core_prog "$core")
148			coredebug=$($GDB --batch --quiet \
149			    -ex "set print thread-events off" \
150			    -ex "printf \"*\n* Backtrace \n*\n\"" \
151			    -ex "bt" \
152			    -ex "printf \"*\n* Libraries \n*\n\"" \
153			    -ex "info sharedlib" \
154			    -ex "printf \"*\n* Threads (full) \n*\n\"" \
155			    -ex "info threads" \
156			    -ex "printf \"*\n* Backtraces \n*\n\"" \
157			    -ex "thread apply all bt" \
158			    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
159			    -ex "thread apply all bt full" \
160			    -ex "quit" "$coreprog" "$core" 2>&1 | \
161			    grep -v "New LWP")
162
163			# Dump core + logs to stored directory
164			echo "$coredebug" >>"$dest/ztest.gdb"
165			or_die mv "$core" "$dest/"
166
167			# Record info in cores logfile
168			echo "*** core @ $coredir/$coreid/$core:" | \
169			    tee -a ztest.cores
170		fi
171
172		if [[ $coremax -gt 0 ]] &&
173		   [[ $foundcrashes -ge $coremax ]]; then
174			echo "exiting... max $coremax allowed cores"
175			exit 1
176		else
177			echo "continuing..."
178		fi
179	fi
180}
181
182# parse arguments
183# expected format: zloop [-t timeout] [-c coredir] [-- extra ztest args]
184coredir=$DEFAULTCOREDIR
185basedir=$DEFAULTWORKDIR
186rundir="zloop-run"
187timeout=0
188size="512m"
189coremax=0
190symlink=0
191while getopts ":ht:m:s:c:f:l" opt; do
192	case $opt in
193		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
194		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
195		s ) [[ $OPTARG ]] && size=$OPTARG ;;
196		c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
197		f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
198		l ) symlink=1 ;;
199		h ) usage
200		    exit 2
201		    ;;
202		* ) echo "Invalid argument: -$OPTARG";
203		    usage
204		    exit 1
205	esac
206done
207# pass remaining arguments on to ztest
208shift $((OPTIND - 1))
209
210# enable core dumps
211ulimit -c unlimited
212export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0
213
214if [[ -f "$(core_file)" ]]; then
215	echo -n "There's a core dump here you might want to look at first... "
216	core_file
217	echo
218	exit 1
219fi
220
221if [[ ! -d $coredir ]]; then
222	echo "core dump directory ($coredir) does not exist, creating it."
223	or_die mkdir -p "$coredir"
224fi
225
226if [[ ! -w $coredir ]]; then
227	echo "core dump directory ($coredir) is not writable."
228	exit 1
229fi
230
231or_die rm -f ztest.history
232or_die rm -f ztest.zdb
233or_die rm -f ztest.cores
234
235ztrc=0		# ztest return value
236foundcrashes=0	# number of crashes found so far
237starttime=$(date +%s)
238curtime=$starttime
239
240# if no timeout was specified, loop forever.
241while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
242	zopt="-G -VVVVV"
243
244	# start each run with an empty directory
245	workdir="$basedir/$rundir"
246	or_die rm -rf "$workdir"
247	or_die mkdir "$workdir"
248
249	# switch between common arrangements & fully randomized
250	if [[ $((RANDOM % 2)) -eq 0 ]]; then
251		mirrors=2
252		raidz=0
253		parity=1
254		vdevs=2
255	else
256		mirrors=$(((RANDOM % 3) * 1))
257		parity=$(((RANDOM % 3) + 1))
258		raidz=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
259		vdevs=$(((RANDOM % 3) + 3))
260	fi
261	align=$(((RANDOM % 2) * 3 + 9))
262	runtime=$((RANDOM % 100))
263	passtime=$((RANDOM % (runtime / 3 + 1) + 10))
264
265	zopt="$zopt -m $mirrors"
266	zopt="$zopt -r $raidz"
267	zopt="$zopt -R $parity"
268	zopt="$zopt -v $vdevs"
269	zopt="$zopt -a $align"
270	zopt="$zopt -T $runtime"
271	zopt="$zopt -P $passtime"
272	zopt="$zopt -s $size"
273	zopt="$zopt -f $workdir"
274
275	# shellcheck disable=SC2124
276	cmd="$ZTEST $zopt $@"
277	desc="$(date '+%m/%d %T') $cmd"
278	echo "$desc" | tee -a ztest.history
279	echo "$desc" >>ztest.out
280	$cmd >>ztest.out 2>&1
281	ztrc=$?
282	grep -E '===|WARNING' ztest.out >>ztest.history
283
284	store_core
285
286	curtime=$(date +%s)
287done
288
289echo "zloop finished, $foundcrashes crashes found"
290
291# restore core pattern.
292case $(uname) in
293Linux)
294	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
295	;;
296*)
297	;;
298esac
299
300uptime >>ztest.out
301
302if [[ $foundcrashes -gt 0 ]]; then
303	exit 1
304fi
305