xref: /freebsd/sys/contrib/openzfs/scripts/zloop.sh (revision d485c77f203fb0f4cdc08dea5ff81631b51d8809)
1#!/usr/bin/env bash
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2015 by Delphix. All rights reserved.
20# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
21# Copyright (c) 2017, Intel Corporation.
22#
23
# Source the helper script that sits in the same directory as this script.
# NOTE(review): ZTEST and ZDB are used below but never assigned in this
# file, so they presumably come from the helper - confirm.
BASE_DIR=$(dirname "$0")
SCRIPT_COMMON=common.sh
if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	# Report on stderr and exit unconditionally: the exit must not
	# depend on the message having been written successfully.
	echo "Missing helper script ${SCRIPT_COMMON}" >&2
	exit 1
fi
31
# Script name, debugger command and default locations.  GDB may be preset
# in the environment; it falls back to "gdb" when unset or empty.
# shellcheck disable=SC2034
PROG=zloop.sh
: "${GDB:=gdb}"

DEFAULTCOREDIR=/var/tmp/zloop
DEFAULTWORKDIR=/var/tmp
38
#
# Print the synopsis and the option summary to stderr.
#
# Reads $0 and DEFAULTCOREDIR.  Does not exit; the caller picks the
# exit status.  The synopsis lists every option the getopts string
# accepts (-c -f -h -l -m -s -t).
#
function usage
{
	cat >&2 <<EOF

$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
  [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
  [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -s  Size of vdev devices.
    -f  Specify working directory for ztest vdev files.
    -c  Specify a core dump directory to use.
    -m  Max number of core dumps to allow before exiting.
    -l  Create 'ztest.core.N' symlink to core directory.
    -h  Print this help message.

EOF
}
61
#
# Run the command line given as arguments; when it exits non-zero, print
# "Command failed: ..." and terminate the script with status 1.
#
# The arguments are expanded unquoted, so they go through word splitting
# and pathname expansion once more before the command runs.  A caller
# can therefore hand over a quoted glob and have it expanded here.
#
function or_die
{
	# shellcheck disable=SC2068
	$@
	# shellcheck disable=SC2181
	(( $? == 0 )) && return 0

	# shellcheck disable=SC2145
	echo "Command failed: $@"
	exit 1
}
73
#
# Determine coreglob, the glob that core files are expected to match
# on this platform.
#
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# Keep the current pattern; the end of the script writes it back.
	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
	# The glob is the literal prefix of the pattern (everything before
	# the first '|', '%' or whitespace) followed by '*'.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	# A bare "*" means the pattern has no usable literal prefix, so
	# switch to plain "core" files and derive the glob again.
	if [[ $coreglob = "*" ]]; then
		echo "Setting core file pattern..."
		echo "core" > /proc/sys/kernel/core_pattern
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"

		# Still "*": the write did not take effect, and the glob
		# would now match every file in the current directory.
		if [[ $coreglob = "*" ]]; then
			echo "Warning: could not set the core file pattern;" \
			    "core detection will be unreliable" >&2
		fi
	fi
	;;
*)
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac
94
#
# Print, without a trailing newline, the first name listed by
# "ls -tr1 $coreglob" (the oldest match by modification time).
# Prints nothing when the listing is empty; ls errors are discarded.
#
function core_file
{
	local oldest

	# shellcheck disable=SC2012,SC2086
	oldest=$(ls -tr1 $coreglob 2> /dev/null | head -n 1)
	printf "%s" "$oldest"
}
100
#
# Print the program that should be paired with core file $1 in gdb.
#
# gdb's "Core was generated by" line is inspected: when it contains
# "zdb " the value of ZDB is printed, otherwise the value of ZTEST.
# Quote characters in that line are turned into spaces first, so a
# command quoted by gdb still ends up followed by a blank.
#
function core_prog
{
	local generated_by

	generated_by=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr  \' ' ')

	case "$generated_by" in
	*"zdb "*)
		printf "%s" "$ZDB"
		;;
	*)
		printf "%s" "$ZTEST"
		;;
	esac
}
112
#
# Archive the evidence of a failed ztest run.
#
# A run counts as a crash when ztrc is non-zero or when core_file names
# an existing file.  In that case the ztest logs, a zdb dump, the vdev
# files, zpool.cache and (if present) the core file with a gdb report
# are moved to $coredir/zloop-<timestamp>.  Nothing happens otherwise.
#
# Globals read:    ztrc workdir coredir symlink coremax ZDB GDB
# Globals written: foundcrashes (incremented per crash)
# Exits the script with status 1 once coremax crashes were collected
# (coremax > 0), or through or_die when a mkdir/ln/mv step fails.
#
function store_core
{
	local core coreid dest coreprog coredebug zdbdebug
	local -a zdbcmd

	core="$(core_file)"
	if [[ $ztrc -eq 0 ]] && [[ ! -f "$core" ]]; then
		return 0
	fi

	df -h "$workdir" >>ztest.out
	coreid=$(date "+zloop-%y%m%d-%H%M%S")
	foundcrashes=$((foundcrashes + 1))

	# zdb debugging.  The command is held in an array so the cache
	# path stays one word even if workdir contains whitespace.  ZDB
	# itself is still split on whitespace, as it was before.
	# shellcheck disable=SC2206
	zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
	zdbdebug=$("${zdbcmd[@]}" 2>&1)
	printf '%s\n\n' "${zdbcmd[*]}" >>ztest.zdb
	echo "$zdbdebug" >>ztest.zdb

	dest=$coredir/$coreid
	or_die mkdir -p "$dest"
	or_die mkdir -p "$dest/vdev"

	if [[ $symlink -ne 0 ]]; then
		or_die ln -sf "$dest" "ztest.core.$foundcrashes"
	fi

	echo "*** ztest crash found - moving logs to $dest"

	or_die mv ztest.history "$dest/"
	or_die mv ztest.zdb "$dest/"
	or_die mv ztest.out "$dest/"
	# The glob sits outside the quotes so it is expanded right here
	# and does not depend on or_die expanding its arguments again.
	or_die mv "$workdir"/ztest* "$dest/vdev/"

	if [[ -e "$workdir/zpool.cache" ]]; then
		or_die mv "$workdir/zpool.cache" "$dest/vdev/"
	fi

	# check for core
	if [[ -f "$core" ]]; then
		coreprog=$(core_prog "$core")
		coredebug=$($GDB --batch --quiet \
		    -ex "set print thread-events off" \
		    -ex "printf \"*\n* Backtrace \n*\n\"" \
		    -ex "bt" \
		    -ex "printf \"*\n* Libraries \n*\n\"" \
		    -ex "info sharedlib" \
		    -ex "printf \"*\n* Threads (full) \n*\n\"" \
		    -ex "info threads" \
		    -ex "printf \"*\n* Backtraces \n*\n\"" \
		    -ex "thread apply all bt" \
		    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
		    -ex "thread apply all bt full" \
		    -ex "quit" "$coreprog" "$core" 2>&1 | \
		    grep -v "New LWP")

		# Dump core + logs to stored directory
		echo "$coredebug" >>"$dest/ztest.gdb"
		or_die mv "$core" "$dest/"

		# Record info in cores logfile.  Only the file name of the
		# core is appended: mv placed it directly inside $dest,
		# whatever directory it was found in.
		echo "*** core @ $dest/${core##*/}:" | \
		    tee -a ztest.cores
	fi

	if [[ $coremax -gt 0 ]] &&
	   [[ $foundcrashes -ge $coremax ]]; then
		echo "exiting... max $coremax allowed cores"
		exit 1
	else
		echo "continuing..."
	fi
}
182
# Option defaults, then command-line parsing.
# Accepted: -t <seconds> -m <count> -s <size> -c <dir> -f <dir> -l -h,
# optionally followed by "--" and arguments that are left for ztest.
symlink=0
coremax=0
size="512m"
timeout=0
rundir="zloop-run"
basedir=$DEFAULTWORKDIR
coredir=$DEFAULTCOREDIR

while getopts ":ht:m:s:c:f:l" opt; do
	case $opt in
	c)
		if [[ -n $OPTARG ]]; then
			coredir=$OPTARG
		fi
		;;
	f)
		if [[ -n $OPTARG ]]; then
			basedir=$(readlink -f "$OPTARG")
		fi
		;;
	l)
		symlink=1
		;;
	m)
		# values that are not greater than zero are ignored
		if [[ $OPTARG -gt 0 ]]; then
			coremax=$OPTARG
		fi
		;;
	s)
		if [[ -n $OPTARG ]]; then
			size=$OPTARG
		fi
		;;
	t)
		# values that are not greater than zero are ignored
		if [[ $OPTARG -gt 0 ]]; then
			timeout=$OPTARG
		fi
		;;
	h)
		usage
		exit 2
		;;
	*)
		# unknown option, or an option missing its argument
		echo "Invalid argument: -$OPTARG"
		usage
		exit 1
		;;
	esac
done

# drop the parsed options; what remains is passed on to ztest
shift "$((OPTIND - 1))"
210
# enable core dumps: no size limit for core files, and the ASan options
# abort_on_error=1 / disable_coredump=0 exported for child processes
ASAN_OPTIONS=abort_on_error=1:disable_coredump=0
export ASAN_OPTIONS
ulimit -c unlimited

# refuse to start while a file matching the core glob is already present
if [[ -f "$(core_file)" ]]; then
	echo -n "There's a core dump here you might want to look at first... "
	printf '%s\n' "$(core_file)"
	exit 1
fi

# the core dump directory has to exist ...
[[ -d $coredir ]] || {
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
}

# ... and has to be writable
[[ -w $coredir ]] || {
	echo "core dump directory ($coredir) is not writable."
	exit 1
}

# remove the logs left behind by an earlier invocation
for stale_log in ztest.history ztest.zdb ztest.cores; do
	or_die rm -f "$stale_log"
done
unset -v stale_log

foundcrashes=0	# number of crashes found so far
ztrc=0		# ztest return value
curtime=$(date +%s)
starttime=$curtime
240
# Main loop.  Without a timeout (0) it never ends; otherwise it stops once
# the clock has moved past starttime + timeout.
until [[ $timeout -ne 0 && $curtime -gt $((starttime + timeout)) ]]; do
	# every run starts from an empty work directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# pick one of three layouts with equal probability:
	# 0 = basic mirror, 1 = mirror/raidz mix, 2 = dRAID mix
	choice=$((RANDOM % 3))

	# ashift is either 9 or 12
	align=$((9 + 3 * (RANDOM % 2)))

	# value handed to ztest through its -C option
	class="special=random"

	case $choice in
	0)
		# basic mirror only
		raid_type="raidz"
		parity=1
		mirrors=2
		vdevs=2
		raid_children=0
		draid_data=0
		draid_spares=0
		;;
	1)
		# fully randomized mirror/raidz (sans dRAID)
		raid_type="raidz"
		draid_data=0
		draid_spares=0
		parity=$((1 + RANDOM % 3))
		mirrors=$((RANDOM % 3))
		# either 0, or between parity + 1 and parity + 9 children
		raid_children=$(((RANDOM % 9 + parity + 1) * (RANDOM % 2)))
		vdevs=$((3 + RANDOM % 3))
		;;
	*)
		# fully randomized dRAID (sans mirror/raidz)
		raid_type="draid"
		mirrors=0
		parity=$((1 + RANDOM % 3))
		draid_data=$((3 + RANDOM % 8))
		draid_spares=$((parity + RANDOM % 2))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + RANDOM % 4))
		# one to four full stripes plus the extra children
		raid_children=$(((RANDOM % 4 + 1) * stripe + extra))
		vdevs=$((RANDOM % 3))
		;;
	esac

	# -T value: 30 to 119 seconds; -P value: 10 up to runtime / 3 + 10
	runtime=$((30 + RANDOM % 90))
	passtime=$((10 + RANDOM % (runtime / 3 + 1)))

	zopt="-G -VVVVV -K $raid_type -m $mirrors -r $raid_children"
	zopt="$zopt -D $draid_data -S $draid_spares -R $parity -v $vdevs"
	zopt="$zopt -a $align -C $class -T $runtime -P $passtime"
	zopt="$zopt -s $size -f $workdir"

	# The command line is one string that is split on whitespace when
	# it runs; the extra arguments given after "--" are appended to it.
	# shellcheck disable=SC2124
	cmd="$ZTEST $zopt $@"
	desc="$(date '+%m/%d %T') $cmd"

	# log the invocation to the terminal, the history and the run log
	echo "$desc" | tee -a ztest.history ztest.out
	$cmd >>ztest.out 2>&1
	ztrc=$?

	# copy the "===" and WARNING lines of the run log into the history
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done
322
echo "zloop finished, $foundcrashes crashes found"

# On Linux, write back the core pattern that was saved at startup.
if [[ $(uname) == "Linux" ]]; then
	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
fi

uptime >>ztest.out

# Exit status 1 tells the caller that at least one crash was collected.
[[ $foundcrashes -le 0 ]] || exit 1
339