xref: /freebsd/sys/contrib/openzfs/scripts/zloop.sh (revision a90b9d0159070121c221b966469c3e36d912bf82)
1#!/usr/bin/env bash
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2015 by Delphix. All rights reserved.
20# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
21# Copyright (c) 2017, Intel Corporation.
22#
23
# Locate the directory this script lives in and source the shared helper
# script from it.
BASE_DIR=${0%/*}
# ${0%/*} only strips a trailing "/name"; when $0 contains no slash at all
# (e.g. the script was started as "bash zloop.sh") it returns $0 unchanged,
# which is not a directory. Fall back to the current directory in that case.
[[ $0 == */* ]] || BASE_DIR=.
SCRIPT_COMMON=common.sh
if [[ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
fi
31
# PROG is not referenced anywhere in this file, hence the SC2034 waiver.
# NOTE(review): presumably consumed by the sourced helper script - confirm.
# shellcheck disable=SC2034
PROG="zloop.sh"

# Debugger used to inspect core files; an existing non-empty $GDB from the
# environment takes precedence over the default.
: "${GDB:=gdb}"

# Defaults for the ztest working directory (-f) and the crash output
# directory (-c).
DEFAULTWORKDIR="/var/tmp"
DEFAULTCOREDIR="${DEFAULTWORKDIR}/zloop"
38
#
# Print the command synopsis and the option summary on stderr.
# The text interpolates $0 and $DEFAULTCOREDIR; nothing is written to stdout
# and the function does not exit on its own.
#
function usage
{
	cat <<EOF

$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
  [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
  [-I <max iterations>] [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -c  Specify a core dump directory to use.
    -f  Specify working directory for ztest vdev files.
    -h  Print this help message.
    -l  Create 'ztest.core.N' symlink to core directory.
    -m  Max number of core dumps to allow before exiting.
    -s  Size of vdev devices.
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -I  Max number of iterations to loop before exiting.

EOF
} >&2
66
#
# Run the given command with its arguments; if it fails, report the command
# line on stderr and terminate the script with exit status 1.
#
# Arguments: $@ - command and arguments to execute
# Outputs:   whatever the command prints; on failure a "Command failed"
#            diagnostic on stderr (so it never mixes into captured stdout)
#
function or_die
{
	if ! "$@"; then
		echo "Command failed: $*" >&2
		exit 1
	fi
}
74
# Work out the glob ($coreglob) that matches core files on this platform.
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	# Remember the current pattern; the end of the script writes it back.
	read -r origcorepattern </proc/sys/kernel/core_pattern
	# The glob is the literal prefix of the pattern (everything before
	# the first '|', '%' or whitespace character) followed by '*'.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	# An empty prefix (e.g. the pattern pipes to a handler) would leave
	# a bare "*" that matches every file, so install a plain "core"
	# pattern instead.
	if [[ $coreglob = "*" ]]; then
		echo "Setting core file pattern..."
		# If the write is refused the glob would stay "*" and any
		# file in the current directory would be mistaken for a
		# core file, so stop here instead of carrying on.
		if ! echo "core" > /proc/sys/kernel/core_pattern; then
			echo "Unable to set core file pattern" >&2
			exit 1
		fi
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	# No core file convention is known for other platforms.
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac
95
#
# Print the name of the oldest file (by modification time) matching
# $coreglob in the current directory. Prints nothing when no file matches.
#
function core_file
{
	# $coreglob is left unquoted on purpose so that it is expanded as a
	# glob; ls errors for a non-matching pattern are discarded.
	# shellcheck disable=SC2012,SC2086
	ls -1 -t -r $coreglob 2>/dev/null | head -n 1
}
101
#
# Print the program that produced a core file: $ZDB when gdb reports that
# the core was generated by a "zdb" command line, $ZTEST otherwise.
#
# Arguments: $1 - path to the core file
# Globals:   GDB, ZTEST, ZDB (read)
# Outputs:   program path on stdout, without a trailing newline
#
function core_prog
{
	# Keep the scratch variables out of the caller's namespace.
	local prog core_id

	# shellcheck disable=SC2154
	prog=$ZTEST
	# Single quotes are turned into spaces, presumably so that a command
	# line ending right after the program name still matches "zdb " below.
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr  \' ' ')
	if [[ "$core_id" == *"zdb "* ]]; then
		# shellcheck disable=SC2154
		prog=$ZDB
	fi
	printf "%s" "$prog"
}
114
#
# Preserve the evidence of a failed ztest run.
#
# A run counts as a crash when ztest returned non-zero ($ztrc) or a core
# file is present. In that case the ztest logs, zdb output, vdev files,
# pool cache file and (if present) the core file plus a gdb report are
# moved to a fresh "$coredir/zloop-<timestamp>" directory. When nothing
# went wrong the function does nothing.
#
# Globals read:    ztrc workdir coredir symlink coremax ZDB GDB
# Globals written: foundcrashes (incremented per crash)
# Exits the script with status 1 once $coremax crashes have been stored
# (when $coremax is greater than zero) or when a file operation fails.
#
function store_core
{
	local core coreid dest zdbdebug coreprog coredebug
	local -a zdbcmd

	core="$(core_file)"
	if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
		df -h "$workdir" >>ztest.out
		coreid=$(date "+zloop-%y%m%d-%H%M%S")
		foundcrashes=$((foundcrashes + 1))

		# zdb debugging
		# The command is kept as an array so that a $workdir with
		# whitespace reaches zdb as one argument. $ZDB itself stays
		# unquoted so a multi-word value keeps splitting as before.
		# shellcheck disable=SC2154,SC2206
		zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
		zdbdebug=$("${zdbcmd[@]}" 2>&1)
		# Log the command line, a blank line, then its output.
		printf '%s\n\n' "${zdbcmd[*]}" >>ztest.zdb
		echo "$zdbdebug" >>ztest.zdb

		dest=$coredir/$coreid
		or_die mkdir -p "$dest/vdev"

		if [[ $symlink -ne 0 ]]; then
			or_die ln -sf "$dest" "ztest.core.${foundcrashes}"
		fi

		echo "*** ztest crash found - moving logs to $dest"

		or_die mv ztest.history ztest.zdb ztest.out "$dest/"
		or_die mv "$workdir/"ztest* "$dest/vdev/"

		if [[ -e "$workdir/zpool.cache" ]]; then
			or_die mv "$workdir/zpool.cache" "$dest/vdev/"
		fi

		# check for core
		if [[ -f "$core" ]]; then
			coreprog=$(core_prog "$core")
			# Collect backtraces, shared libraries and thread
			# details in one batch gdb session; "New LWP" noise
			# is filtered out of the report.
			coredebug=$($GDB --batch --quiet \
			    -ex "set print thread-events off" \
			    -ex "printf \"*\n* Backtrace \n*\n\"" \
			    -ex "bt" \
			    -ex "printf \"*\n* Libraries \n*\n\"" \
			    -ex "info sharedlib" \
			    -ex "printf \"*\n* Threads (full) \n*\n\"" \
			    -ex "info threads" \
			    -ex "printf \"*\n* Backtraces \n*\n\"" \
			    -ex "thread apply all bt" \
			    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
			    -ex "thread apply all bt full" \
			    -ex "quit" "$coreprog" "$core" 2>&1 | \
			    grep -v "New LWP")

			# Dump core + logs to stored directory
			echo "$coredebug" >>"$dest/ztest.gdb"
			or_die mv "$core" "$dest/"

			# Record info in cores logfile
			echo "*** core @ $coredir/$coreid/$core:" | \
			    tee -a ztest.cores
		fi

		if [[ $coremax -gt 0 ]] &&
		   [[ $foundcrashes -ge $coremax ]]; then
			echo "exiting... max $coremax allowed cores"
			exit 1
		else
			echo "continuing..."
		fi
	fi
}
181
# parse arguments
# expected format: zloop [-hl] [-c coredir] [-f vdevdir] [-m maxcores]
#   [-s size] [-t timeout] [-I iterations] [-- extra ztest args]
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR
rundir="zloop-run"
timeout=0
size="512m"
coremax=0
symlink=0
iterations=0
# The leading ':' selects silent error reporting: an unknown option yields
# '?' and a missing option argument yields ':', both with the offending
# option letter in $OPTARG.
while getopts ":ht:m:I:s:c:f:l" opt; do
	case $opt in
		# -t / -m only take effect for values greater than zero
		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
		I ) [[ -n $OPTARG ]] && iterations=$OPTARG ;;
		s ) [[ -n $OPTARG ]] && size=$OPTARG ;;
		c ) [[ -n $OPTARG ]] && coredir=$OPTARG ;;
		f ) if [[ -n $OPTARG ]]; then
			basedir=$(readlink -f "$OPTARG")
			# An unresolvable path leaves basedir empty, which
			# would place the run directory (removed with
			# "rm -rf" on every iteration) directly under "/".
			if [[ -z $basedir ]]; then
				echo "Cannot resolve vdev directory:" \
				    "$OPTARG" >&2
				exit 1
			fi
		    fi
		    ;;
		l ) symlink=1 ;;
		h ) usage
		    exit 2
		    ;;
		: ) echo "Option -$OPTARG requires an argument" >&2
		    usage
		    exit 1
		    ;;
		* ) echo "Invalid argument: -$OPTARG" >&2
		    usage
		    exit 1
		    ;;
	esac
done
# pass remaining arguments on to ztest
shift $((OPTIND - 1))
211
# enable core dumps
ulimit -c unlimited
# Sanitizer settings: abort on the first error and keep core dumps enabled.
ASAN_OPTIONS=abort_on_error=true:halt_on_error=true:allocator_may_return_null=true:disable_coredump=false:detect_stack_use_after_return=true
UBSAN_OPTIONS=abort_on_error=true:halt_on_error=true:print_stacktrace=true
export ASAN_OPTIONS UBSAN_OPTIONS

# Refuse to start while a core file is already present in the current
# directory; its name is printed as part of the message.
if [[ -f "$(core_file)" ]]; then
	printf '%s' "There's a core dump here you might want to look at first... "
	core_file
	echo
	exit 1
fi

# Create the crash output directory on demand ...
[[ -d $coredir ]] || {
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
}

# ... and make sure results can actually be stored in it.
[[ -w $coredir ]] || {
	echo "core dump directory ($coredir) is not writable."
	exit 1
}

# Start from clean log files in the current directory.
or_die rm -f ztest.history ztest.zdb ztest.cores
235
ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)
curtime=$starttime
iteration=0

# if no timeout was specified, loop forever.
while (( timeout == 0 )) || (( curtime <= (starttime + timeout) )); do
	# Stop after $iterations passes (0 means unlimited). The
	# post-increment makes the loop body run exactly $iterations times.
	if (( iterations > 0 )) && (( iteration++ == iterations )); then
		break
	fi

	# ztest options are collected in an array so that values containing
	# whitespace (e.g. the working directory) stay single arguments.
	zopt=(-G -VVVVV)

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# ashift is either 9 or 12
	align=$(((RANDOM % 2) * 3 + 9))

	# choose parity value
	parity=$(((RANDOM % 3) + 1))

	draid_data=0
	draid_spares=0

	# randomly use special classes
	class="special=random"

	# choose between four types of configs
	# (basic, raidz mix, raidz expansion, and draid mix)
	case $((RANDOM % 4)) in

	# basic mirror configuration
	0)	parity=1
		mirrors=2
		raid_children=0
		vdevs=2
		raid_type="raidz"
		;;

	# fully randomized mirror/raidz (sans dRAID)
	1)	mirrors=$(((RANDOM % 3) * 1))
		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
		raid_type="raidz"
		;;

	# randomized raidz expansion (one top-level raidz vdev)
	2)	mirrors=0
		vdevs=1
		# derive initial raidz disk count based on parity choice
		#   P1: 3 - 7 disks
		#   P2: 5 - 9 disks
		#   P3: 7 - 11 disks
		raid_children=$(((RANDOM % 5) + (parity * 2) + 1))

		# 1/3 of the time use a dedicated '-X' raidz expansion test
		if [[ $((RANDOM % 3)) -eq 0 ]]; then
			zopt+=(-X -t 16)
			raid_type="raidz"
		else
			raid_type="eraidz"
		fi
		;;

	# fully randomized dRAID (sans mirror/raidz)
	3)	mirrors=0
		draid_data=$(((RANDOM % 8) + 3))
		draid_spares=$(((RANDOM % 2) + parity))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + (RANDOM % 4)))
		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
		vdevs=$((RANDOM % 3))
		raid_type="draid"
		;;
	*)
		# avoid shellcheck SC2249
		;;
	esac

	zopt+=(-K "$raid_type")
	zopt+=(-m "$mirrors")
	zopt+=(-r "$raid_children")
	zopt+=(-D "$draid_data")
	zopt+=(-S "$draid_spares")
	zopt+=(-R "$parity")
	zopt+=(-v "$vdevs")
	zopt+=(-a "$align")
	zopt+=(-C "$class")
	zopt+=(-s "$size")
	zopt+=(-f "$workdir")

	# "$@" hands the extra ztest parameters through exactly as the user
	# quoted them. $ZTEST stays unquoted so a multi-word value keeps
	# splitting as before.
	# shellcheck disable=SC2154,SC2206
	cmd=($ZTEST "${zopt[@]}" "$@")
	echo "$(date '+%m/%d %T') ${cmd[*]}" | tee -a ztest.history ztest.out
	"${cmd[@]}" >>ztest.out 2>&1
	ztrc=$?
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done
341
# Final summary on stdout.
echo "zloop finished, $foundcrashes crashes found"

# restore core pattern.
# Only Linux has a core pattern to put back; other platforms need no action.
if [[ $(uname) == "Linux" ]]; then
	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
fi

uptime >>ztest.out

# A non-zero exit status signals that at least one crash was recorded.
if (( foundcrashes > 0 )); then
	exit 1
fi
358