xref: /freebsd/sys/contrib/openzfs/scripts/zloop.sh (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1#!/usr/bin/env bash
2# SPDX-License-Identifier: CDDL-1.0
3
4#
5# CDDL HEADER START
6#
7# This file and its contents are supplied under the terms of the
8# Common Development and Distribution License ("CDDL"), version 1.0.
9# You may only use this file in accordance with the terms of version
10# 1.0 of the CDDL.
11#
12# A full copy of the text of the CDDL should have accompanied this
13# source.  A copy of the CDDL is also available via the Internet at
14# http://www.illumos.org/license/CDDL.
15#
16# CDDL HEADER END
17#
18
19#
20# Copyright (c) 2015 by Delphix. All rights reserved.
21# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
22# Copyright (c) 2017, Intel Corporation.
23#
24
# Locate the directory this script was started from so that the shared
# helper script can be sourced from the same place.
#
# ${0%/*} only strips a trailing "/name" component. When $0 contains no
# slash at all (e.g. the script is started as "bash zloop.sh") the
# expansion would return the script name itself, and the helper lookup
# below would fail. Fall back to the current directory in that case.
if [[ $0 == */* ]]; then
	BASE_DIR=${0%/*}
else
	BASE_DIR=.
fi
SCRIPT_COMMON=common.sh
if [[ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
fi

# shellcheck disable=SC2034
PROG=zloop.sh
# gdb binary used to inspect core files; may be overridden from the
# environment.
GDB=${GDB:-gdb}

# Defaults for the -f (vdev working directory) and -c (core dump
# directory) options.
DEFAULTWORKDIR=/var/tmp
DEFAULTCOREDIR=/var/tmp/zloop
39
#
# Print the command synopsis and the option summary on stderr.
#
# The text is expanded by the shell, so it shows the name the script was
# invoked with ($0) and the current value of $DEFAULTCOREDIR.
#
usage()
{
	cat 1>&2 <<USAGE_TEXT

$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
  [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
  [-I <max iterations>] [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -c  Specify a core dump directory to use.
    -f  Specify working directory for ztest vdev files.
    -h  Print this help message.
    -l  Create 'ztest.core.N' symlink to core directory.
    -m  Max number of core dumps to allow before exiting.
    -s  Size of vdev devices.
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -I  Max number of iterations to loop before exiting.

USAGE_TEXT
}
67
#
# Run the command given as arguments. If it succeeds, return normally;
# if it fails, print the failed command line and terminate the script
# with exit status 1.
#
or_die()
{
	"$@" && return

	echo "Command failed: $*"
	exit 1
}
75
# Work out the shell glob ($coreglob) used to find core files on this
# platform. Unsupported platforms are rejected with a diagnostic.
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	# Remember the current pattern so it can be written back later.
	read -r origcorepattern </proc/sys/kernel/core_pattern

	# The glob is the leading part of the pattern, up to the first
	# '|', '%' or whitespace character, followed by '*'.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	# A bare "*" means the pattern has no usable literal prefix;
	# switch the system to plain "core" files and recompute the glob.
	if [[ $coreglob = "*" ]]; then
		echo "Setting core file pattern..."
		echo "core" > /proc/sys/kernel/core_pattern
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	# Say why we are giving up instead of exiting silently.
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac
96
#
# Print the name of the oldest file matching $coreglob, or nothing when
# no file matches.
#
# $coreglob is deliberately left unquoted so the shell expands it; ls
# sorts by modification time, oldest first, and only the first line is
# kept.
#
core_file()
{
	# shellcheck disable=SC2012,SC2086
	ls -1 -t -r $coreglob 2>/dev/null | head -n 1
}
102
#
# Print the program that should be handed to gdb together with a core
# file.
#
# Arguments: $1 - path of the core file
# Globals:   GDB, ZTEST, ZDB (read)
# Outputs:   $ZDB when gdb's "Core was generated by" line names zdb,
#            otherwise $ZTEST (no trailing newline)
#
function core_prog
{
	# Keep the scratch variables out of the caller's namespace.
	local prog core_id

	# shellcheck disable=SC2154
	prog=$ZTEST

	# Quotes in gdb's line are turned into spaces so that a command
	# ending right after the program name still matches "zdb " below.
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr  \' ' ')
	if [[ "$core_id" == *"zdb "* ]]; then
		# shellcheck disable=SC2154
		prog=$ZDB
	fi
	printf "%s" "$prog"
}
115
#
# Archive the artifacts of the last ztest run if it failed.
#
# A run is treated as a crash when $ztrc is non-zero or core_file
# reports an existing core file. In that case the logs, the files named
# ztest* in $workdir, the pool cache file and the core file (with a gdb
# report) are moved to a new directory below $coredir.
#
# Globals:  ztrc, workdir, coredir, symlink, coremax, ZDB, GDB (read)
#           foundcrashes (incremented for every crash)
# Exits:    with status 1 once $coremax is greater than zero and that
#           many crashes were found, or when a file operation fails
#           (via or_die)
#
function store_core
{
	core="$(core_file)"
	if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
		df -h "$workdir" >>ztest.out
		coreid=$(date "+zloop-%y%m%d-%H%M%S")
		foundcrashes=$((foundcrashes + 1))

		# zdb debugging
		# The command is kept as an array so the cache file path
		# stays a single argument even if $workdir contains
		# whitespace. $ZDB itself is still word-split, as before.
		# shellcheck disable=SC2154,SC2206
		zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
		zdbdebug=$("${zdbcmd[@]}" 2>&1)
		printf '%s\n\n' "${zdbcmd[*]}" >>ztest.zdb
		echo "$zdbdebug" >>ztest.zdb

		dest=$coredir/$coreid
		or_die mkdir -p "$dest/vdev"

		if [[ $symlink -ne 0 ]]; then
			or_die ln -sf "$dest" "ztest.core.${foundcrashes}"
		fi

		echo "*** ztest crash found - moving logs to $dest"

		or_die mv ztest.history ztest.zdb ztest.out "$dest/"
		or_die mv "$workdir/"ztest* "$dest/vdev/"

		if [[ -e "$workdir/zpool.cache" ]]; then
			or_die mv "$workdir/zpool.cache" "$dest/vdev/"
		fi

		# check for core
		if [[ -f "$core" ]]; then
			coreprog=$(core_prog "$core")
			coredebug=$($GDB --batch --quiet \
			    -ex "set print thread-events off" \
			    -ex "printf \"*\n* Backtrace \n*\n\"" \
			    -ex "bt" \
			    -ex "printf \"*\n* Libraries \n*\n\"" \
			    -ex "info sharedlib" \
			    -ex "printf \"*\n* Threads (full) \n*\n\"" \
			    -ex "info threads" \
			    -ex "printf \"*\n* Backtraces \n*\n\"" \
			    -ex "thread apply all bt" \
			    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
			    -ex "thread apply all bt full" \
			    -ex "quit" "$coreprog" "$core" 2>&1 | \
			    grep -v "New LWP")

			# Dump core + logs to stored directory
			echo "$coredebug" >>"$dest/ztest.gdb"
			or_die mv "$core" "$dest/"

			# Record info in cores logfile
			# The core now lives directly in $dest, so only its
			# file name is appended to the recorded location.
			echo "*** core @ $dest/${core##*/}:" | \
			    tee -a ztest.cores
		fi

		if [[ $coremax -gt 0 ]] &&
		   [[ $foundcrashes -ge $coremax ]]; then
			echo "exiting... max $coremax allowed cores"
			exit 1
		else
			echo "continuing..."
		fi
	fi
}
182
# Command-line handling.
# Expected form: zloop [-t timeout] [-c coredir] [-- extra ztest args]
#
# Start from the defaults; each option below overrides one of them.
symlink=0
iterations=0
coremax=0
timeout=0
size="512m"
rundir="zloop-run"
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR

while getopts ":ht:m:I:s:c:f:l" opt; do
	case $opt in
	h)
		usage
		exit 2
		;;
	l)
		symlink=1
		;;
	t)
		# only positive values replace the default
		if [[ $OPTARG -gt 0 ]]; then
			timeout=$OPTARG
		fi
		;;
	m)
		# only positive values replace the default
		if [[ $OPTARG -gt 0 ]]; then
			coremax=$OPTARG
		fi
		;;
	I)
		if [[ -n $OPTARG ]]; then
			iterations=$OPTARG
		fi
		;;
	s)
		if [[ -n $OPTARG ]]; then
			size=$OPTARG
		fi
		;;
	c)
		if [[ -n $OPTARG ]]; then
			coredir=$OPTARG
		fi
		;;
	f)
		# the working directory is stored as a resolved path
		if [[ -n $OPTARG ]]; then
			basedir=$(readlink -f "$OPTARG")
		fi
		;;
	*)
		echo "Invalid argument: -$OPTARG"
		usage
		exit 1
		;;
	esac
done

# Drop the parsed options; whatever is left is handed to ztest.
shift $((OPTIND - 1))
212
#
# Write the core pattern saved at startup back to the kernel.
#
# Installed as an EXIT trap so that the pattern is also restored when the
# script stops through one of the "exit" calls in or_die, store_core or
# the checks below, not only when the main loop ends normally.
# Nothing is done unless running on Linux with a saved pattern.
#
function restore_core_pattern
{
	if [[ $(uname) == Linux ]] && [[ -n ${origcorepattern+set} ]]; then
		echo "$origcorepattern" > /proc/sys/kernel/core_pattern
	fi
}
trap restore_core_pattern EXIT

# enable core dumps
ulimit -c unlimited
export ASAN_OPTIONS=abort_on_error=true:halt_on_error=true:allocator_may_return_null=true:disable_coredump=false:detect_stack_use_after_return=true
export UBSAN_OPTIONS=abort_on_error=true:halt_on_error=true:print_stacktrace=true

# Refuse to start while an old core file is lying around.
if [[ -f "$(core_file)" ]]; then
	echo -n "There's a core dump here you might want to look at first... "
	core_file
	echo
	exit 1
fi

if [[ ! -d $coredir ]]; then
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
fi

if [[ ! -w $coredir ]]; then
	echo "core dump directory ($coredir) is not writable."
	exit 1
fi

# Start with fresh log files in the current directory.
or_die rm -f ztest.history ztest.zdb ztest.cores

ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)
curtime=$starttime
iteration=0
242
# Main loop: run ztest with a freshly randomized configuration until the
# timeout has passed or the iteration limit is reached.
# if no timeout was specified, loop forever.
while (( timeout == 0 )) || (( curtime <= (starttime + timeout) )); do
	if (( iterations > 0 )) && (( iteration++ == iterations )); then
		break
	fi

	# The ztest options are collected in an array so that every value
	# is passed as exactly one argument, even when it contains
	# whitespace (e.g. a working directory given with -f).
	zopt=(-G -VVVVV)

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# ashift is either 9 or 12
	align=$(((RANDOM % 2) * 3 + 9))

	# choose parity value
	parity=$(((RANDOM % 3) + 1))

	draid_data=0
	draid_spares=0

	# randomly use special classes
	class="special=random"

	# choose between four types of configs
	# (basic, raidz mix, raidz expansion, and draid mix)
	case $((RANDOM % 4)) in

	# basic mirror configuration
	0)	parity=1
		mirrors=2
		raid_children=0
		vdevs=2
		raid_type="raidz"
		;;

	# fully randomized mirror/raidz (sans dRAID)
	1)	mirrors=$(((RANDOM % 3) * 1))
		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
		raid_type="raidz"
		;;

	# randomized raidz expansion (one top-level raidz vdev)
	2)	mirrors=0
		vdevs=1
		# derive initial raidz disk count based on parity choice
		#   P1: 3 - 7 disks
		#   P2: 5 - 9 disks
		#   P3: 7 - 11 disks
		raid_children=$(((RANDOM % 5) + (parity * 2) + 1))

		# 1/3 of the time use a dedicated '-X' raidz expansion test
		if [[ $((RANDOM % 3)) -eq 0 ]]; then
			zopt+=(-X -t 16)
			raid_type="raidz"
		else
			raid_type="eraidz"
		fi
		;;

	# fully randomized dRAID (sans mirror/raidz)
	3)	mirrors=0
		draid_data=$(((RANDOM % 8) + 3))
		draid_spares=$(((RANDOM % 2) + parity))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + (RANDOM % 4)))
		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
		vdevs=$((RANDOM % 3))
		raid_type="draid"
		;;
	*)
		# avoid shellcheck SC2249
		;;
	esac

	zopt+=(
		-K "$raid_type"
		-m "$mirrors"
		-r "$raid_children"
		-D "$draid_data"
		-S "$draid_spares"
		-R "$parity"
		-v "$vdevs"
		-a "$align"
		-C "$class"
		-s "$size"
		-f "$workdir"
	)

	# "$@" keeps each extra ztest argument given after "--" intact.
	# $ZTEST is left unquoted so it is word-split as before.
	# shellcheck disable=SC2154,SC2206
	cmd=($ZTEST "${zopt[@]}" "$@")
	echo "$(date '+%m/%d %T') ${cmd[*]}" | tee -a ztest.history ztest.out
	"${cmd[@]}" >>ztest.out 2>&1
	ztrc=$?
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done
342
# Final report once the main loop has ended.
echo "zloop finished, $foundcrashes crashes found"

# On Linux, put back the core pattern that was saved at startup.
if [[ $(uname) == Linux ]]; then
	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
fi

uptime >>ztest.out

# A non-zero exit status tells the caller that crashes were found.
if (( foundcrashes > 0 )); then
	exit 1
fi
359