#!/usr/bin/env bash

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2015 by Delphix. All rights reserved.
# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
#

#
# zloop.sh - run ztest in a loop with randomized pool layouts and collect
# the logs, vdev files and core dump of every run that crashes.
#
# Relies on the helper script common.sh (expected next to this script) to
# provide $ZTEST and $ZDB.
#

BASE_DIR=$(dirname "$0")
SCRIPT_COMMON=common.sh
if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
fi

# shellcheck disable=SC2034
PROG=zloop.sh
GDB=${GDB:-gdb}

DEFAULTWORKDIR=/var/tmp
DEFAULTCOREDIR=/var/tmp/zloop

#
# Print the help text to stderr.
#
function usage
{
	cat >&2 <<EOF

$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
    [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
    [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -s  Size of vdev devices.
    -f  Specify working directory for ztest vdev files.
    -c  Specify a core dump directory to use.
    -m  Max number of core dumps to allow before exiting.
    -l  Create 'ztest.core.N' symlink to core directory.
    -h  Print this help message.

EOF
}

#
# Run the given command; if it fails, report it and exit the script with
# status 1.
#
# Arguments: the command and its arguments, one word each.  They are
# executed exactly as passed (no re-splitting or re-globbing), so callers
# must expand globs themselves.
#
function or_die
{
	if ! "$@"; then
		echo "Command failed: $*"
		exit 1
	fi
}

# Set to 1 once this script has overwritten the kernel core pattern.
corepattern_changed=0

#
# Work out the glob that matches core files on this platform.  On Linux a
# core pattern that pipes to a helper or starts with a % specifier yields
# an empty prefix (glob "*"), so it is replaced with a plain "core".
#
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
	    /proc/sys/kernel/core_pattern)*"

	if [[ $coreglob = "*" ]]; then
		echo "Setting core file pattern..."
		# Without a usable pattern every file in the current
		# directory would be mistaken for a core, so failing to
		# write the pattern is fatal.
		if ! echo "core" > /proc/sys/kernel/core_pattern; then
			echo "Unable to set core file pattern" >&2
			exit 1
		fi
		corepattern_changed=1
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac

#
# Put back the core pattern saved at startup, if this script changed it.
# Installed as an EXIT trap so that early exits (or_die failures, the -m
# core limit) do not leave the system modified.
# NOTE(review): this replaces any EXIT trap common.sh may have installed;
# confirm it installs none.
#
function restore_core_pattern
{
	if [[ $corepattern_changed -ne 0 ]]; then
		echo "$origcorepattern" > /proc/sys/kernel/core_pattern
	fi
}
trap restore_core_pattern EXIT

#
# Print the name of the oldest file matching $coreglob (no trailing
# newline), or nothing if there is no match.
#
function core_file
{
	# $coreglob is left unquoted on purpose: it has to expand as a glob.
	# shellcheck disable=SC2012 disable=2086
	printf "%s" "$(ls -tr1 $coreglob 2> /dev/null | head -1)"
}

#
# Print the program that produced a core file: $ZDB if gdb reports that
# the core was generated by a "zdb " command line, $ZTEST otherwise.
#
# Arguments: $1 - path to the core file
#
function core_prog
{
	local prog core_id

	prog=$ZTEST
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr \' ' ')
	# shellcheck disable=SC2076
	if [[ "$core_id" =~ "zdb " ]]; then
		prog=$ZDB
	fi
	printf "%s" "$prog"
}

#
# If the last ztest run failed ($ztrc non-zero) or a core file is present,
# save the evidence under $coredir/zloop-<timestamp>: the ztest logs, zdb
# output for the pool, the vdev files and, when there is a core, the core
# plus gdb backtraces.  Exits the script with status 1 once $coremax
# crashes have been collected (when $coremax is greater than 0).
#
# Globals: ztrc, workdir, coredir, symlink, coremax (read),
#          foundcrashes (incremented)
#
function store_core
{
	local core coreid dest coreprog coredebug zdbdebug
	local -a zdbcmd

	core="$(core_file)"
	if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
		df -h "$workdir" >>ztest.out
		coreid=$(date "+zloop-%y%m%d-%H%M%S")
		foundcrashes=$((foundcrashes + 1))

		# zdb debugging
		# $ZDB stays unquoted so a value carrying extra words
		# (e.g. a wrapper command) keeps splitting as before.
		# shellcheck disable=SC2206
		zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
		zdbdebug=$("${zdbcmd[@]}" 2>&1)
		echo -e "${zdbcmd[*]}\n" >>ztest.zdb
		echo "$zdbdebug" >>ztest.zdb

		dest=$coredir/$coreid
		or_die mkdir -p "$dest"
		or_die mkdir -p "$dest/vdev"

		if [[ $symlink -ne 0 ]]; then
			or_die ln -sf "$dest" "ztest.core.$foundcrashes"
		fi

		echo "*** ztest crash found - moving logs to $dest"

		or_die mv ztest.history "$dest/"
		or_die mv ztest.zdb "$dest/"
		or_die mv ztest.out "$dest/"
		# The glob is expanded here; or_die does not re-glob.
		or_die mv "$workdir/"ztest* "$dest/vdev/"

		if [[ -e "$workdir/zpool.cache" ]]; then
			or_die mv "$workdir/zpool.cache" "$dest/vdev/"
		fi

		# check for core
		if [[ -f "$core" ]]; then
			coreprog=$(core_prog "$core")
			coredebug=$($GDB --batch --quiet \
			    -ex "set print thread-events off" \
			    -ex "printf \"*\n* Backtrace \n*\n\"" \
			    -ex "bt" \
			    -ex "printf \"*\n* Libraries \n*\n\"" \
			    -ex "info sharedlib" \
			    -ex "printf \"*\n* Threads (full) \n*\n\"" \
			    -ex "info threads" \
			    -ex "printf \"*\n* Backtraces \n*\n\"" \
			    -ex "thread apply all bt" \
			    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
			    -ex "thread apply all bt full" \
			    -ex "quit" "$coreprog" "$core" 2>&1 | \
			    grep -v "New LWP")

			# Dump core + logs to stored directory
			echo "$coredebug" >>"$dest/ztest.gdb"
			or_die mv "$core" "$dest/"

			# Record info in cores logfile
			echo "*** core @ $coredir/$coreid/$core:" | \
			    tee -a ztest.cores
		fi

		if [[ $coremax -gt 0 ]] &&
		    [[ $foundcrashes -ge $coremax ]]; then
			echo "exiting... max $coremax allowed cores"
			exit 1
		else
			echo "continuing..."
		fi
	fi
}

# parse arguments
# expected format: zloop [-hl] [-t timeout] [-m coremax] [-s size]
#                        [-c coredir] [-f workdir] [-- extra ztest args]
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR
rundir="zloop-run"
timeout=0
size="512m"
coremax=0
symlink=0
while getopts ":ht:m:s:c:f:l" opt; do
	case $opt in
		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
		s ) [[ $OPTARG ]] && size=$OPTARG ;;
		c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
		f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
		l ) symlink=1 ;;
		h ) usage
		    exit 2
		    ;;
		: ) echo "Option -$OPTARG requires an argument";
		    usage
		    exit 1
		    ;;
		* ) echo "Invalid argument: -$OPTARG";
		    usage
		    exit 1
	esac
done
# pass remaining arguments on to ztest
shift $((OPTIND - 1))

# readlink -f can print nothing for an unresolvable path; refuse to run
# with a missing working directory rather than operate on "/$rundir".
if [[ ! -d $basedir ]]; then
	echo "working directory ($basedir) does not exist."
	exit 1
fi

# enable core dumps
ulimit -c unlimited
export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0

if [[ -f "$(core_file)" ]]; then
	echo -n "There's a core dump here you might want to look at first... "
	core_file
	echo
	exit 1
fi

if [[ ! -d $coredir ]]; then
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
fi

if [[ ! -w $coredir ]]; then
	echo "core dump directory ($coredir) is not writable."
	exit 1
fi

or_die rm -f ztest.history
or_die rm -f ztest.zdb
or_die rm -f ztest.cores

ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)
curtime=$starttime

# if no timeout was specified, loop forever.
while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
	zopt=(-G -VVVVV)

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# switch between common arrangements & fully randomized
	if [[ $((RANDOM % 2)) -eq 0 ]]; then
		mirrors=2
		raidz=0
		parity=1
		vdevs=2
	else
		mirrors=$(((RANDOM % 3) * 1))
		parity=$(((RANDOM % 3) + 1))
		raidz=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
	fi
	align=$(((RANDOM % 2) * 3 + 9))
	runtime=$((RANDOM % 100))
	passtime=$((RANDOM % (runtime / 3 + 1) + 10))

	zopt+=(-m "$mirrors")
	zopt+=(-r "$raidz")
	zopt+=(-R "$parity")
	zopt+=(-v "$vdevs")
	zopt+=(-a "$align")
	zopt+=(-T "$runtime")
	zopt+=(-P "$passtime")
	zopt+=(-s "$size")
	zopt+=(-f "$workdir")

	# Build the command as an array so the working directory and the
	# user's extra ztest arguments reach ztest as the words they were
	# given.  $ZTEST stays unquoted so a value carrying extra words
	# keeps splitting as before.
	# shellcheck disable=SC2206
	cmd=($ZTEST "${zopt[@]}" "$@")
	desc="$(date '+%m/%d %T') ${cmd[*]}"
	echo "$desc" | tee -a ztest.history
	echo "$desc" >>ztest.out
	"${cmd[@]}" >>ztest.out 2>&1
	ztrc=$?
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done

echo "zloop finished, $foundcrashes crashes found"

# The core pattern is restored by the EXIT trap installed above.

uptime >>ztest.out

if [[ $foundcrashes -gt 0 ]]; then
	exit 1
fi