#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017 Datto Inc.
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
# Copyright 2019 Richard Elling
#

#
# Returns SCSI host number for the given disk
#
# $1 disk name as it appears under /sys/block (e.g. "sdb")
#
# Prints the first ':'-separated field of the entry found in the disk's
# scsi_device sysfs directory.
#
function get_scsi_host #disk
{
	typeset disk=$1
	ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
}

#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number; when given only that host is scanned
#
# Does nothing on platforms other than Linux.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# Keep the loop variable local so it does not leak into the caller.
	typeset host

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				# eval is needed so that the redirection is
				# performed by the command run under log_must.
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			log_note "/sys/class/scsi_host/host$hostnum/scan"
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}

#
# Wait for newly created block devices to have their minors created.
# Additional arguments can be passed to udevadm trigger, with the expected
# arguments to typically be a block device pathname. This is useful when
# waiting on a specific device to settle rather than triggering
# all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros.
# If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is given if settle takes
# too long.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	if is_linux; then
		# $* is deliberately unquoted: the arguments are handed to
		# udevadm trigger as separate words.
		udevadm trigger $* 2>/dev/null
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Compare numerically; [[ a > b ]] sorts as strings and would
		# report e.g. 7 seconds as being longer than 60.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if [[ ${#@} -eq 0 ]]; then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi
	# Poll for the given paths to appear, but give up eventually.
	typeset -i i
	for (( i = 0; i < 5; ++i )); do
		typeset missing=false
		typeset dev
		for dev in "${@}"; do
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# The pause is one second per path being waited for.
		sleep ${#@}
	done
}

#
# Check if the given device is physical device
#
# $1 device name, with or without a leading $DEV_DSKDIR/ or $DEV_RDSKDIR/
#
function is_physical_device #device
{
	typeset device=${1#$DEV_DSKDIR/}
	device=${device#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		    [ -f /sys/module/loop/parameters/max_part ]
	elif is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		    echo $device | grep -qE \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
	else
		echo $device | grep -qE "^c[0-F]+([td][0-F]+)+$"
	fi
}

#
# Check if the given device is a real device (ie SCSI device)
#
# On Linux the result is whether lsblk reports a "disk" type for
# $DEV_RDSKDIR/<disk>. On other platforms no check is made and the
# function returns success.
#
function is_real_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
		    grep -q disk
	fi
}

#
# Check if the given device is a loop device
#
# On Linux the result is whether lsblk reports a "loop" type for
# $DEV_RDSKDIR/<disk>. On other platforms no check is made and the
# function returns success.
#
function is_loop_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
		    grep -q loop
	fi
}

#
# Linux:
# Check if the given device is a multipath device and if there is a symbolic
# link to a device mapper and to a disk
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Check if the given device is a gmultipath device.
#
# Others:
# No multipath detection.
#
function is_mpath_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		if lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
		    grep -q mpath; then
			readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
		else
			false
		fi
	elif is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$disk
	else
		false
	fi
}

#
# Check if the given path is the appropriate sort of device special node.
#
function is_disk_device #path
{
	typeset path=$1

	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c $path
	else
		test -b $path
	fi
}

#
# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
# Exports SLICE_PREFIX. Does nothing on platforms other than Linux.
#
function set_slice_prefix
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		# Every branch of the loop body returns or fails, so only the
		# first entry of $DISKS is ever examined.
		while (( i < $DISK_ARRAY_NUM )); do
			# Hand the shell index to awk explicitly; an awk
			# variable that was never set evaluates to 0.
			disk="$(echo $DISKS | awk -v i="$i" '{print $(i + 1)}')"
			if is_mpath_device $disk && ! echo $disk | awk \
			    'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || \
			    is_real_device $disk; then
				export SLICE_PREFIX=""
				return 0
			elif is_mpath_device $disk || is_loop_device $disk; then
				export SLICE_PREFIX="p"
				return 0
			else
				log_fail "$disk not supported for partitioning."
			fi
			(( i = i + 1))
		done
	fi
}

#
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
# Exports DEV_DSKDIR.
#
function set_device_dir
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		# Both branches of the loop body return, so only the first
		# entry of $DISKS is ever examined.
		while (( i < $DISK_ARRAY_NUM )); do
			# Hand the shell index to awk explicitly; an awk
			# variable that was never set evaluates to 0.
			disk="$(echo $DISKS | awk -v i="$i" '{print $(i + 1)}')"
			if is_mpath_device $disk; then
				export DEV_DSKDIR=$DEV_MPATHDIR
				return 0
			else
				export DEV_DSKDIR=$DEV_RDSKDIR
				return 0
			fi
			(( i = i + 1))
		done
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
}

#
# Get the directory path of given device
#
# On FreeBSD, and for physical devices elsewhere, $DEV_DSKDIR is printed.
# Otherwise the directory part of the argument is printed, unless that
# directory names a disk device under $DEV_DSKDIR, in which case
# $DEV_DSKDIR is printed.
#
function get_device_dir #device
{
	typeset device=$1

	if ! is_freebsd && ! is_physical_device $device; then
		if [[ $device != "/" ]]; then
			device=${device%/*}
		fi
		if is_disk_device "$DEV_DSKDIR/$device"; then
			device="$DEV_DSKDIR"
		fi
		echo $device
	else
		echo "$DEV_DSKDIR"
	fi
}

#
# Get persistent name for given disk
#
# On Linux the name is taken from the first matching disk/by-id entry
# reported by udevadm for real and multipath devices. In every other case
# the argument is echoed back unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1

	if is_linux; then
		if is_real_device $device; then
			udevadm info -q all -n $DEV_DSKDIR/$device \
			    | awk '/disk\/by-id/ {print $2; exit}' | cut -d/ -f3-
		elif is_mpath_device $device; then
			udevadm info -q all -n $DEV_DSKDIR/$device \
			    | awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' \
			    | cut -d/ -f3
		else
			echo $device
		fi
	else
		echo $device
	fi
}

#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
# $1 disk name
# $2 requested state, "online" or "offline"
# $3 optional SCSI host number, passed to scan_scsi_hosts when onlining
#
# Does nothing on platforms other than Linux.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Scratch variables, kept local so they do not leak into the caller.
	typeset dm_name dep dep_dir ss sd dev_state dev_delete

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
			dep="$(ls /sys/block/${dm_name}/slaves | awk '{print $1}')"
			# Keep going until the slaves directory lists nothing.
			while [[ -n $dep ]]; do
				#check if disk is online
				if lsscsi | grep -qF $dep; then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					if lsscsi | grep -qF $dep; then
						log_fail "Offlining $disk failed"
					fi
				fi
				dep="$(ls /sys/block/$dm_name/slaves 2>/dev/null | awk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			if lsscsi | grep -qF $disk; then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				if lsscsi | grep -qF $disk; then
					log_fail "Offlining $disk failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
				dep="$(ls /sys/block/$dm_name/slaves | awk '{print $1}')"
				# Onlining has failed when the member device is
				# NOT listed, matching the real-device branch
				# below; the test used to be inverted.
				if ! lsscsi | grep -qF $dep; then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				typeset -i retries=0
				while ! lsscsi | grep -qF $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}

#
# Simulate disk removal
#
function remove_disk #disk
{
	typeset disk=$1
	on_off_disk $disk "offline"
	block_device_wait
}

#
# Simulate disk insertion for the given SCSI host
#
function insert_disk #disk scsi_host
{
	typeset disk=$1
	typeset scsi_host=$2
	on_off_disk $disk "online" $scsi_host
	block_device_wait
}

#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
# All five arguments are required. The module is only loaded on Linux, and
# the test fails if it is already loaded.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1
	typeset hosts=$2
	typeset tgts=$3
	typeset luns=$4
	typeset blksz=$5

	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
	    [[ -z $luns ]] || [[ -z $blksz ]] && \
	    log_fail "Arguments invalid or missing"

	# Translate the block size name into the values passed to the
	# module's sector_size and physblk_exp parameters.
	case "$5" in
		'512b')
			typeset sector=512
			typeset blkexp=0
		;;
		'512e')
			typeset sector=512
			typeset blkexp=3
		;;
		'4Kn')
			typeset sector=4096
			typeset blkexp=0
		;;
		*) log_fail "Unsupported blksz value: $5" ;;
	esac

	if is_linux; then
		modprobe -n scsi_debug ||
		    log_unsupported "Platform does not have scsi_debug module"
		if lsmod | grep -q scsi_debug; then
			log_fail "scsi_debug module already installed"
		else
			log_must modprobe scsi_debug dev_size_mb=$devsize \
			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
			    sector_size=$sector physblk_exp=$blkexp
			block_device_wait
			if ! lsscsi | grep -q scsi_debug; then
				log_fail "scsi_debug module install failed"
			fi
		fi
	fi
}

#
# Unload scsi_debug module, if needed.
#
function unload_scsi_debug
{
	log_must_retry "in use" 5 modprobe -r scsi_debug
}

#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
function get_debug_device
{
	# Keep the loop state local so it does not leak into the caller.
	typeset i val

	for i in {1..10} ; do
		val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)

		# lsscsi can take time to settle
		if [ "$val" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$val"
}

#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 pool name
# $2 device directory; matching names are printed with "<devdir>/" removed,
#    separated by spaces
#
# Prints nothing unless $UNAME is Linux or FreeBSD.
#
function get_pool_devices #testpool #devdir
{
	typeset testpool=$1
	typeset devdir=$2

	case "$UNAME" in
	Linux|FreeBSD)
		zpool status -P $testpool | awk -v d="$devdir" '$1 ~ d {sub(d "/", ""); printf("%s ", $1)}'
		;;
	esac
}

#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices.
# Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
# $1 path of a regular file on a ZFS dataset
#
# Each output line has the form "<level> <vdev path> <offset> <length>".
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# Top level and nested vdevs are told apart by how deeply their
	# "children[N]" line is indented, so the depth is measured explicitly
	# rather than spelled out as a run of blanks inside a pattern.
	# NOTE(review): the depths 12 and 16 assume that zdb -C indents its
	# config dump by four spaces per nesting level, starting at eight;
	# confirm against real zdb output.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
	        return substr(s, RSTART, RLENGTH)
	    }
	    {
	        # number of leading spaces on this line
	        match($0, /^ */)
	        depth = RLENGTH
	    }
	    id && depth < 16 {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    depth == 12 && /^ *children/ {
	        # entering a top level vdev
	        id = subscript($0)
	        child = "[0]" # default in case there is no nested vdev
	        printf "typeset -a VDEV_MAP%s;", id
	    }
	    depth == 16 && /^ *children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal by awk. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512B blocks for ease of use with dd.
	#
	typeset level vdev path offset length
	# Keep the awk command line local so it does not leak to the caller.
	typeset AWK
	if awk -n '' 2>/dev/null; then
		# gawk needs -n to decode hex
		AWK='awk -n'
	else
		AWK='awk'
	fi
	sync_all_pools true
	zdb -dddddd $ds $objnum | $AWK -v pad=$((4<<20)) -v bs=512 '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {
	            # top level vdev id
	            vdev = int(dva[1])
	            # offset + 4M label/boot pad in 512B blocks
	            offset = (int("0x"dva[2]) + pad) / bs
	            # length in 512B blocks
	            len = int("0x"dva[3]) / bs

	            print level, vdev, offset, len

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	while read level vdev offset length; do
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $offset $length"
		done
	done 2>/dev/null
}

#
# Overwrite with random data every block of a file that sits at the given
# level of indirection.
#
# $1 path of a regular file on a ZFS dataset
# $2 level to corrupt, as a number; defaults to 0
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	# Saved sysctl value, kept local so it does not leak to the caller.
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	list_file_blocks $input_file | \
	while read level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}

#
# Overwrite 32 bytes of the given vdev with random data, at an offset
# chosen by label number.
#
# $1 label number, used as an index (0-3) into the offset table below
# $2 path of the vdev
#
function corrupt_label_checksum # label_number vdev_path
{
	typeset label_size=$((256*1024))
	typeset vdev_size=$(stat_size ${2})
	# The first two offsets are counted from the start of the vdev and
	# the last two back from its end.
	typeset -a offsets=("$((128*1024 - 32))" \
	    "$(($label_size + (128*1024 - 32)))" \
	    "$(($vdev_size - $label_size - (128*1024 + 32)))" \
	    "$(($vdev_size - (128*1024 + 32)))")

	dd if=/dev/urandom of=${2} seek=${offsets[$1]} bs=1 count=32 \
	    conv=notrunc
}