# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017 Datto Inc.
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
# Copyright 2019 Richard Elling
#

#
# Returns SCSI host number for the given disk
#
# $1 disk name as it appears under /sys/block
#
# Prints the text before the first ':' of the entry listed in the
# disk's device/scsi_device directory.
#
function get_scsi_host #disk
{
	typeset disk=$1
	ls "/sys/block/${disk}/device/scsi_device" | cut -d : -f 1
}

#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number
#
# Does nothing on platforms other than Linux.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	typeset host

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			log_note "/sys/class/scsi_host/host$hostnum/scan"
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}

#
# Wait for newly created block devices to have their minors created.
# Additional arguments can be passed to udevadm trigger, with the expected
# arguments to typically be a block device pathname. This is useful when
# waiting on a specific device to settle rather than triggering
# all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is given if settle takes
# too long.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	if is_linux; then
		udevadm trigger "$@" 2>/dev/null
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Arithmetic comparison: inside [[ ]] the > operator compares
		# strings, so 7 would sort after 60 and 100 before it.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if [[ $# -eq 0 ]]; then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi
	# Poll for the given paths to appear, but give up eventually.
	typeset -i i
	typeset missing dev
	for (( i = 0; i < 5; ++i )); do
		missing=false
		for dev in "$@"; do
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# the pause between polls grows with the number of paths
		sleep $#
	done
}

#
# Check if the given device is physical device
#
# $1 device name, with or without a leading $DEV_DSKDIR/ or $DEV_RDSKDIR/
#
# Linux:   the node must pass is_disk_device and
#          /sys/module/loop/parameters/max_part must exist.
# FreeBSD: the node must pass is_disk_device and the name must match one
#          of the listed driver prefixes followed by a unit number.
# Others:  the name must look like c#t#d# / c#d#.
#
function is_physical_device #device
{
	typeset device=${1#$DEV_DSKDIR/}
	device=${device#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		    [[ -f /sys/module/loop/parameters/max_part ]]
	elif is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		    echo "$device" | grep -qE \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
	else
		echo "$device" | grep -qE "^c[0-F]+([td][0-F]+)+$"
	fi
}

#
# Check if the given device is a real device (ie SCSI device)
#
# $1 disk name relative to $DEV_RDSKDIR
#
# On Linux the result is whether lsblk reports a TYPE containing "disk".
# On other platforms no check is made and the function returns success.
#
function is_real_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk "$DEV_RDSKDIR/$disk" -o TYPE 2>/dev/null | \
		    grep -q disk
	fi
}

#
# Check if the given device is a loop device
#
# $1 disk name relative to $DEV_RDSKDIR
#
# On Linux the result is whether lsblk reports a TYPE containing "loop".
# On other platforms no check is made and the function returns success.
#
function is_loop_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk "$DEV_RDSKDIR/$disk" -o TYPE 2>/dev/null | \
		    grep -q loop
	fi
}

#
# Linux:
# Check if the given device is a multipath device and if there is a symbolic
# link to a device mapper and to a disk
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Check if the given device is a gmultipath device.
#
# Others:
# No multipath detection.
#
function is_mpath_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		if lsblk "$DEV_MPATHDIR/$disk" -o TYPE 2>/dev/null | \
		    grep -q mpath; then
			readlink "$DEV_MPATHDIR/$disk" > /dev/null 2>&1
		else
			false
		fi
	elif is_freebsd; then
		is_disk_device "$DEV_MPATHDIR/$disk"
	else
		false
	fi
}

#
# Check if the given path is the appropriate sort of device special node.
#
# $1 path to test
#
function is_disk_device #path
{
	typeset path=$1

	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c "$path"
	else
		test -b "$path"
	fi
}

# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
# Exports SLICE_PREFIX. Does nothing on platforms other than Linux.
#
function set_slice_prefix
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		# Every branch below returns or fails, so only the first
		# entry of $DISKS is ever examined.
		while (( i < ${DISK_ARRAY_NUM:-0} )); do
			disk="$(echo $DISKS | awk -v idx="$i" '{print $(idx + 1)}')"
			# No prefix for a real disk, or for a multipath name
			# whose 18th character is a digit (awk exits 1 then).
			if is_mpath_device "$disk" && ! echo "$disk" | \
			    awk 'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || \
			    is_real_device "$disk"; then
				export SLICE_PREFIX=""
				return 0
			elif is_mpath_device "$disk" || is_loop_device "$disk"; then
				export SLICE_PREFIX="p"
				return 0
			else
				log_fail "$disk not supported for partitioning."
			fi
			(( i = i + 1))
		done
	fi
}

#
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
# Exports DEV_DSKDIR.
#
function set_device_dir
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		# Both branches return, so only the first disk is examined.
		while (( i < ${DISK_ARRAY_NUM:-0} )); do
			disk="$(echo $DISKS | awk -v idx="$i" '{print $(idx + 1)}')"
			if is_mpath_device "$disk"; then
				export DEV_DSKDIR=$DEV_MPATHDIR
				return 0
			else
				export DEV_DSKDIR=$DEV_RDSKDIR
				return 0
			fi
			(( i = i + 1))
		done
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
}

#
# Get the directory path of given device
#
# $1 device name or path
#
# Prints $DEV_DSKDIR on FreeBSD and for physical devices. Otherwise prints
# the argument with its last path component removed, or $DEV_DSKDIR when
# that remainder names a disk device under $DEV_DSKDIR.
#
function get_device_dir #device
{
	typeset device=$1

	if ! is_freebsd && ! is_physical_device "$device"; then
		if [[ $device != "/" ]]; then
			device=${device%/*}
		fi
		if is_disk_device "$DEV_DSKDIR/$device"; then
			device="$DEV_DSKDIR"
		fi
		echo "$device"
	else
		echo "$DEV_DSKDIR"
	fi
}

#
# Get persistent name for given disk
#
# $1 device name relative to $DEV_DSKDIR
#
# On Linux the name is taken from the first disk/by-id entry (real disks)
# or the first disk/by-id/dm-uuid entry (multipath) reported by
# udevadm info. In every other case the argument is printed unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1

	if is_linux; then
		if is_real_device "$device"; then
			udevadm info -q all -n "$DEV_DSKDIR/$device" \
			    | awk '/disk\/by-id/ {print $2; exit}' | cut -d/ -f3-
		elif is_mpath_device "$device"; then
			udevadm info -q all -n "$DEV_DSKDIR/$device" \
			    | awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' \
			    | cut -d/ -f3
		else
			echo "$device"
		fi
	else
		echo "$device"
	fi
}

#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
# $1 disk   disk name
# $2 state  "online" or "offline"
# $3 host   optional SCSI host number handed to scan_scsi_hosts when
#           onlining
#
# Does nothing on platforms other than Linux.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	typeset dm_name dep dep_dir slaves
	typeset dev_state dev_delete

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && is_mpath_device "$disk"; then
			dm_name="$(readlink "$DEV_DSKDIR/$disk" | cut -d/ -f2)"
			slaves="/sys/block/${dm_name}/slaves"
			# Visit every underlying path once. The glob is
			# expanded before the first path is deleted.
			for dep_dir in "$slaves"/*; do
				[[ -e $dep_dir ]] || continue
				dep=${dep_dir##*/}
				#check if disk is online
				lsscsi | grep -qF "$dep" || continue
				log_must eval "echo 'offline' > ${dep_dir}/device/state"
				log_must eval "echo '1' > ${dep_dir}/device/delete"
				if lsscsi | grep -qF "$dep"; then
					log_fail "Offlining $disk failed"
				fi
			done
		elif [[ $state == "offline" ]] && is_real_device "$disk"; then
			#check if disk is online
			if lsscsi | grep -qF "$disk"; then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				if lsscsi | grep -qF "$disk"; then
					log_fail "Offlining $disk failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts "$host"
			block_device_wait
			if is_mpath_device "$disk"; then
				dm_name="$(readlink "$DEV_DSKDIR/$disk" | cut -d/ -f2)"
				dep="$(ls "/sys/block/$dm_name/slaves" | awk '{print $1}')"
				# Onlining worked only if a path shows up in
				# lsscsi again; fail when none does.
				if [[ -z $dep ]] || ! lsscsi | grep -qF "$dep"; then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device "$disk"; then
				block_device_wait
				typeset -i retries=0
				# lsscsi may lag behind the rescan; poll briefly
				while ! lsscsi | grep -qF "$disk"; do
					if (( retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}

#
# Simulate disk removal
#
# $1 disk name
#
function remove_disk #disk
{
	typeset disk=$1
	on_off_disk "$disk" "offline"
	block_device_wait
}

#
# Simulate disk insertion for the given SCSI host
#
# $1 disk name
# $2 SCSI host number to rescan
#
function insert_disk #disk scsi_host
{
	typeset disk=$1
	typeset scsi_host=$2
	on_off_disk "$disk" "online" "$scsi_host"
	block_device_wait
}

#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
# $1 dev_size_mb, $2 add_host, $3 num_tgts and $4 max_luns are handed to
# modprobe unchanged; $5 selects sector_size and physblk_exp:
#   512b -> sector_size=512  physblk_exp=0
#   512e -> sector_size=512  physblk_exp=3
#   4Kn  -> sector_size=4096 physblk_exp=0
#
# Arguments are validated on every platform; the module is only loaded
# on Linux.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1
	typeset hosts=$2
	typeset tgts=$3
	typeset luns=$4
	typeset blksz=$5
	typeset sector blkexp

	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
	    [[ -z $luns ]] || [[ -z $blksz ]] && \
	    log_fail "Arguments invalid or missing"

	case "$blksz" in
	'512b')
		sector=512
		blkexp=0
		;;
	'512e')
		sector=512
		blkexp=3
		;;
	'4Kn')
		sector=4096
		blkexp=0
		;;
	*)
		log_fail "Unsupported blksz value: $blksz"
		;;
	esac

	if is_linux; then
		# dry run: only checks that the module could be loaded
		modprobe -n scsi_debug ||
		    log_unsupported "Platform does not have scsi_debug module"
		if lsmod | grep -q scsi_debug; then
			log_fail "scsi_debug module already installed"
		else
			log_must modprobe scsi_debug dev_size_mb=$devsize \
			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
			    sector_size=$sector physblk_exp=$blkexp
			block_device_wait
			if ! lsscsi | grep -q scsi_debug; then
				log_fail "scsi_debug module install failed"
			fi
		fi
	fi
}

#
# Unload scsi_debug module, if needed.
#
# modprobe -r is run through log_must_retry with "in use" and 5 as its
# leading arguments (presumably the retryable error text and the retry
# limit -- confirm against log_must_retry).
#
function unload_scsi_debug
{
	log_must_retry "in use" 5 modprobe -r scsi_debug
}

#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
function get_debug_device
{
	typeset -i i
	typeset val

	for (( i = 1; i <= 10; ++i )); do
		val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)

		# lsscsi can take time to settle
		if [[ $val != "-" ]]; then
			break
		fi
		sleep 1
	done
	echo "$val"
}

#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 pool name
# $2 device directory; it is matched against, and stripped from, the
#    first column of zpool status -P
#
# Prints the names on one line, each followed by a space. Prints nothing
# when $UNAME is neither Linux nor FreeBSD.
#
function get_pool_devices #testpool #devdir
{
	typeset testpool=$1
	typeset devdir=$2

	case "$UNAME" in
	Linux|FreeBSD)
		zpool status -P "$testpool" | awk -v d="$devdir" \
		    '$1 ~ d {sub(d "/", ""); printf("%s ", $1)}'
		;;
	esac
}

#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices. Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# The nesting depth of each zdb -C line is taken from its count of
	# leading blanks rather than from literal runs of spaces in the
	# patterns. NOTE(review): this assumes top level "children[N]:"
	# entries sit at column 12 and entries nested inside them at
	# column 16 -- confirm against zdb -C output.
	#
	eval "$(zdb -C "$pool" | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
	        return substr(s, RSTART, RLENGTH)
	    }
	    {
	        # count the leading blanks of this line
	        match($0, /^ */)
	        indent = RLENGTH
	    }
	    id && indent < 16 {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    indent == 12 && $1 ~ /^children/ {
	        # entering a top level vdev
	        id = subscript($0)
	        child = "[0]" # default in case there is no nested vdev
	        printf "typeset -a VDEV_MAP%s;", id
	    }
	    indent == 16 && $1 ~ /^children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')"

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal in the while loop. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512B blocks for ease of use with dd.
	#
	typeset level vdev path offset length
	sync_all_pools true
	zdb -dddddd "$ds" "$objnum" | awk '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {

	            print level, int(dva[1]), "0x"dva[2], "0x"dva[3]

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	while read level vdev offset length; do
		# VDEV_MAP is a nested (ksh93) array built by the eval above
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $(( ($offset + (4<<20)) / 512 ))" \
			    "$(( $length / 512 ))"
		done
	done 2>/dev/null
}

#
# Overwrite with random data every block of a file that sits at the given
# level of indirection, as reported by list_file_blocks.
#
# $1 input_file     file whose blocks are to be damaged
# $2 corrupt_level  level number; defaults to 0
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	list_file_blocks "$input_file" | \
	while read level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		# put back the value saved above
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}

#
# Overwrite 32 bytes of one vdev label with random data.
#
# $1 label_number  index 0-3 into the offsets table below
# $2 vdev_path     file or device to write to
#
# The byte offsets are computed from a 256K label size: two are measured
# from the start of the vdev and two from its end (size from stat_size).
#
function corrupt_label_checksum # label_number vdev_path
{
	typeset label_number=$1
	typeset vdev_path=$2
	typeset label_size=$((256*1024))
	typeset vdev_size=$(stat_size "$vdev_path")
	typeset -a offsets=("$((128*1024 - 32))" \
	    "$(($label_size + (128*1024 - 32)))" \
	    "$(($vdev_size - $label_size - (128*1024 + 32)))" \
	    "$(($vdev_size - (128*1024 + 32)))")

	dd if=/dev/urandom of="$vdev_path" seek=${offsets[$label_number]} \
	    bs=1 count=32 conv=notrunc
}