#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017 Datto Inc.
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
# Copyright 2019 Richard Elling
#

#
# Returns SCSI host number for the given disk
#
# $1 disk name as it appears under /sys/block
#
# The host number is the first ':'-separated field of the entry listed
# in the disk's scsi_device sysfs directory.
#
function get_scsi_host #disk
{
	typeset disk=$1
	ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
}

#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number
#
# Only acts on Linux. Without a host number, '- - -' is written to the
# scan file of every /sys/class/scsi_host/host* entry; with one, only
# that host's scan file is written.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			# NOTE(review): this only echoes the path and discards
			# the output; it does not test that the file exists.
			log_must eval \
			    "echo /sys/class/scsi_host/host$hostnum/scan" \
			    > /dev/null
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}

#
# Wait for newly created block devices to have their minors created.
# Additional arguments can be passed to udevadm trigger, with the expected
# arguments to typically be a block device pathname. This is useful when
# waiting on a specific device to settle rather than triggering
# all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is given if settle takes
# too long.
#
# After the platform specific step, the given paths are polled (at most
# five times) until all of them exist.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	if is_linux; then
		udevadm trigger "$@"
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Compare numerically; ">" inside [[ ]] compares strings, which
		# would report e.g. 7 seconds as being longer than 60.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if [[ ${#@} -eq 0 ]]; then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi
	# Poll for the given paths to appear, but give up eventually.
	typeset -i i
	for (( i = 0; i < 5; ++i )); do
		typeset missing=false
		typeset dev
		for dev in "${@}"; do
			# Use -e rather than -f: the paths are normally device
			# special nodes, which are never regular files.
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# The delay between polls is the number of paths given.
		sleep ${#@}
	done
}

#
# Check if the given device is physical device
#
# $1 device name, optionally prefixed with $DEV_DSKDIR/ or $DEV_RDSKDIR/
#
# Linux: the device must be a disk device node and the loop module's
#	max_part parameter file must exist.
# FreeBSD: the device must be a disk device node whose name matches one
#	of the listed driver name patterns.
# Others: the name must match the cXtXdX style pattern.
#
function is_physical_device #device
{
	typeset device=${1#$DEV_DSKDIR/}
	device=${device#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		[[ -f /sys/module/loop/parameters/max_part ]]
		return $?
	elif is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$device" && \
		echo $device | egrep -q \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
		return $?
	else
		echo $device | egrep "^c[0-F]+([td][0-F]+)+$" > /dev/null 2>&1
		return $?
	fi
}

#
# Check if the given device is a real device (ie SCSI device)
#
# $1 disk name relative to $DEV_RDSKDIR
#
# On Linux, succeeds when lsblk reports a TYPE containing "disk".
# No check is implemented for other platforms.
#
function is_real_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
		    egrep disk >/dev/null
		return $?
	fi
}

#
# Check if the given device is a loop device
#
# $1 disk name relative to $DEV_RDSKDIR
#
# On Linux, succeeds when lsblk reports a TYPE containing "loop".
# No check is implemented for other platforms.
#
function is_loop_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
		    egrep loop >/dev/null
		return $?
	fi
}

#
# Linux:
# Check if the given device is a multipath device and if there is a symbolic
# link to a device mapper and to a disk
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Check if the given device is a gmultipath device.
#
# Others:
# No multipath detection.
#
function is_mpath_device #disk
{
	typeset disk=$1
	[[ -z $disk ]] && log_fail "No argument for disk given."

	if is_linux; then
		lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
		    egrep mpath >/dev/null
		if (($? == 0)); then
			readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
			return $?
		else
			# $? here is the (non-zero) status of the failed
			# arithmetic test above.
			return $?
		fi
	elif is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$disk
	else
		false
	fi
}

#
# Check if the given path is the appropriate sort of device special node.
#
# $1 path to test
#
function is_disk_device #path
{
	typeset path=$1

	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c $path
	else
		test -b $path
	fi
}

#
# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
# Exports SLICE_PREFIX as "" or "p" on Linux; does nothing elsewhere.
#
function set_slice_prefix
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		while (( i < $DISK_ARRAY_NUM )); do
			# The shell variable i is not passed to nawk, so this
			# always selects the first word of $DISKS.
			disk="$(echo $DISKS | nawk '{print $(i + 1)}')"
			if ( is_mpath_device $disk ) && [[ -z $(echo $disk | \
			    awk 'substr($1,18,1) ~ /^[[:digit:]]+$/') ]] || \
			    ( is_real_device $disk ); then
				export SLICE_PREFIX=""
				return 0
			elif ( is_mpath_device $disk || is_loop_device \
			    $disk ); then
				export SLICE_PREFIX="p"
				return 0
			else
				log_fail "$disk not supported for partitioning."
			fi
			(( i = i + 1))
		done
	fi
}

#
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
# Exports DEV_DSKDIR as either $DEV_MPATHDIR or $DEV_RDSKDIR.
#
function set_device_dir
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		while (( i < $DISK_ARRAY_NUM )); do
			# The shell variable i is not passed to nawk, so this
			# always selects the first word of $DISKS.
			disk="$(echo $DISKS | nawk '{print $(i + 1)}')"
			if is_mpath_device $disk; then
				export DEV_DSKDIR=$DEV_MPATHDIR
				return 0
			else
				export DEV_DSKDIR=$DEV_RDSKDIR
				return 0
			fi
			(( i = i + 1))
		done
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
}

#
# Get the directory path of given device
#
# $1 device name or path
#
# Prints $DEV_DSKDIR on FreeBSD or for physical devices. Otherwise prints
# the argument with its last path component removed, or $DEV_DSKDIR when
# that remainder names a disk device under $DEV_DSKDIR.
#
function get_device_dir #device
{
	typeset device=$1

	if ! is_freebsd && ! is_physical_device $device; then
		if [[ $device != "/" ]]; then
			device=${device%/*}
		fi
		if is_disk_device "$DEV_DSKDIR/$device"; then
			device="$DEV_DSKDIR"
		fi
		echo $device
	else
		echo "$DEV_DSKDIR"
	fi
}

#
# Get persistent name for given disk
#
# $1 device name relative to $DEV_DSKDIR
#
# On Linux, real and multipath devices are looked up in the udevadm info
# output and the name found under disk/by-id is printed. In every other
# case the given name is printed unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1
	typeset dev_id

	if is_linux; then
		if is_real_device $device; then
			dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
			    | egrep disk/by-id | nawk '{print $2; exit}' \
			    | nawk -F / '{print $3}')"
			echo $dev_id
		elif is_mpath_device $device; then
			dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
			    | egrep disk/by-id/dm-uuid \
			    | nawk '{print $2; exit}' \
			    | nawk -F / '{print $3}')"
			echo $dev_id
		else
			echo $device
		fi
	else
		echo $device
	fi
}

#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
# $1 disk name
# $2 requested state, "online" or "offline"
# $3 optional SCSI host number passed to scan_scsi_hosts when onlining
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			dm_name="$(readlink $DEV_DSKDIR/$disk \
			    | nawk -F / '{print $2}')"
			dep="$(ls /sys/block/${dm_name}/slaves \
			    | nawk '{print $1}')"
			# Offline and delete each underlying path in turn until
			# the multipath device has none left.
			while [[ -n $dep ]]; do
				#check if disk is online
				lsscsi | egrep $dep > /dev/null
				if (($? == 0)); then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					lsscsi | egrep $dep > /dev/null
					if (($? == 0)); then
						log_fail "Offlining" \
						    "$disk failed"
					fi
				fi
				dep="$(ls /sys/block/$dm_name/slaves \
				    2>/dev/null | nawk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			lsscsi | egrep $disk > /dev/null
			if (($? == 0)); then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				lsscsi | egrep $disk > /dev/null
				if (($? == 0)); then
					log_fail "Offlining $disk" \
					    "failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk \
				    | nawk -F / '{print $2}')"
				dep="$(ls /sys/block/$dm_name/slaves \
				    | nawk '{print $1}')"
				lsscsi | egrep $dep > /dev/null
				if (($? != 0)); then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				# Give lsscsi a few seconds to list the disk
				# before declaring failure.
				typeset -i retries=0
				while ! lsscsi | egrep -q $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}

#
# Simulate disk removal
#
# $1 disk name
#
function remove_disk #disk
{
	typeset disk=$1
	on_off_disk $disk "offline"
	block_device_wait
}

#
# Simulate disk insertion for the given SCSI host
#
# $1 disk name
# $2 SCSI host number to rescan
#
function insert_disk #disk scsi_host
{
	typeset disk=$1
	typeset scsi_host=$2
	on_off_disk $disk "online" $scsi_host
	block_device_wait
}

#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
# $1 dev_size_mb
# $2 add_host
# $3 num_tgts
# $4 max_luns
# $5 blksz, mapped to the sector_size/physblk_exp module parameters:
#	512b -> 512/0, 512e -> 512/3, 4Kn -> 4096/0
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1
	typeset hosts=$2
	typeset tgts=$3
	typeset luns=$4
	typeset blksz=$5

	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
	    [[ -z $luns ]] || [[ -z $blksz ]] && \
	    log_fail "Arguments invalid or missing"

	case "$5" in
		'512b')
			typeset sector=512
			typeset blkexp=0
		;;
		'512e')
			typeset sector=512
			typeset blkexp=3
		;;
		'4Kn')
			typeset sector=4096
			typeset blkexp=0
		;;
		*) log_fail "Unsupported blksz value: $5" ;;
	esac

	if is_linux; then
		# -n: dry run, only checks that the module can be resolved.
		modprobe -n scsi_debug
		if (($? != 0)); then
			log_unsupported "Platform does not have scsi_debug" \
			    "module"
		fi
		lsmod | egrep scsi_debug > /dev/null
		if (($? == 0)); then
			log_fail "scsi_debug module already installed"
		else
			log_must modprobe scsi_debug dev_size_mb=$devsize \
			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
			    sector_size=$sector physblk_exp=$blkexp
			block_device_wait
			lsscsi | egrep scsi_debug > /dev/null
			if (($? == 1)); then
				log_fail "scsi_debug module install failed"
			fi
		fi
	fi
}

#
# Unload scsi_debug module, if needed.
#
function unload_scsi_debug
{
	log_must_retry "in use" 5 modprobe -r scsi_debug
}

#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
function get_debug_device
{
	for i in {1..10} ; do
		val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)

		# lsscsi can take time to settle
		if [ "$val" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$val"
}

#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 pool name
# $2 device directory; only status lines containing it are used and the
#    "<devdir>/" prefix is stripped from the printed names
#
function get_pool_devices #testpool #devdir
{
	typeset testpool=$1
	typeset devdir=$2
	typeset out=""

	if is_linux || is_freebsd; then
		out=$(zpool status -P $testpool |grep ${devdir} | awk '{print $1}')
		out=$(echo $out | sed -e "s|${devdir}/||g" | tr '\n' ' ')
	fi
	echo $out
}

#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices. Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# NOTE(review): the patterns below depend on the exact indentation
	# of the zdb -C output (12 spaces before a top level "children"
	# entry, 16 before its members and nested "children" entries).
	# Confirm these counts against real zdb -C output.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
	        return substr(s, RSTART, RLENGTH)
	    }
	    id && !/^                / {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    /^            children/ {
	        # entering a top level vdev
	        id = subscript($0)
	        child = "[0]" # default in case there is no nested vdev
	        printf "typeset -a VDEV_MAP%s;", id
	    }
	    /^                children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal in the while loop. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512B blocks for ease of use with dd.
	#
	typeset level vdev path offset length
	if awk -n '' 2>/dev/null; then
		# gawk needs -n to decode hex
		AWK='awk -n'
	else
		AWK='awk'
	fi
	log_must zpool sync -f
	zdb -dddddd $ds $objnum | $AWK -v pad=$((4<<20)) -v bs=512 '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {
	            # top level vdev id
	            vdev = int(dva[1])
	            # offset + 4M label/boot pad in 512B blocks
	            offset = (int("0x"dva[2]) + pad) / bs
	            # length in 512B blocks
	            len = int("0x"dva[3]) / bs

	            print level, vdev, offset, len

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	while read level vdev offset length; do
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $offset $length"
		done
	done 2>/dev/null
}

#
# Overwrite with random data every block of the input file that
# list_file_blocks reports at the requested level of indirection.
#
# $1 input file
# $2 level of indirection to corrupt (default 0)
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	list_file_blocks $input_file | \
	while read level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		# Restore the flags saved above.
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}