xref: /freebsd/sys/contrib/openzfs/tests/zfs-tests/include/blkdev.shlib (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1# SPDX-License-Identifier: CDDL-1.0
2#
3# This file and its contents are supplied under the terms of the
4# Common Development and Distribution License ("CDDL"), version 1.0.
5# You may only use this file in accordance with the terms of version
6# 1.0 of the CDDL.
7#
8# A full copy of the text of the CDDL should have accompanied this
9# source.  A copy of the CDDL is also available via the Internet at
10# http://www.illumos.org/license/CDDL.
11#
12
13#
14# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
15# Use is subject to license terms.
16# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
17# Copyright 2016 Nexenta Systems, Inc.
18# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
19# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
20# Copyright (c) 2017 Datto Inc.
21# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
22# Copyright 2019 Richard Elling
23#
24
#
# Print the SCSI host number of the given disk.
#
# $1 disk name as found under /sys/block
#
# The listing of /sys/block/<disk>/device/scsi_device is split on ':' and
# only the first field is printed.
#
function get_scsi_host #disk
{
	typeset blkdev=$1
	typeset sysdir="/sys/block/${blkdev}/device/scsi_device"

	ls $sysdir | cut -d : -f 1
}
33
#
# Trigger a rescan of SCSI host adapters by writing '- - -' to their
# sysfs "scan" file. Only acts on Linux; elsewhere it does nothing.
#
# $1 optional host number; when omitted, every entry matching
#    /sys/class/scsi_host/host* is rescanned
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# The loop variable must be local: callers such as on_off_disk keep
	# their own $host, which would otherwise be overwritten here.
	typeset host

	is_linux || return 0

	if [[ -z $hostnum ]]; then
		for host in /sys/class/scsi_host/host*; do
			log_must eval "echo '- - -' > $host/scan"
		done
	else
		log_note "/sys/class/scsi_host/host$hostnum/scan"
		log_must eval \
		    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
	fi
}
55
#
# Wait for newly created block devices to have their minors created.
#
# Any arguments are passed on to "udevadm trigger" (Linux) and are then
# polled for existence, so they are expected to be block device pathnames.
# Passing specific devices lets the caller wait on just those devices
# rather than triggering all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a note is logged when settle
# takes more than 60 seconds.
#
# On FreeBSD with no arguments, a sysctl that has to pass through the geom
# event queue is read instead and the function returns immediately.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	typeset start elapsed missing dev
	typeset -i i

	if is_linux; then
		# Pass each argument through as its own word; errors from the
		# trigger are deliberately discarded.
		udevadm trigger "$@" 2>/dev/null
		start=$SECONDS
		udevadm settle
		elapsed=$((SECONDS - start))
		# Compare arithmetically. Inside [[ ]] the > operator is a
		# string comparison, which reports 7 as larger than 60 and
		# 100 as smaller.
		if (( elapsed > 60 )); then
			log_note "udevadm settle time too long: $elapsed"
		fi
	elif is_freebsd; then
		if (( $# == 0 )); then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi

	# Poll for the given paths to appear, but give up after five rounds.
	for (( i = 0; i < 5; ++i )); do
		missing=false
		for dev in "$@"; do
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# The pause between rounds is one second per requested path.
		sleep $#
	done
}
106
#
# Check whether the given device counts as a physical device.
#
# $1 device name, optionally prefixed with $DEV_DSKDIR/ or $DEV_RDSKDIR/
#
# Linux:   the name must be a disk device node under $DEV_DSKDIR and
#          /sys/module/loop/parameters/max_part must exist.
# FreeBSD: the name must be a disk device node under $DEV_DSKDIR and match
#          one of the known driver name patterns listed below.
# Others:  the name must look like c#t#d# / c#d#.
#
function is_physical_device #device
{
	typeset name=${1#$DEV_DSKDIR/}
	name=${name#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$name" || return
		[ -f /sys/module/loop/parameters/max_part ]
		return
	fi

	if is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$name" || return
		echo $name | grep -qE \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
		return
	fi

	echo $name | grep -qE "^c[0-F]+([td][0-F]+)+$"
}
131
#
# Check if the given device is a real device (ie SCSI device).
#
# $1 disk name, looked up under $DEV_RDSKDIR
#
# On Linux this succeeds when the lsblk TYPE column for the device contains
# "disk". On other platforms no check is made and the result is success.
#
function is_real_device #disk
{
	typeset name=$1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$name -o TYPE 2>/dev/null | grep -q disk
}
145
#
# Check if the given device is a loop device.
#
# $1 disk name, looked up under $DEV_RDSKDIR
#
# On Linux this succeeds when the lsblk TYPE column for the device contains
# "loop". On other platforms no check is made and the result is success.
#
function is_loop_device #disk
{
	typeset name=$1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$name -o TYPE 2>/dev/null | grep -q loop
}
159
#
# Check if the given device is a multipath device.
#
# $1 disk name, looked up under $DEV_MPATHDIR
#
# Linux:
# The lsblk TYPE column for the device must contain "mpath" and readlink
# on the device path must succeed, i.e. the entry is a symbolic link.
# Currently no support for dm devices alone without multipath.
#
# FreeBSD:
# The path under $DEV_MPATHDIR must be a disk device node (gmultipath).
#
# Others:
# No multipath detection; always fails.
#
function is_mpath_device #disk
{
	typeset disk=$1

	if [[ -z $disk ]]; then
		log_fail "No argument for disk given."
	fi

	if is_linux; then
		lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
		    grep -q mpath || return 1
		readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
		return
	fi

	if is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$disk
		return
	fi

	return 1
}
190
#
# Check if the given path is the appropriate sort of device special node:
# a character device on FreeBSD, a block device everywhere else.
#
# $1 path to test
#
# Returns success only when the path exists and has the expected type.
#
function is_disk_device #path
{
	typeset path=$1

	# The path is quoted on purpose. Unquoted, an empty path collapses
	# to "test -c" / "test -b", which is a one-argument test and
	# therefore true.
	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c "$path"
	else
		test -b "$path"
	fi
}
205
#
# Export SLICE_PREFIX, the string placed between a disk name and its
# partition number, depending on whether the device is a real, multipath,
# or loop device. Only acts on Linux.
#
# Currently all disks have to be of the same type, so only the first disk
# in $DISKS is examined: every arm of the loop body returns or fails.
#
# SLICE_PREFIX=""   real devices, and multipath devices whose name has a
#                   digit as its 18th character
# SLICE_PREFIX="p"  remaining multipath devices, and loop devices
#
# NOTE(review): the digit test's polarity is described as the code stands
# (awk exits 1 on a digit and the "!" inverts it) - confirm it is intended.
#
function set_slice_prefix
{
	typeset disk
	typeset -i n=0

	is_linux || return 0

	while (( n < $DISK_ARRAY_NUM )); do
		# NOTE: "i" below is awk's own (unset) variable, not a shell
		# variable, so this always selects the first field of $DISKS.
		disk="$(echo $DISKS | awk '{print $(i + 1)}')"
		if is_mpath_device $disk && \
		    ! echo $disk | awk 'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || \
		    is_real_device $disk; then
			export SLICE_PREFIX=""
			return 0
		fi
		if is_mpath_device $disk || is_loop_device $disk; then
			export SLICE_PREFIX="p"
			return 0
		fi
		log_fail "$disk not supported for partitioning."
		(( n = n + 1 ))
	done
}
232
#
# Export DEV_DSKDIR, the directory holding the devices listed in $DISKS.
#
# Currently all disks have to be of the same type, so only the first disk
# is examined to determine the device directory.
#
# non-Linux                = $DEV_RDSKDIR
# multipath device (linux) = $DEV_MPATHDIR
# anything else (linux)    = $DEV_RDSKDIR
#
function set_device_dir
{
	typeset disk
	typeset -i n=0

	if ! is_linux; then
		export DEV_DSKDIR=$DEV_RDSKDIR
		return
	fi

	while (( n < $DISK_ARRAY_NUM )); do
		# NOTE: "i" below is awk's own (unset) variable, not a shell
		# variable, so this always selects the first field of $DISKS.
		disk="$(echo $DISKS | awk '{print $(i + 1)}')"
		if is_mpath_device $disk; then
			export DEV_DSKDIR=$DEV_MPATHDIR
		else
			export DEV_DSKDIR=$DEV_RDSKDIR
		fi
		# Decided on the first disk; nothing further to look at.
		return 0
	done
}
262
#
# Print the directory path of the given device.
#
# $1 device name or path
#
# On FreeBSD, or when the device is a physical device, this is always
# $DEV_DSKDIR. Otherwise the last path component is stripped (unless the
# argument is exactly "/"), and if the result names a disk device node
# under $DEV_DSKDIR, $DEV_DSKDIR is printed instead.
#
function get_device_dir #device
{
	typeset dir=$1

	if is_freebsd || is_physical_device $dir; then
		echo "$DEV_DSKDIR"
		return
	fi

	[[ $dir == "/" ]] || dir=${dir%/*}
	is_disk_device "$DEV_DSKDIR/$dir" && dir="$DEV_DSKDIR"
	echo $dir
}
282
#
# Print a persistent name for the given disk.
#
# $1 device name, looked up under $DEV_DSKDIR
#
# On Linux the name comes from the "udevadm info" output: for a real
# device, the first disk/by-id entry with its two leading path components
# removed; for a multipath device, the third path component of the first
# disk/by-id/dm-uuid entry. In every other case the name is echoed back
# unchanged.
#
function get_persistent_disk_name #device
{
	typeset dev=$1

	if ! is_linux; then
		echo $dev
	elif is_real_device $dev; then
		udevadm info -q all -n $DEV_DSKDIR/$dev \
		    | awk '/disk\/by-id/ {print $2; exit}' | cut -d/ -f3-
	elif is_mpath_device $dev; then
		udevadm info -q all -n $DEV_DSKDIR/$dev \
		    | awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' \
		    | cut -d/ -f3
	else
		echo $dev
	fi
}
305
#
# Online or offline a disk on the system (Linux only; a no-op elsewhere).
#
# $1 disk   device name
# $2 state  "online" or "offline"
# $3 host   optional SCSI host number, used for the rescan when onlining
#
# The state of the disk is checked against the lsscsi listing and the test
# fails if the disk is not properly onlined or offlined.
#
# offline: for a multipath device, each slave listed under
#          /sys/block/<dm>/slaves is set offline and deleted in turn, until
#          none are left; for a real device, the device itself is set
#          offline and deleted.
# online:  the SCSI host is rescanned (all hosts when no host is given),
#          then the disk, or the first slave of a multipath device, must
#          show up in lsscsi again.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Scratch variables, declared so they neither leak into nor clobber
	# the caller's namespace.
	typeset dm_name dep dep_dir ss sd dev_state dev_delete

	[[ -z $disk ]] || [[ -z $state ]] &&  \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
			# Work on one slave at a time: take only the first
			# entry so $dep is always a single device name.
			dep="$(ls /sys/block/${dm_name}/slaves | awk '{print $1; exit}')"
			while [[ -n $dep ]]; do
				#check if disk is online
				if lsscsi | grep -qF -- "$dep"; then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					if lsscsi | grep -qF -- "$dep"; then
						log_fail "Offlining $disk failed"
					fi
				fi
				dep="$(ls /sys/block/$dm_name/slaves 2>/dev/null | awk '{print $1; exit}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			if lsscsi | grep -qF -- "$disk"; then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				if lsscsi | grep -qF -- "$disk"; then
					log_fail "Offlining $disk failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
				dep="$(ls /sys/block/$dm_name/slaves | awk '{print $1; exit}')"
				# Onlining worked only if a path device is back
				# and visible to lsscsi; its absence is the
				# failure (mirrors the real-device check below).
				if [[ -z $dep ]] || ! lsscsi | grep -qF -- "$dep"; then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				typeset -i retries=0
				while ! lsscsi | grep -qF -- "$disk"; do
					if (( retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}
383
#
# Simulate disk removal: take the disk offline, then wait for the block
# device changes to settle.
#
# $1 disk name
#
function remove_disk #disk
{
	on_off_disk $1 "offline"
	block_device_wait
}
393
#
# Simulate disk insertion for the given SCSI host: bring the disk online,
# then wait for the block device changes to settle.
#
# $1 disk name
# $2 SCSI host number handed on to on_off_disk
#
function insert_disk #disk scsi_host
{
	on_off_disk $1 "online" $2
	block_device_wait
}
404
#
# Load the scsi_debug module with the specified parameters.
#
# $1 dev_size_mb  passed as dev_size_mb=
# $2 add_host     passed as add_host=
# $3 num_tgts     passed as num_tgts=
# $4 max_luns     passed as max_luns=
# $5 blksz        one of: 512b | 512e | 4Kn
#
# blksz selects the sector_size / physblk_exp module parameters:
#   512b -> 512 / 0,  512e -> 512 / 3,  4Kn -> 4096 / 0
#
# All five arguments are required. The module is only loaded on Linux; it
# is an error if it is already loaded, and the platform is reported as
# unsupported if "modprobe -n" cannot find it.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1 hosts=$2 tgts=$3 luns=$4 blksz=$5
	typeset sector blkexp

	if [[ -z $devsize || -z $hosts || -z $tgts || -z $luns || \
	    -z $blksz ]]; then
		log_fail "Arguments invalid or missing"
	fi

	case "$blksz" in
		'512b') sector=512; blkexp=0 ;;
		'512e') sector=512; blkexp=3 ;;
		'4Kn') sector=4096; blkexp=0 ;;
		*) log_fail "Unsupported blksz value: $5" ;;
	esac

	is_linux || return 0

	modprobe -n scsi_debug ||
		log_unsupported "Platform does not have scsi_debug module"

	if ! lsmod | grep -q scsi_debug; then
		log_must modprobe scsi_debug dev_size_mb=$devsize \
		    add_host=$hosts num_tgts=$tgts max_luns=$luns \
		    sector_size=$sector physblk_exp=$blkexp
		block_device_wait
		# The new device has to be visible before we report success.
		if ! lsscsi | grep -q scsi_debug; then
			log_fail "scsi_debug module install failed"
		fi
	else
		log_fail "scsi_debug module already installed"
	fi
}
453
#
# Unload the scsi_debug module, if needed.
#
# The removal is run through log_must_retry with the arguments "in use"
# and 5. NOTE(review): presumably that means "retry up to 5 times while
# the output says the module is in use" - confirm against log_must_retry.
#
function unload_scsi_debug
{
	typeset -a unload_cmd=(modprobe -r scsi_debug)

	log_must_retry "in use" 5 "${unload_cmd[@]}"
}
461
#
# Get scsi_debug device name.
#
# Prints the basename of the first scsi_debug device reported by lsscsi
# (for example "sdb"). While lsscsi still shows "-" in place of a device
# node, the lookup is repeated once per second, at most ten times; the
# last value seen is printed either way.
#
function get_debug_device
{
	# Both are local so the caller's own $i / $val are left alone.
	typeset i
	typeset val

	for i in {1..10} ; do
		val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)

		# lsscsi can take time to settle
		if [ "$val" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$val"
}
479
#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 pool name
# $2 device directory, used as an awk pattern against the first column of
#    "zpool status -P"
#
# Every matching row has its first "<devdir>/" removed and its first
# column printed, all on one line, each followed by a space. Nothing is
# printed unless $UNAME is Linux or FreeBSD.
#
function get_pool_devices #testpool #devdir
{
	typeset pool=$1
	typeset dir=$2

	[[ $UNAME == Linux || $UNAME == FreeBSD ]] || return 0

	zpool status -P $pool | awk -v d="$dir" '$1 ~ d {sub(d "/", ""); printf("%s ", $1)}'
}
495
#
# Write to standard out the level, device path, offset and length of all
# blocks in an input file, one block per line. The offset and length are
# in units of 512 byte blocks. Note that this function only works with
# mirrored or non-redundant pools, not raidz.
#
# $1 input_file  an existing regular file on a ZFS dataset
#
# NOTE(review): the loop at the end emits one line for every path recorded
# for the block's top level vdev, so for a mirror each child appears. The
# older wording "only the first device is listed" looks stale - confirm.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	# The pool is the part of the dataset name before the first "/".
	# get_objnum is defined elsewhere in the test library.
	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# The awk program turns the "zdb -C" configuration into shell
	# assignments, keyed on the indentation of the "children[N]" lines,
	# and the result is executed with eval. The paths come straight from
	# the pool configuration, so this assumes they contain no shell
	# metacharacters.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
		return substr(s, RSTART, RLENGTH)
	    }
	    id && !/^                / {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    /^            children/ {
	        # entering a top level vdev
	        id = subscript($0)
		child = "[0]" # default in case there is no nested vdev
		printf "typeset -a VDEV_MAP%s;", id
	    }
	    /^                children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are printed as 0x-prefixed hex and converted to decimal by the
	# arithmetic in the while loop. 4M is added to the offset to
	# compensate for the first two labels and boot block. Lastly, the
	# offset and length are printed in units of 512B blocks for ease of
	# use with dd.
	#
	typeset level vdev path offset length
	# Defined elsewhere in the test library.
	sync_all_pools true
	zdb -dddddd $ds $objnum | awk '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {

	            print level, int(dva[1]), "0x"dva[2], "0x"dva[3]

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	while read level vdev offset length; do
		# Nested array subscripts: this needs a shell with
		# multidimensional indexed arrays (ksh93).
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $(( ($offset + (4<<20)) / 512 ))" \
			    "$(( $length / 512 ))"
		done
	# Anything the loop writes to stderr is discarded.
	done 2>/dev/null
}
582
#
# Overwrite with random data every block of a file that sits at the given
# level of indirection, as reported by list_file_blocks.
#
# $1 input_file     an existing regular file
# $2 corrupt_level  level number without the leading "L" (default 0)
#
# On FreeBSD kern.geom.debugflags is set to 16 for the duration of the
# writes and put back to its previous value afterwards.
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	# Local, so the saved sysctl value does not leak to the caller.
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	# list_file_blocks reports offset and length in 512 byte units,
	# hence bs=512 below.
	list_file_blocks $input_file | \
	while read level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}
612
#
# Overwrite 32 bytes of one vdev label with random data.
#
# $1 label_number  0-3; selects one of the four offsets computed below
# $2 vdev_path     file or device to write to; its size comes from
#                  stat_size
#
# Labels are taken to be 256K each. For labels 0 and 1 the write starts 32
# bytes before the 128K mark of the label, counted from the start of the
# vdev; for labels 2 and 3 the offset is counted back from the end of the
# vdev. NOTE(review): presumably this is where the label checksum is
# stored - confirm against the on-disk format.
#
function corrupt_label_checksum # label_number vdev_path
{
	typeset label_size=$((256 * 1024))
	typeset vdev_size=$(stat_size ${2})
	typeset from_start=$((128 * 1024 - 32))
	typeset from_end=$((128 * 1024 + 32))
	typeset -a offsets

	offsets[0]="$from_start"
	offsets[1]="$(($label_size + $from_start))"
	offsets[2]="$(($vdev_size - $label_size - $from_end))"
	offsets[3]="$(($vdev_size - $from_end))"

	dd if=/dev/urandom of=${2} seek=${offsets[$1]} bs=1 count=32 \
	    conv=notrunc
}
625