# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. "$STF_SUITE"/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
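
# Example (illustrative): any of the defaults above can be overridden from
# the environment before launching the test suite, e.g.:
#
#	PERF_RUNTIME=60 PERF_FIO_FORMAT=terse ./scripts/zfs-tests.sh -T perf
#
# The runner invocation is an assumption; use however your setup launches
# zfstest.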

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}
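
# Example (illustrative):
#	get_sync_str 0    -> "async"
#	get_sync_str 1    -> "sync"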

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str "$sync")
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo "$suffix"
}
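
# Example (illustrative): with 16 threads, sync IO, and an 8k iosize against
# a single filesystem, the suffix is "sync.8k-ios.16-threads.1-filesystems".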

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str "$sync")
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test "$do_recreate"
		verify_threads_per_fs "$threads" "$threads_per_fs"
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable the client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs "$script"
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix "$threads" "$sync" "$iosize")

	# Start the data collection
	do_collect_scripts "$suffix"

	# Define the output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    "$SUDO_COMMAND")"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp "$NFS_USER@$NFS_CLIENT":/tmp/fio.out "$outfile"
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format="${PERF_FIO_FORMAT}" \
		    --output "$outfile" "$FIO_SCRIPTS/$script"
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads will share each
#    filesystem (0 means all threads use a single filesystem).
# PERF_SYNC_TYPES: A list of sync types: 1 is sync (O_SYNC) IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma-delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    "$script" \
					    "$do_recreate" \
					    "$clear_cache" \
					    "$threads" \
					    "$threads_per_fs" \
					    "$sync" \
					    "$iosize"
				done
			done
		done
	done
}
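
# Example (illustrative): sweep two thread counts and two IO sizes over one
# of the .fio job files shipped under $FIO_SCRIPTS, recreating the pool for
# each combination:
#
#	export PERF_NTHREADS="8 16"
#	export PERF_NTHREADS_PER_FS="0"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 64k"
#	do_fio_run sequential_writes.fio true false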

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on "$TESTFS"
	log_must chmod -R 777 /"$TESTFS"

	ssh -t "$NFS_USER@$NFS_CLIENT" "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < "$FIO_SCRIPTS/$script" > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio "$NFS_USER@$NFS_CLIENT":/tmp
	log_must rm /tmp/test.fio
}
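
# Example (hypothetical hosts and mount options): variables a user would
# export to drive the NFS path through do_fio_run:
#
#	export NFS=1
#	export NFS_USER=perf
#	export NFS_CLIENT=client.example.com
#	export NFS_SERVER=server.example.com
#	export NFS_MOUNT=/mnt/perf-nfs
#	export NFS_OPTIONS="-o vers=4"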

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user-supplied scripts and logfiles, if any. IFS stays set
	# to ',' across the unquoted expansion below, so each comma-separated
	# field (including any embedded spaces) lands in one array slot.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo "$item" | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    "$SUDO_COMMAND")"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		# The command expansion is deliberately unquoted so that
		# "command args" strings split into a command and its
		# arguments.
		timeout "$PERF_RUNTIME" ${collect_scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
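
# Example (illustrative): collect vmstat output alongside the statistics each
# test already gathers. Fields alternate between a command and the tag used
# in the logfile name:
#
#	export PERF_COLLECT_SCRIPTS="vmstat 1, vmstat.user"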

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$PWD/perf_data"
	[[ -d $dir ]] || mkdir -p "$dir"

	echo "$dir"
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d "$disk" -D "${ZINJECT_DELAYS[$idx]}" "$PERFPOOL"
		done

		((idx += 1))
	done
}
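
# Example (illustrative): delay every IO issued to each disk in $DISKS by
# 10ms with a single IO lane, using zinject's "-D latency:lanes" form:
#
#	typeset -a ZINJECT_DELAYS=('10:1')
#	apply_zinject_delays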

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# destroys the previous pool and then creates the new one.
	#
	create_pool "$PERFPOOL" "$DISKS"
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be greater than zero.
	# Since we already returned above when the value is 0, verifying
	# that it's non-negative here is enough to enforce that.
	#
	log_must test "$threads_per_fs" -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
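
# Example (illustrative): 16 threads with 4 threads per filesystem passes the
# checks above and implies 16 / 4 = 4 filesystems; 16 threads with 5 per
# filesystem would trip the divisibility check (16 % 5 != 0).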

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 "$nfilesystems"); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset "$dataset" "$PERF_FS_OPTS"
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}
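
# Example (illustrative): with PERFPOOL=perfpool, "populate_perf_filesystems 3"
# creates perfpool/fs1 through perfpool/fs3 and leaves TESTFS set to
# "perfpool/fs1 perfpool/fs2 perfpool/fs3".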

function get_nfilesystems
{
	typeset filesystems=($TESTFS)
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=($TESTFS)
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo "$directory"
}
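
# Example (illustrative): for the three filesystems above, mounted at their
# default mountpoints, get_directory emits the colon-separated list that fio
# accepts for its "directory" option:
#	/perfpool/fs1:/perfpool/fs2:/perfpool/fs3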

function get_min_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_min" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_min
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_min_arc_size failed"
}

function get_max_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_max" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_max
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_max_arc_size failed"
}

function get_arc_target
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_arc_target failed"
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi || log_fail "get_dbuf_cache_size failed"

	echo "$dbuf_cache_size"
}
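
# Example arithmetic (assuming the OpenZFS default DBUF_CACHE_SHIFT of 5):
# an ARC target of 4 GiB yields 4294967296 / 2^5 = 134217728 bytes (128 MiB)
# for the dbuf cache.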

# Create a file with some information about how this system is configured.
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>"$config"
	if is_linux; then
		echo "  \"ncpus\": \"$(lscpu | awk '/^CPU\(s\)/ {print $2; exit}')\"," >>"$config"
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>"$config"
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -sr)\"," >>"$config"
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -v)\"," >>"$config"
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>"$config"
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>"$config"
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>"$config"
		done
		printf "\n  }\n" >>"$config"
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>"$config"
	fi
	echo "}" >>"$config"
}
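
# Illustrative excerpt of the resulting file on a Linux system (all values
# made up):
#
#	{
#	  "ncpus": "8",
#	  "physmem": "33675845632",
#	  "c_max": "16837922816",
#	  "hostname": "perfhost",
#	  "kernel version": "Linux 6.6.0",
#	  ...
#	}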

#
# Build a ":"-delimited list of the devices backing the given pool.
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	case "$UNAME" in
	Linux)
		ctds=$(zpool list -HLv "$pool" | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
		;;
	FreeBSD)
		lun_list+=$(zpool list -HLv "$pool" | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
		;;
	*)
		ctds=$(zpool list -v "$pool" |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/"${ctd}"s0 | sed -n 's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | awk -v dn="$devname" '$0 ~ dn {print $3$2}')
			lun_list="$lun_list$lun:"
		done
		;;
	esac
	echo "$lun_list"
}
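
# Example (illustrative): for a pool backed by sda and sdb on Linux,
# pool_to_lun_list prints ":sda:sdb:".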

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json