#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
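
#
# Any of the PERF_* defaults above can be overridden in the environment
# before launching zfstest, e.g. (hypothetical values):
#	export PERF_RUNTIME=60
#	export PERF_FIO_FORMAT=normal
#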

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

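#
# Construct the suffix used in output filenames, e.g.
# "sync.8k-ios.16-threads.1-filesystems" (hypothetical values).
#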
function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

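#
# Usage: do_fio_run_impl <script> <do_recreate> <clear_cache> \
#	<threads> <threads_per_fs> <sync> <iosize>
#
# "script" names a .fio file under $FIO_SCRIPTS, "do_recreate" and
# "clear_cache" are the literal strings "true" or "false", and the
# remaining arguments are single values (not lists).
#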
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		# $do_recreate holds "true" or "false"; run it directly so
		# that "false" actually fails here ("test false" would
		# always succeed, since any non-empty string is true).
		log_must $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
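	# A test case can do so by redefining get_directory before this
	# function runs.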
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable the client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define the output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function runs fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can
# be set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of thread-per-filesystem counts; 0 means
#    all threads share a single filesystem.
# PERF_SYNC_TYPES: A list of sync types: 1 is sync (O_SYNC) IO, 0 is
#    async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
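# For example (hypothetical values):
#	export PERF_NTHREADS='8 16'
#	export PERF_NTHREADS_PER_FS='0'
#	export PERF_IOSIZES='8k 128k'
#	do_fio_run random_reads.fio true false
#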
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}


#
# This function sets up the NFS mount on the client and makes sure all
# the correct permissions are in place.
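# It assumes the NFS_USER, NFS_CLIENT, NFS_SERVER, NFS_MOUNT and
# NFS_OPTIONS variables are provided by the test environment.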
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the command/tag pairs collected in the
# collect_scripts array (including any pairs supplied via
# $PERF_COLLECT_SCRIPTS). The script at index N is launched in the
# background, with its output redirected to a logfile containing the
# tag specified at index N + 1.
#
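# For example (hypothetical values):
#	PERF_COLLECT_SCRIPTS='vmstat 1, vmstat, iostat -x 1, iostat'
# runs "vmstat 1" and "iostat -x 1" in the background for the duration
# of the test, tagging their logfiles "vmstat" and "iostat".
#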
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

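#
# Apply the delays in the ZINJECT_DELAYS array to every disk in $DISKS.
# Each entry uses zinject's "-D latency:lanes" syntax, e.g. (hypothetical
# value) ZINJECT_DELAYS=('25:1') injects a 25ms delay on a single lane.
#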
function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there are some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# will destroy the previous pool before creating the new one.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be greater than zero.
	# The special value 0 was handled above, so verifying the value
	# is non-negative here ensures it is positive.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
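	# For example, threads=32 with threads_per_fs=8 spreads the
	# threads evenly over 4 filesystems (hypothetical values).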
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

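#
# Create the given number of filesystems in $PERFPOOL using
# $PERF_FS_OPTS, and record their names (space separated) in $TESTFS.
#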
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

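#
# Return the mountpoints of the filesystems in $TESTFS as a single
# colon-separated string, the format fio's "directory" option accepts
# for multiple directories, e.g. "/perfpool/fs1:/perfpool/fs2"
# (hypothetical paths).
#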
function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=$(awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=$(awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=$(awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi

	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"

	echo $arc_c
}

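#
# On illumos the dbuf cache limit is read directly from the kernel; on
# other platforms it is derived from the current ARC target, i.e.
# arc_c / 2^DBUF_CACHE_SHIFT.
#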
function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi

	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}

# Create a file with some information about how this system is configured.
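# The resulting file is JSON along the lines of (hypothetical values):
#	{ "ncpus": "16", "physmem": "34359738368", "c_max": "...",
#	  "disks": { ... }, "tunables": { ... } }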
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

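#
# Pick a default number of fio jobs: one per CPU, scaled down to 3/4 of
# the CPU count on systems with more than 8 CPUs (e.g. 16 CPUs yield
# 12 jobs).
#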
function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$(nproc --all)
	else
		typeset ncpu=$(psrinfo | wc -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$((ncpu * 3 / 4))

	echo $num_jobs
}

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
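# On Linux it might look like ":sdb:sdc:", and on FreeBSD like
# ":ada1:ada2:" (hypothetical device names).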
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
603