xref: /freebsd/sys/contrib/openzfs/tests/zfs-tests/tests/perf/perf.shlib (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1#
2# This file and its contents are supplied under the terms of the
3# Common Development and Distribution License ("CDDL"), version 1.0.
4# You may only use this file in accordance with the terms of version
5# 1.0 of the CDDL.
6#
7# A full copy of the text of the CDDL should have accompanied this
8# source.  A copy of the CDDL is also available via the Internet at
9# http://www.illumos.org/license/CDDL.
10#
11
12#
13# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
14# Copyright (c) 2016, Intel Corporation.
15#
16
17. $STF_SUITE/include/libtest.shlib
18
# A nightly run is selected whenever a weekly run was not requested.
if [[ -z $PERF_REGRESSION_WEEKLY ]]; then
	export PERF_REGRESSION_NIGHTLY=1
fi

# Default runtime, in seconds, for each type of test run.
export PERF_RUNTIME_WEEKLY=$((30 * 60))
export PERF_RUNTIME_NIGHTLY=$((10 * 60))

# fio emits JSON unless the caller already picked another format.
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Filesystem creation options used unless the caller supplied their own.
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
32
#
# Translate the numeric sync flag in $1 into a label:
# 0 prints "async", 1 prints "sync", anything else prints an empty line.
#
function get_sync_str
{
	typeset flag=$1
	typeset label=''

	if [[ $flag -eq 0 ]]; then
		label='async'
	elif [[ $flag -eq 1 ]]; then
		label='sync'
	fi

	echo $label
}
42
#
# Print the suffix that identifies one run:
#   <sync_str>.<iosize>-ios.<threads>-threads.<N>-filesystems
#
# $1 - thread count, $2 - sync flag, $3 - I/O size. The filesystem count
# comes from get_nfilesystems.
#
function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset mode=$(get_sync_str $sync)
	typeset fs_count=$(get_nfilesystems)
	typeset suffix="$mode.$iosize-ios.$threads-threads.$fs_count-filesystems"

	echo $suffix
}
56
#
# Run one fio job for a single combination of parameters.
#
# Arguments:
#   $1 script         - fio job file, looked up under $FIO_SCRIPTS
#   $2 do_recreate    - command (e.g. true/false); when it succeeds the
#                       pool and its filesystems are recreated first
#   $3 clear_cache    - command (e.g. true/false); when it succeeds the
#                       pool is exported and imported before the run
#   $4 threads        - exported to the fio job as NUMJOBS
#   $5 threads_per_fs - empty or 0 selects a single filesystem
#   $6 sync           - exported to the fio job as SYNC_TYPE
#   $7 iosize         - exported to the fio job as BLOCKSIZE
#
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	#
	# A value of zero for "threads_per_fs" is "special", and means a
	# single filesystem should be used, regardless of the number of
	# threads.
	#
	typeset nfilesystems=1
	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		# NOTE(review): "test" with one non-empty operand succeeds
		# for both "true" and "false", so this does not actually
		# enforce that recreation was requested - confirm intent.
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
		nfilesystems=$((threads / threads_per_fs))
	fi

	if $do_recreate; then
		recreate_perf_pool
		populate_perf_filesystems $nfilesystems
	fi

	if $clear_cache; then
		# Clear the ARC
		zpool export $PERFPOOL
		zpool import $PERFPOOL
	fi

	if [[ -z $ZINJECT_DELAYS ]]; then
		log_note "No per-device commands to execute."
	else
		apply_zinject_delays
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	# Parameters handed to the fio job through the environment.
	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		# Run fio on the NFS client, fetch its output, then unmount.
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}
155
#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
#    nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads share one filesystem.
#    A value of 0 means a single filesystem is used; 0 is also what is
#    used when the variable is unset or empty.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
# Arguments:
#   $1 - fio job file name
#   $2 - do_recreate, passed through to do_fio_run_impl
#   $3 - clear_cache, passed through to do_fio_run_impl
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		#
		# Fall back to 0 (single filesystem) so that an unset
		# PERF_NTHREADS_PER_FS does not silently skip every run.
		#
		for threads_per_fs in ${PERF_NTHREADS_PER_FS:-0}; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}
194
#
# Share $TESTFS over NFS, mount it on the NFS client, and copy an
# expanded version of the fio job file ($1, relative to $FIO_SCRIPTS)
# to /tmp/test.fio on the client.
#
function do_setup_nfs
{
	typeset script=$1
	typeset client=$NFS_USER@$NFS_CLIENT
	typeset jobfile=/tmp/test.fio

	# Export the filesystem and open up its permissions.
	zfs set sharenfs=on $TESTFS
	log_must chmod  -R 777 /$TESTFS

	# Only the final mount is checked with log_must; the mkdir and
	# umount results are ignored.
	ssh -t $client "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $client "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $client "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"

	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine. "jobnum" is set
	# to its own literal reference so that it survives the evaluation
	# as "$jobnum".
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > $jobfile

	# Point the job at the NFS mount, ship it to the client, clean up.
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" $jobfile
	log_must scp $jobfile $client:/tmp
	log_must rm $jobfile
}
222
#
# This function launches the data collection scripts in the background.
# The list is made of the value pairs in the global "collect_scripts"
# array followed by the pairs in $PERF_COLLECT_SCRIPTS (comma delimited).
# The script at index N is launched under timeout(1) for $PERF_RUNTIME,
# with its output redirected to a logfile containing the tag specified
# at index N + 1 and the suffix passed as $1.
#
# The global "collect_scripts" array is not modified, so calling this
# function repeatedly does not launch the user supplied scripts more
# than once per call.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Work on a private copy so the user supplied entries added below
	# do not accumulate in the global array across calls.
	typeset -a scripts=("${collect_scripts[@]}")

	# Add in user supplied scripts and logfiles, if any.
	typeset item
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		# Strip leading blanks; skip entries that end up empty.
		item=${item#"${item%%[! ]*}"}
		[[ -n $item ]] && scripts+=("$item")
	done
	IFS=$oIFS

	typeset logbase="$(get_perf_output_dir)/$(basename $SUDO_COMMAND)"
	typeset outfile
	typeset idx=0
	while [[ $idx -lt "${#scripts[@]}" ]]; do
		outfile="$logbase.${scripts[$idx + 1]}.$suffix"

		# The entry is left unquoted on purpose: it holds a command
		# together with its arguments.
		timeout $PERF_RUNTIME ${scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
257
#
# Find a place to deposit performance data collected while under load.
# Creates "perf_data" under the current working directory if it does not
# exist yet and prints its absolute path. The path is quoted throughout
# so a working directory containing whitespace is handled correctly.
#
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p "$dir"

	echo "$dir"
}
266
#
# Apply every entry of the ZINJECT_DELAYS array to every disk listed in
# $DISKS by running "zinject -d <disk> -D <delay> $PERFPOOL".
# Fails the test via log_fail if an entry is empty.
#
function apply_zinject_delays
{
	typeset idx=0
	typeset disk

	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		# An empty entry is a configuration error; report it rather
		# than trying to execute the message as a command.
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}
282
#
# Cancel all outstanding zinject handlers by running "zinject -c all"
# under log_must.
#
function clear_zinject_delays
{
	log_must zinject -c all
}
287
#
# Destroy and create the pool used for performance tests.
# Requires $PERFPOOL to be set; the pool is built from $DISKS.
#
function recreate_perf_pool
{
	if [[ -z $PERFPOOL ]]; then
		log_fail "The \$PERFPOOL variable isn't set."
	fi

	#
	# Each zinject delay places a hold on the pool, so the destroy
	# would fail while any delay is outstanding. Clear them all first,
	# whether they were "leaked" by an earlier run or injected by the
	# performance test itself.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# destroys the previous pool and then creates a new one.
	#
	create_pool $PERFPOOL $DISKS
}
310
#
# Validate a thread count ($1) against a threads-per-filesystem value
# ($2). Every check goes through log_must, so a violation fails the test.
#
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	#
	# The operands must be quoted: an unquoted empty value would
	# collapse to "test -n", which is always true.
	#
	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# Zero was handled above, so any remaining valid value must be
	# strictly greater than zero.
	#
	log_must test $threads_per_fs -gt 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
341
#
# Create $1 filesystems (default 1) named $PERFPOOL/fs1 .. fsN using
# $PERF_FS_OPTS, and export their names, space separated, in TESTFS.
#
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS

		# Append, inserting a separating space after the first entry.
		TESTFS="${TESTFS:+$TESTFS }$dataset"
	done
}
357
#
# Print the number of whitespace separated entries in $TESTFS.
#
function get_nfilesystems
{
	# Split $TESTFS into the function's own positional parameters
	# and report how many there are.
	set -- $TESTFS
	echo $#
}
363
#
# Print the mountpoints of all filesystems listed in $TESTFS, joined
# with ":" in the order they appear. Prints an empty line when $TESTFS
# is empty.
#
function get_directory
{
	typeset directory=
	typeset fs mountpoint

	for fs in $TESTFS; do
		mountpoint=$(get_prop mountpoint "$fs")

		# Prefix with "<previous>:" once there is a previous entry.
		directory=${directory:+$directory:}$mountpoint
	done

	echo "$directory"
}
384
#
# Print the ARC "c_min" value, read with the platform specific tool:
# sysctl on FreeBSD, dtrace on illumos, the arcstats kstat file on Linux.
# Calls log_fail if the command used to read the value fails.
#
function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min) ||
		    log_fail "get_min_arc_size failed"
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}') || log_fail "get_min_arc_size failed"
	elif is_linux; then
		min_arc_size=$(awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats) ||
		    log_fail "get_min_arc_size failed"
	fi

	echo $min_arc_size
}
405
#
# Print the ARC "c_max" value, read with the platform specific tool:
# sysctl on FreeBSD, dtrace on illumos, the arcstats kstat file on Linux.
# Calls log_fail if the command used to read the value fails.
#
function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max) ||
		    log_fail "get_max_arc_size failed"
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}') || log_fail "get_max_arc_size failed"
	elif is_linux; then
		max_arc_size=$(awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats) ||
		    log_fail "get_max_arc_size failed"
	fi

	echo $max_arc_size
}
426
#
# Print the maximum dbuf cache size in bytes: read with dtrace on
# illumos, and through get_tunable DBUF_CACHE_MAX_BYTES elsewhere.
# Calls log_fail if the command used to read the value fails.
#
function get_max_dbuf_cache_size
{
	typeset -l max_dbuf_cache_size

	if is_illumos; then
		max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}') || log_fail "get_max_dbuf_cache_size failed"
	else
		max_dbuf_cache_size=$(get_tunable DBUF_CACHE_MAX_BYTES) ||
		    log_fail "get_max_dbuf_cache_size failed"
	fi

	echo $max_dbuf_cache_size
}
444
#
# Create a file with some information about how this system is configured.
#
# $1 is the file name; a JSON object is appended to $PERF_DATA_DIR/$1.
# On Linux it records the CPU count, physical memory, ARC c_max, hostname,
# kernel version, the disks reported by lsblk and a fixed list of ZFS
# module parameters. On other platforms the data comes from dtrace,
# uname, "iostat -En" and /etc/system.
#
# NOTE(review): the non-Linux branch looks illumos specific (dtrace
# kernel symbols, /etc/system) - confirm it is usable on FreeBSD.
#
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1
	typeset zfs_tunables="/sys/module/zfs/parameters"
	typeset tunable
	typeset need_sep=''

	# Everything printed inside the group is appended to the file.
	{
		echo "{"
		if is_linux; then
			echo "  \"ncpus\": \"$(nproc --all)\","
			echo "  \"physmem\": \"$(free -b | \
			    awk '$1 == "Mem:" { print $2 }')\","
			echo "  \"c_max\": \"$(get_max_arc_size)\","
			echo "  \"hostname\": \"$(uname -n)\","
			echo "  \"kernel version\": \"$(uname -sr)\","

			lsblk -dino NAME,SIZE | awk 'BEGIN {
			    printf("  \"disks\": {\n"); first = 1}
			    {disk = $1} {size = $2;
			    if (first != 1) {printf(",\n")} else {first = 0}
			    printf("    \"%s\": \"%s\"", disk, size)}
			    END {printf("\n  },\n")}'

			printf "  \"tunables\": {\n"
			for tunable in \
			    zfs_arc_max \
			    zfs_arc_meta_limit \
			    zfs_arc_sys_free \
			    zfs_dirty_data_max \
			    zfs_flags \
			    zfs_prefetch_disable \
			    zfs_txg_timeout \
			    zfs_vdev_aggregation_limit \
			    zfs_vdev_async_read_max_active \
			    zfs_vdev_async_write_max_active \
			    zfs_vdev_sync_read_max_active \
			    zfs_vdev_sync_write_max_active \
			    zio_slow_io_ms
			do
				# Separate entries with ",\n" after the first.
				[[ -n $need_sep ]] && printf ",\n"
				need_sep=1

				# Pass name and value as printf arguments so
				# they are never interpreted as a format.
				printf '    "%s": "%s"' "$tunable" \
				    "$(<$zfs_tunables/$tunable)"
			done
			printf "\n  }\n"
		else
			dtrace -qn 'BEGIN{
			    printf("  \"ncpus\": %d,\n", `ncpus);
			    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
			    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
			    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
			    exit(0)}'
			echo "  \"hostname\": \"$(uname -n)\","
			echo "  \"kernel version\": \"$(uname -v)\","

			iostat -En | awk 'BEGIN {
			    printf("  \"disks\": {\n"); first = 1}
			    /^c/ {disk = $1}
			    /^Size: [^0]/ {size = $2;
			    if (first != 1) {printf(",\n")} else {first = 0}
			    printf("    \"%s\": \"%s\"", disk, size)}
			    END {printf("\n  },\n")}'

			sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
			    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
			    {if (first != 1) {printf(",\n")} else {first = 0};
			    printf("    \"%s\": %s", $1, $2)}
			    END {printf("\n  }\n")}'
		fi
		echo "}"
	} >>"$config"
}
519
#
# Print a job count derived from the number of CPUs: the CPU count
# itself for up to 8 CPUs, otherwise three quarters of it (rounded down).
#
# The CPU count comes from nproc on Linux, "sysctl -n hw.ncpu" on FreeBSD
# and psrinfo elsewhere. $NPROC and $WC are still honored when set, but
# the plain commands are used when they are not.
#
function num_jobs_by_cpu
{
	typeset ncpu

	if is_linux; then
		ncpu=$(${NPROC:-nproc} --all)
	elif is_freebsd; then
		ncpu=$(sysctl -n hw.ncpu)
	else
		ncpu=$(psrinfo | ${WC:-wc} -l)
	fi

	typeset num_jobs=$ncpu

	# Integer arithmetic truncates, matching the previous bc result.
	[[ $ncpu -gt 8 ]] && num_jobs=$((ncpu * 3 / 4))

	echo $num_jobs
}
533
#
# Print a ":" delimited list (with leading and trailing ":") of the
# devices backing the pool named in $1.
#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
# On FreeBSD and Linux the entries are the device names printed in the
# first column of "zpool list -HLv" that match the per-platform pattern.
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun dev_re
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in
			# /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and
			# instance number to the list for comparison with
			# dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		#
		# The pattern and its action must form a single awk rule.
		# Putting the action on its own line makes awk print every
		# matching line in full and run the action on all lines.
		#
		dev_re='a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+'
		dev_re="$dev_re|nvd[0-9]+|vtbd[0-9]+"
		lun_list+=$(zpool list -HLv $pool | \
		    awk -v re="$dev_re" '$0 ~ re { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}
571
# Set up the perf_data directory that holds performance statistics, and
# record the system configuration there unless config.json already exists.
export PERF_DATA_DIR=$(get_perf_output_dir)
if [[ ! -f $PERF_DATA_DIR/config.json ]]; then
	get_system_config config.json
fi
576