xref: /illumos-gate/usr/src/test/zfs-tests/tests/perf/perf.shlib (revision 3a18338393f3485e50eae6288b6a9ab89e9f715a)
1#
2# This file and its contents are supplied under the terms of the
3# Common Development and Distribution License ("CDDL"), version 1.0.
4# You may only use this file in accordance with the terms of version
5# 1.0 of the CDDL.
6#
7# A full copy of the text of the CDDL should have accompanied this
8# source.  A copy of the CDDL is also available via the Internet at
9# http://www.illumos.org/license/CDDL.
10#
11
12#
13# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
14#
15
16. $STF_SUITE/include/libtest.shlib
17
# Unless a weekly run was explicitly requested, fall back to a nightly run.
if [[ -z $PERF_REGRESSION_WEEKLY ]]; then
	export PERF_REGRESSION_NIGHTLY=1
fi

# Default runtime, in seconds, for each type of test run.
PERF_RUNTIME_WEEKLY=$((30 * 60))
PERF_RUNTIME_NIGHTLY=$((10 * 60))
export PERF_RUNTIME_WEEKLY PERF_RUNTIME_NIGHTLY

# Default fs creation options; a value already present in the environment
# takes precedence.
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
28
#
# Print the label that corresponds to a numeric sync flag: "async" when
# $1 compares equal to 0, "sync" when it compares equal to 1, and an
# empty line for any other number.
#
function get_sync_str
{
	typeset sync=$1
	typeset label=''

	if [[ $sync -eq 0 ]]; then
		label='async'
	elif [[ $sync -eq 1 ]]; then
		label='sync'
	fi

	echo $label
}
38
#
# Print the suffix used in output file names for one fio invocation:
#   <sync label>.<iosize>-ios.<threads>-threads.<N>-filesystems
# where the sync label comes from get_sync_str and N from
# get_nfilesystems.
#
# $1 - thread count
# $2 - sync flag, passed to get_sync_str
# $3 - I/O size
#
function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset mode=$(get_sync_str $sync)
	typeset nfs=$(get_nfilesystems)

	typeset suffix="$mode.$iosize-ios.$threads-threads.$nfs-filesystems"
	echo $suffix
}
52
#
# Run one fio job for a single combination of thread count, threads per
# filesystem, sync type and I/O size.
#
# $1 - fio job file name, looked up under $FIO_SCRIPTS
# $2 - command ("true" or "false") that decides whether the pool and its
#      filesystems are recreated before the run
# $3 - command ("true" or "false") that decides whether the pool is
#      exported and re-imported to clear the ARC
# $4 - number of threads
# $5 - threads per filesystem (empty or 0 means one filesystem)
# $6 - sync flag
# $7 - I/O size
#
# Exports DIRECTORY, RUNTIME, FILESIZE, NUMJOBS, SYNC_TYPE and BLOCKSIZE
# before launching fio (presumably consumed by the fio job file).
#
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	#
	# A value of zero for "threads_per_fs" is "special", and means a
	# single filesystem should be used, regardless of the number of
	# threads. Work out once whether several filesystems are wanted.
	#
	typeset multi_fs=false
	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		multi_fs=true
	fi

	if $multi_fs; then
		#
		# NOTE(review): test(1) with a single non-empty operand
		# always succeeds, so this passes for "false" as well;
		# confirm whether do_recreate was meant to be enforced.
		#
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		if $multi_fs; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		zpool export $PERFPOOL
		zpool import $PERFPOOL
	fi

	if [[ -z $ZINJECT_DELAYS ]]; then
		log_note "No per-device commands to execute."
	else
		apply_zinject_delays
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	export RUNTIME=$PERF_RUNTIME
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# The suffix becomes part of every output filename for this run.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Kick off the data collection scripts before the load starts.
	do_collect_scripts $suffix

	# fio's own output goes next to the collection script logs.
	typeset logbase="$(get_perf_output_dir)/$(basename $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	log_must fio --output $outfile $FIO_SCRIPTS/$script
}
128
#
# Run fio once for every combination of the values listed in the
# environment variables below, using the .fio file passed in. These
# variables can be set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
#    nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads share one filesystem;
#    0 means all threads use a single filesystem.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
# $1 - fio job file name
# $2 - whether to recreate the pool before each run ("true"/"false")
# $3 - whether to clear the cache before each run ("true"/"false")
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl $script $do_recreate \
					    $clear_cache $threads \
					    $threads_per_fs $sync $iosize
				done
			done
		done
	done
}
167
#
# Launch the data collection scripts in the background.
#
# The scripts come from the value pairs in the "collect_scripts" array,
# followed by any user supplied pairs in $PERF_COLLECT_SCRIPTS (a comma
# delimited list). The script at index N is run under timeout(1) for
# $PERF_RUNTIME seconds, with its output redirected to a logfile
# containing the tag specified at index N + 1:
#   <perf output dir>/<basename of $SUDO_COMMAND>.<tag>.<suffix>
#
# $1 - suffix appended to every logfile name
#
# Always returns 0.
#
function do_collect_scripts
{
	typeset suffix=$1
	typeset item logbase outfile
	typeset -a scripts

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	#
	# Work on a private copy. This function is called once per fio
	# invocation, so appending the user supplied scripts to the
	# caller's "collect_scripts" array would add them again on every
	# call and launch duplicate collectors.
	#
	scripts=("${collect_scripts[@]}")

	#
	# Add in user supplied scripts and logfiles, if any. With IFS set
	# to a comma, each "command args" item stays a single array
	# element; it is split into words only when it is executed below.
	#
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	# The logfile prefix is the same for every script.
	logbase="$(get_perf_output_dir)/$(basename $SUDO_COMMAND)"

	typeset idx=0
	while [[ $idx -lt "${#scripts[@]}" ]]; do
		outfile="$logbase.${scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
202
#
# Find a place to deposit performance data collected while under load.
# Prints the path of the "perf_data" directory under the current working
# directory, creating it first if it does not exist.
#
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"

	# Quoted so a working directory containing whitespace stays one path.
	[[ -d $dir ]] || mkdir -p "$dir"

	echo "$dir"
}
211
#
# Apply every entry of the ZINJECT_DELAYS array to every disk listed in
# $DISKS by running "zinject -d <disk> -D <delay> $PERFPOOL". An empty
# array entry is reported through log_fail.
#
function apply_zinject_delays
{
	typeset disk
	typeset idx=0

	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		#
		# This is an error report, so it must go through log_fail;
		# log_must would try to execute the message as a command.
		#
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}
227
#
# Clear all zinject state by running "zinject -c all" under log_must.
#
function clear_zinject_delays
{
	log_must zinject -c all
}
232
#
# Destroy and create the pool used for performance tests. Requires
# $PERFPOOL to be set; the pool is built from $DISKS.
#
function recreate_perf_pool
{
	if [[ -z $PERFPOOL ]]; then
		log_fail "The \$PERFPOOL variable isn't set."
	fi

	#
	# Each zinject delay places a hold on the pool, so the destroy
	# will fail if there are any outstanding delays. Clear them
	# first, whether they were "leaked" by an earlier run or
	# injected by the performance test itself.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists:
	# it destroys the previous pool and creates a new one.
	#
	create_pool $PERFPOOL $DISKS
}
255
#
# Validate a thread count / threads-per-filesystem combination. Each
# check is run through log_must.
#
# $1 - total number of threads; must be non-empty
# $2 - threads per filesystem; must be non-empty, and either 0 or a
#      positive value that evenly divides $1
#
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	#
	# The operands must be quoted: an unquoted empty value leaves
	# test(1) with the single operand "-n", which is always true.
	#
	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The value isn't 0 (checked just above), so the only remaining
	# valid values are those greater than zero.
	#
	log_must test "$threads_per_fs" -gt 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
286
#
# Create the datasets $PERFPOOL/fs1 .. $PERFPOOL/fsN with the options in
# $PERF_FS_OPTS, and export their names as a space separated list in
# $TESTFS.
#
# $1 - number of filesystems to create (default: 1)
#
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}
	typeset i dataset

	export TESTFS=""

	# Count in the shell; the loop variable stays local to this function.
	for ((i = 1; i <= nfilesystems; i++)); do
		dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}
302
#
# Print the number of whitespace separated dataset names in $TESTFS.
#
function get_nfilesystems
{
	# Split $TESTFS into this function's positional parameters and
	# report how many words resulted.
	set -- $TESTFS
	echo $#
}
308
#
# Print the mountpoints of all datasets listed in $TESTFS, joined with
# ':' in the order the datasets appear. Each mountpoint is looked up
# with get_prop.
#
function get_directory
{
	typeset fs mountpoint
	typeset directory=

	for fs in $TESTFS; do
		# "mountpoint" is declared above so it does not leak into
		# the caller's scope.
		mountpoint=$(get_prop mountpoint "$fs")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi
	done

	echo "$directory"
}
329
#
# Print the value of the kernel's arc_stats.arcstat_c_max statistic, read
# with dtrace. Calls log_fail if dtrace fails.
#
function get_max_arc_size
{
	typeset -l max_arc_size

	#
	# The assignment is kept separate from the typeset above; when
	# combined, $? reflects typeset rather than dtrace, and a dtrace
	# failure goes unnoticed.
	#
	max_arc_size=$(dtrace -qn 'BEGIN {
	    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
	    exit(0);
	}') || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}
341
#
# Print the value of the kernel variable dbuf_cache_max_bytes, read with
# dtrace. Calls log_fail if dtrace fails.
#
function get_max_dbuf_cache_size
{
	typeset -l max_dbuf_cache_size

	#
	# The assignment is kept separate from the typeset above; when
	# combined, $? reflects typeset rather than dtrace, and a dtrace
	# failure goes unnoticed.
	#
	max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
	    printf("%u\n", `dbuf_cache_max_bytes);
	    exit(0);
	}') || log_fail "get_max_dbuf_cache_size failed"

	echo $max_dbuf_cache_size
}
353
#
# Append a JSON-style description of how this system is configured to
# the file $PERF_DATA_DIR/$1. The description holds the CPU count,
# physical memory, ARC c_max and kmem_flags (from dtrace), the hostname
# and kernel version (from uname), the disks and their sizes (from
# "iostat -En"), and the "set name = value" lines of /etc/system.
#
# $1 - name of the file to append to, relative to $PERF_DATA_DIR
#
function get_system_config
{
	typeset cfg=$PERF_DATA_DIR/$1

	printf '{\n' >>$cfg

	# Kernel values that are only reachable through dtrace.
	dtrace -qn 'BEGIN{
	    printf("  \"ncpus\": %d,\n", `ncpus);
	    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
	    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
	    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
	    exit(0)}' >>$cfg

	printf '  "hostname": "%s",\n' "$(uname -n)" >>$cfg
	printf '  "kernel version": "%s",\n' "$(uname -v)" >>$cfg

	# One "name": "size" entry per disk whose Size line does not
	# start with 0.
	iostat -En | awk '
	    BEGIN { printf("  \"disks\": {\n"); first = 1 }
	    /^c/ { disk = $1 }
	    /^Size: [^0]/ {
		size = $2;
		if (first != 1) { printf(",\n") } else { first = 0 }
		printf("    \"%s\": \"%s\"", disk, size)
	    }
	    END { printf("\n  },\n") }' >>$cfg

	# One "name": value entry per "set name = value" line.
	sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | awk -F= '
	    BEGIN { printf("  \"system\": {\n"); first = 1 }
	    {
		if (first != 1) { printf(",\n") } else { first = 0 };
		printf("    \"%s\": %s", $1, $2)
	    }
	    END { printf("\n  }\n") }' >>$cfg

	printf '}\n' >>$cfg
}
382
#
# Print a job count derived from the number of CPUs reported by
# psrinfo: the CPU count itself for up to 8 CPUs, otherwise three
# quarters of it (rounded down).
#
function num_jobs_by_cpu
{
	typeset ncpu=$(psrinfo | wc -l)
	typeset num_jobs=$ncpu

	# Integer arithmetic in the shell; no need to fork bc(1) for this.
	[[ $ncpu -gt 8 ]] && num_jobs=$((ncpu * 3 / 4))

	echo $num_jobs
}
392
#
# Print a ':' delimited list (with a leading and trailing ':') holding
# one entry per cXtYdZ device shown by "zpool list -v" for the pool.
# Each entry is the driver name followed by the instance number taken
# from /etc/path_to_inst.
#
# $1 - pool name
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset luns=':'

	ctds=$(zpool list -v $pool | \
	    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

	for ctd in $ctds; do
		# Get the device name as it appears in /etc/path_to_inst
		devname=$(readlink -f /dev/dsk/${ctd}s0 | \
		    sed -n 's|/devices\([^:]*\):.*|\1|p')

		# Add a string composed of the driver name and instance
		# number to the list for comparison with dev_statname.
		lun=$(sed 's/"//g' /etc/path_to_inst | grep $devname | \
		    awk '{print $3$2}')

		luns="$luns$lun:"
	done

	echo $luns
}
414
# Locate (and create if needed) the perf_data directory that holds
# performance statistics, then record the system configuration there
# unless a config.json is already present.
export PERF_DATA_DIR=$(get_perf_output_dir)
if [[ ! -f $PERF_DATA_DIR/config.json ]]; then
	get_system_config config.json
fi
419