#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-"-o recsize=8k -o compress=lz4 \
    -o checksum=sha256 -o redundant_metadata=most"}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable the client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
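	#
	# For example, threads=16, sync=1, and iosize=8k on a pool
	# populated with 4 filesystems would yield the suffix
	# "sync.8k-ios.16-threads.4-filesystems" (values illustrative).
	#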
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define the output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use O_SYNC or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}

#
# This function sets up the NFS mount on the client and makes sure all
# the correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
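	#
	# For example (values illustrative), setting
	# PERF_COLLECT_SCRIPTS="vmstat 1, vmstat, iostat -x 1, iostat"
	# would launch "vmstat 1" and "iostat -x 1" in the background,
	# logging to output files tagged "vmstat" and "iostat".
	#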
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$PWD/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there have been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists;
	# it will destroy the previous pool and create a new one.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be greater than
	# or equal to zero; since we just verified the value isn't
	# 0 above, it must be greater than zero here.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
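	#
	# For example, threads=32 with threads_per_fs=8 passes this
	# check (32 % 8 == 0, implying 4 filesystems), whereas
	# threads_per_fs=5 would fail it, since 32 % 5 != 0
	# (values illustrative).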
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

function get_min_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_min" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_min
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_min_arc_size failed"
}

function get_max_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_max" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_max
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_max_arc_size failed"
}

function get_arc_target
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_arc_target failed"
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}

# Create a file with some information about how this system is configured.
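#
# The resulting file is JSON; its shape is roughly the following
# (values illustrative):
#
#	{
#	  "ncpus": "8",
#	  "physmem": "34359738368",
#	  "c_max": "17179869184",
#	  "hostname": "perfhost",
#	  "kernel version": "Linux 5.15.0",
#	  "disks": { "sda": "100G" },
#	  "tunables": { "zfs_arc_max": "0" }
#	}
#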
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(lscpu | awk '/^CPU\(s\)/ {print $2; exit}')\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

#
# Return a colon-delimited list of the LUNs backing the given pool.
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	case "$UNAME" in
	Linux)
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
		;;
	FreeBSD)
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/ \
		        { printf "%s:", $1 }')
		;;
	*)
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | \
			    sed -n 's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
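			# For example (hypothetical entry), a path_to_inst
			# line such as:
			#   "/pci@0,0/pci1000,8000@14/sd@3,0" 3 "sd"
			# would yield the driver+instance string "sd3".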
			lun=$(sed 's/"//g' /etc/path_to_inst | \
			    awk -v dn="$devname" '$0 ~ dn {print $3$2}')
			lun_list="$lun_list$lun:"
		done
		;;
	esac
	echo $lun_list
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
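#
# Example usage from a test case (illustrative; the fio job file name
# and parameter values below are hypothetical):
#
#	export PERF_NTHREADS="8 16"
#	export PERF_NTHREADS_PER_FS="0"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 64k"
#	do_fio_run random_reads.fio true false
#
# This recreates $PERFPOOL with a single filesystem for each of the
# eight parameter combinations (2 thread counts x 2 sync types x
# 2 iosizes) and runs the fio job against it, depositing the fio and
# collection-script output under $PERF_DATA_DIR.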