#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}

#
# Map a numeric sync flag to a human-readable tag used in output
# filenames: 0 -> "async", 1 -> "sync". Any other value yields an
# empty string.
#
function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

#
# Build the common filename suffix for all output files produced by a
# single fio invocation, encoding the sync mode, I/O size, thread count
# and number of filesystems under test.
#
function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

#
# Run a single fio invocation: optionally recreate the perf pool and
# its filesystems, optionally clear the ARC, apply any per-device
# zinject delays, start the data collection scripts, and finally run
# the fio job (locally, or on $NFS_CLIENT when $NFS == 1).
#
# $1 - fio script name (relative to $FIO_SCRIPTS)
# $2 - "true"/"false": destroy and recreate the perf pool first
# $3 - "true"/"false": clear the ARC before the run
# $4 - total number of fio threads
# $5 - threads per filesystem (0 means "use a single filesystem")
# $6 - sync flag (1 = sync IO, 0 = async IO)
# $7 - I/O size used as fio's blocksize
#
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	# Running multiple filesystems only makes sense when the pool is
	# recreated for this run, so enforce that here.
	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	# Environment consumed by the .fio job files.
	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	# Run one fio invocation for every combination of thread count,
	# threads-per-filesystem, sync type, and I/O size.
	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}

# This function sets NFS mount on the client and make sure all correct
# permissions are in place
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	# Best-effort unmount of any stale mount; failure is ignored.
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	# $jobnum must survive as a literal for fio itself to expand, so
	# export it as its own name before the eval below.
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		# Strip leading whitespace from each comma-separated entry.
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	# Launch each (command, tag) pair in the background, capped at
	# $PERF_RUNTIME seconds by timeout(1).
	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

#
# Apply each delay in the $ZINJECT_DELAYS array to every disk in $DISKS
# on pool $PERFPOOL.
#
function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		# NOTE(review): log_must with a plain string looks like it
		# was meant to be log_fail here — confirm intent.
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_must "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

# Cancel all outstanding zinject handlers.
function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists,
	# and will destroy the previous pool and recreate a new pool.
	#
	create_pool $PERFPOOL $DISKS
}

#
# Validate the threads / threads-per-filesystem combination: both must
# be non-empty, and threads_per_fs must either be 0 (single filesystem)
# or evenly divide the thread count.
#
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n $threads
	log_must test -n $threads_per_fs

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be a value greater
	# than or equal to zero; since we just verified the value isn't
	# 0 above, then it must be greater than zero here.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

#
# Create $1 (default 1) filesystems in $PERFPOOL using $PERF_FS_OPTS,
# and record them as a space-separated list in the exported $TESTFS.
#
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

# Count the filesystems listed in $TESTFS.
function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

#
# Return the mountpoints of all $TESTFS filesystems joined by ':',
# suitable for fio's "directory" option.
#
function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

#
# Read the minimum ARC size (arcstat c_min) via the platform-specific
# kstat interface.
#
function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

#
# Read the maximum ARC size (arcstat c_max) via the platform-specific
# kstat interface.
#
function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

#
# Read the current ARC target size (arcstat c) via the platform-specific
# kstat interface.
#
function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=`awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"

	echo $arc_c
}

#
# Compute the dbuf cache size. On illumos this is read directly from the
# kernel; elsewhere it is derived as arc_target >> DBUF_CACHE_SHIFT.
#
function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi

	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}

# Create a file with some information about how this system is configured.
#
# Write a JSON description of the system configuration (cpus, memory,
# ARC limit, disks, tunables) to $PERF_DATA_DIR/$1.
#
# Note: output is appended (>>), so this assumes the file does not
# already exist — the caller at the bottom of this file guards on that.
#
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		# Emit a "disks" object mapping device name -> size.
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			# Comma-separate all entries after the first.
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		# Dump "set name = value" tunables from /etc/system as a
		# "system" JSON object.
		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

#
# Pick a fio job count based on CPU count: all CPUs up to 8, then 3/4
# of them. $NPROC and $WC are presumably framework-provided command
# paths — confirm against the test-suite environment.
#
function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$($NPROC --all)
	else
		typeset ncpu=$(psrinfo | $WC -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

	echo $num_jobs
}

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

# Log the matrix of fio parameters this run will iterate over.
function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json