#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
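	# A hypothetical example: 16 sync threads doing 8k I/Os against a
	# single filesystem would yield "sync.8k-ios.16-threads.1-filesystems".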
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use O_SYNC or not: 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma-delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
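	# A hypothetical example: PERF_COLLECT_SCRIPTS='vmstat 1,vmstat'
	# would launch "vmstat 1" in the background and tag its logfile
	# with "vmstat".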
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists,
	# and will destroy the previous pool and recreate a new pool.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be a value greater
	# than or equal to zero; since we just verified the value isn't
	# 0 above, then it must be greater than zero here.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
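	# For example, threads=32 with threads_per_fs=8 passes the check
	# below (32 % 8 == 0) and results in 32 / 8 = 4 filesystems.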
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=`awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"

	echo $arc_c
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi

	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}

# Create a file with some information about how this system is configured.
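# A hypothetical excerpt of the resulting JSON (values are illustrative
# only):
#
#	{
#	  "ncpus": "8",
#	  "physmem": "34359738368",
#	  "c_max": "17179869184",
#	  "hostname": "perfhost",
#	  ...
#	}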
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$($NPROC --all)
	else
		typeset ncpu=$(psrinfo | $WC -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

	echo $num_jobs
}

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
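			# For example, instance 3 of the "sd" driver
			# becomes "sd3".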
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		    { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
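
#
# A hypothetical example of how a test case might drive this library; the
# job file name and parameter values below are illustrative only:
#
#	. $STF_SUITE/tests/perf/perf.shlib
#	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'}
#	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
#	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
#	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k'}
#	do_fio_run sequential_writes.fio true false
#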