#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# If a weekly run wasn't requested, default to a nightly run.
[[ -z $PERF_REGRESSION_WEEKLY ]] && export PERF_REGRESSION_NIGHTLY=1

# Default runtime for each type of test run.
export PERF_RUNTIME_WEEKLY=$((30 * 60))
export PERF_RUNTIME_NIGHTLY=$((10 * 60))

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		zpool export $PERFPOOL
		zpool import $PERFPOOL
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
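	# (For example, 16 sync threads doing 8k IOs against a single
	# filesystem would yield "sync.8k-ios.16-threads.1-filesystems";
	# see get_suffix above.)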
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
#    nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads will share each
#    filesystem; the special value 0 means all threads use one filesystem.
# PERF_SYNC_TYPES: A list of sync values: 1 runs sync (O_SYNC) IO, 0 runs
#    async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}

#
# This function sets up the NFS mount on the client and makes sure all
# the correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
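	# (For example, PERF_COLLECT_SCRIPTS="vmstat 1,vmstat" would run
	# "vmstat 1" in the background and tag its logfile with "vmstat".)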
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists; it
	# will destroy the previous pool and create a new one.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n $threads
	log_must test -n $threads_per_fs

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be non-negative;
	# since the special value of 0 was already handled above, any
	# value passing this check must be greater than zero.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
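	# For example, 32 threads with a threads_per_fs of 8 would
	# result in the load running against 4 filesystems.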
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=$(awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=$(awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

function get_max_dbuf_cache_size
{
	typeset -l max_dbuf_cache_size

	if is_illumos; then
		max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		max_dbuf_cache_size=$(get_tunable DBUF_CACHE_MAX_BYTES)
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed"

	echo $max_dbuf_cache_size
}

# Create a file with some information about how this system is configured.
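# The file is formatted as JSON; the file name is supplied by the caller
# (e.g. the "config.json" generated at the bottom of this file).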
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$($NPROC --all)
	else
		typeset ncpu=$(psrinfo | $WC -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

	echo $num_jobs
}

#
# Return a colon-delimited list of the LUNs backing the given pool.
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
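			# (For example, a hypothetical /etc/path_to_inst entry
			# '"/pci@0,0/pci15ad,1976@10/sd@1,0" 1 "sd"' would
			# yield the lun string "sd1".)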
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		    { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
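
#
# A minimal sketch of how a test case might drive this library. The
# variable values, the collection script pair, and the .fio file name
# below are hypothetical examples, not defaults:
#
#	export PERF_RUNTIME=$PERF_RUNTIME_NIGHTLY
#	export PERF_NTHREADS='16 32'
#	export PERF_NTHREADS_PER_FS='0'
#	export PERF_SYNC_TYPES='0 1'
#	export PERF_IOSIZES='8k 64k'
#	export TOTAL_SIZE=$((32 * 1024 * 1024 * 1024))
#	typeset -a collect_scripts=("vmstat 1" "vmstat")
#	do_fio_run sequential_writes.fio true false
#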