# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. "$STF_SUITE"/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4 -o checksum=sha256 -o redundant_metadata=most'}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str "$sync")
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo "$suffix"
}
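
# For example, with $TESTFS holding two filesystems, "get_suffix 16 1 8k"
# would echo "sync.8k-ios.16-threads.2-filesystems".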

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str "$sync")
	log_note "Running with $threads $sync_str threads, $iosize ios"

	# Running multiple threads per filesystem only makes sense when
	# the filesystems are recreated for this run.
	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test "$do_recreate" = "true"
		verify_threads_per_fs "$threads" "$threads_per_fs"
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs "$script"
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix "$threads" "$sync" "$iosize")

	# Start the data collection
	do_collect_scripts "$suffix"

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    "$SUDO_COMMAND")"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp "$NFS_USER@$NFS_CLIENT":/tmp/fio.out "$outfile"
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format="${PERF_FIO_FORMAT}" \
		    --output "$outfile" "$FIO_SCRIPTS/$script"
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use O_SYNC or not; 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma-delimited list of 'command args, logfile_tag'
#     pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    "$script" \
					    "$do_recreate" \
					    "$clear_cache" \
					    "$threads" \
					    "$threads_per_fs" \
					    "$sync" \
					    "$iosize"
				done
			done
		done
	done
}
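
#
# Example (hypothetical values): a test case might drive a sweep like
#
#	export PERF_NTHREADS="8 16"
#	export PERF_NTHREADS_PER_FS="0"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 128k"
#	do_fio_run sequential_writes.fio true false
#
# which runs $FIO_SCRIPTS/sequential_writes.fio once per combination,
# recreating the pool each time ("true") without clearing the ARC ("false").
#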

# This function sets up the NFS mount on the client and makes sure
# all the correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on "$TESTFS"
	log_must chmod -R 777 /"$TESTFS"

	ssh -t "$NFS_USER@$NFS_CLIENT" "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < "$FIO_SCRIPTS/$script" > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio "$NFS_USER@$NFS_CLIENT":/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo "$item" | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    "$SUDO_COMMAND")"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		# Leave the command unquoted so it word-splits into the
		# utility and its arguments.
		timeout "$PERF_RUNTIME" ${collect_scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
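
#
# Example (hypothetical commands): two extra collectors could be added with
#
#	export PERF_COLLECT_SCRIPTS="vmstat 1, vmstat, iostat -x 1, iostat"
#
# which logs "vmstat 1" output to "$logbase.vmstat.$suffix" and
# "iostat -x 1" output to "$logbase.iostat.$suffix".
#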

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$PWD/perf_data"
	[[ -d $dir ]] || mkdir -p "$dir"

	echo "$dir"
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d "$disk" -D "${ZINJECT_DELAYS[$idx]}" "$PERFPOOL"
		done

		((idx += 1))
	done
}
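
#
# Each ZINJECT_DELAYS entry is a value for zinject's -D option, i.e.
# "latency-in-ms:lanes". For example (hypothetical delay):
#
#	ZINJECT_DELAYS=('25:1')
#
# would add a 25ms, single-lane delay to every disk in $PERFPOOL.
#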

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# will destroy the previous pool and create a new one.
	#
	create_pool "$PERFPOOL" "$DISKS"
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must not be negative;
	# since we just verified the value isn't 0 above, it must be
	# greater than zero here.
	#
	log_must test "$threads_per_fs" -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 "$nfilesystems"); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset "$dataset" "$PERF_FS_OPTS"
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=($TESTFS)
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=($TESTFS)
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo "$directory"
}

function get_min_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_min" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_min
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_min_arc_size failed"
}

function get_max_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_max" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_max
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_max_arc_size failed"
}

function get_arc_target
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_arc_target failed"
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi || log_fail "get_dbuf_cache_size failed"

	echo "$dbuf_cache_size"
}
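
#
# For example (hypothetical sizes), with an ARC target of 4 GiB and
# DBUF_CACHE_SHIFT=5, the non-illumos path above computes a dbuf cache
# size of 4 GiB / 2^5 = 128 MiB.
#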

# Create a file with some information about how this system is configured.
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>"$config"
	if is_linux; then
		echo "  \"ncpus\": \"$(lscpu | awk '/^CPU\(s\)/ {print $2; exit}')\"," >>"$config"
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>"$config"
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -sr)\"," >>"$config"
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -v)\"," >>"$config"
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>"$config"
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>"$config"
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>"$config"
		done
		printf "\n  }\n" >>"$config"
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>"$config"
	fi
	echo "}" >>"$config"
}

#
# Return a colon-delimited list of the luns backing the given pool.
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	case "$UNAME" in
	Linux)
		ctds=$(zpool list -HLv "$pool" | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
		;;
	FreeBSD)
		lun_list+=$(zpool list -HLv "$pool" | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		        { printf "%s:", $1 }')
		;;
	*)
		ctds=$(zpool list -v "$pool" |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/"${ctd}"s0 | sed -n 's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | awk -v dn="$devname" '$0 ~ dn {print $3$2}')
			lun_list="$lun_list$lun:"
		done
		;;
	esac
	echo "$lun_list"
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
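
#
# On Linux, the resulting config.json looks roughly like the following
# (hypothetical values):
#
#	{
#	  "ncpus": "8",
#	  "physmem": "16777216000",
#	  "c_max": "4294967296",
#	  "hostname": "perfhost",
#	  "kernel version": "Linux 5.15.0",
#	  "disks": {
#	    "sda": "100G"
#	  },
#	  "tunables": {
#	    "zfs_arc_max": "0",
#	    ...
#	  }
#	}
#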