xref: /linux/tools/testing/selftests/net/forwarding/lib.sh (revision f72aa1b276281b4e4f75261af8425bc99d903f3e)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4##############################################################################
5# Topology description. p1 looped back to p2, p3 to p4 and so on.
6
# Default port-to-netdevice map; keys are p1..p10.
declare -A NETIFS=()
NETIFS[p1]=veth0
NETIFS[p2]=veth1
NETIFS[p3]=veth2
NETIFS[p4]=veth3
NETIFS[p5]=veth4
NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
NETIFS[p9]=veth8
NETIFS[p10]=veth9
19
# Port that does not have a cable connected.
NETIF_NO_CABLE=${NETIF_NO_CABLE:-eth8}

##############################################################################
# Defines

# Each setting below keeps a non-empty value supplied by the environment and
# otherwise falls back to the default shown.

# Networking utilities.
PING=${PING:-ping}
PING6=${PING6:-ping6}	# Some distros just use ping.
ARPING=${ARPING:-arping}
TROUTE6=${TROUTE6:-traceroute6}

# Packet generator.
MZ=${MZ:-mausezahn}	# Some distributions use 'mz'.
MZ_DELAY=${MZ_DELAY:-0}

# Host configuration tools.
TEAMD=${TEAMD:-teamd}
MCD=${MCD:-smcrouted}
MC_CLI=${MC_CLI:-smcroutectl}
40
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
# and bailing out. Used during initial setup.
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:-600}
# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
WAIT_TIMEOUT=${WAIT_TIMEOUT:-20}
# Time to wait after interfaces participating in the test are all UP.
WAIT_TIME=${WAIT_TIME:-5}

# Whether to pause after a failure and before cleanup, respectively.
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:-no}
PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:-no}

# Whether to create virtual interfaces, and what netdevice type they should be.
NETIF_CREATE=${NETIF_CREATE:-yes}
NETIF_TYPE=${NETIF_TYPE:-veth}
57
# Constants for ping tests:
# How many packets should be sent.
PING_COUNT=${PING_COUNT:-10}
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=${PING_TIMEOUT:-5}

# Minimum ageing_time (in centiseconds) supported by hardware
LOW_AGEING_TIME=${LOW_AGEING_TIME:-1000}

# Whether to check for availability of certain tools.
REQUIRE_JQ=${REQUIRE_JQ:-yes}
REQUIRE_MZ=${REQUIRE_MZ:-yes}
REQUIRE_MTOOLS=${REQUIRE_MTOOLS:-no}

# Whether to override MAC addresses on interfaces participating in the test.
STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:-no}

# Flags for tcpdump
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:-}

# Flags for TC filters.
TC_FLAG=${TC_FLAG:-skip_hw}

# Whether the machine is "slow" -- i.e. might be incapable of running tests
# involving heavy traffic. This might be the case on a debug kernel, a VM, or
# e.g. a low-power board.
KSFT_MACHINE_SLOW=${KSFT_MACHINE_SLOW:-no}
86
87##############################################################################
88# Find netifs by test-specified driver name
89
# Print the name of the driver bound to netdevice $1, taken from the target of
# its sysfs "device/driver" symlink. Prints nothing when there is no such link.
driver_name_get()
{
	local netdev=$1; shift
	local link="/sys/class/net/$netdev/device/driver"

	[[ -L $link ]] || return 0
	basename $(realpath $link)
}
99
# Fill NETIFS[p1], NETIFS[p2], ... with every interface listed by
# "ip link show" whose driver (per driver_name_get) matches
# $NETIF_FIND_DRIVER, in listing order.
netif_find_driver()
{
	local ifnames
	local ifname
	local driver_name
	local count=0

	ifnames=$(ip -j link show | jq -r ".[].ifname")

	# ifname is local so the loop does not clobber a caller's variable.
	for ifname in $ifnames; do
		driver_name=$(driver_name_get $ifname)
		# The right-hand side is deliberately left unquoted, as before, so
		# NETIF_FIND_DRIVER keeps being matched as a pattern.
		if [[ -n $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
			count=$((count + 1))
			NETIFS[p$count]="$ifname"
		fi
	done
}
114
# Whether to find netdevices according to the driver specified by the importer
NETIF_FIND_DRIVER=${NETIF_FIND_DRIVER:-}

# A non-empty driver name replaces the default NETIFS map with the detected one.
if [[ -n $NETIF_FIND_DRIVER ]]; then
	unset NETIFS
	declare -A NETIFS
	netif_find_driver
fi
123
# Directory that holds this file, with symlinks resolved.
net_forwarding_dir=$(readlink -e "${BASH_SOURCE[0]}")
net_forwarding_dir=$(dirname "$net_forwarding_dir")

# forwarding.config is optional and is sourced only when present.
[[ ! -f $net_forwarding_dir/forwarding.config ]] ||
	source "$net_forwarding_dir/forwarding.config"

source "$net_forwarding_dir/../lib.sh"
131
132##############################################################################
133# Sanity checks
134
# Exit with $ksft_skip unless "tc -j" succeeds.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
143
# Old versions of tc don't understand "mpls_uc".
# Probe by installing a throw-away mpls_uc matchall filter on ingress of $1.
# Returns $ksft_skip when the add fails; otherwise removes the filter again.
check_tc_mpls_support()
{
	local dev=$1; shift

	if ! tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		matchall action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi
	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		matchall
}
158
# Old versions of tc produce invalid json output for mpls lse statistics.
# Installs a temporary flower filter with an MPLS LSE match on ingress of $1,
# checks that jq can parse the JSON dump, and removes the filter.
# Returns $ksft_skip when the filter cannot be added or the dump does not parse.
check_tc_mpls_lse_stats()
{
	local dev=$1; shift
	local json_rc

	if ! tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2                                 \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?
	# Remove the probe filter before acting on the result.
	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		flower

	if ((json_rc != 0)); then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
184
# Exit with $ksft_skip unless the "tc filter help" text mentions "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}
193
# Exit with $ksft_skip unless the "tc help" text mentions "chain".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}
202
# Exit with $ksft_skip unless the "tc actions help" text mentions "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}
211
# Exit with $ksft_skip unless the mqprio help text (requested against "lo")
# contains "fp ".
check_tc_fp_support()
{
	if ! tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "; then
		echo "SKIP: iproute2 too old; tc is missing frame preemption support"
		exit $ksft_skip
	fi
}
220
# Exit with $ksft_skip unless the "ethtool --help" text mentions "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
229
# Exit with $ksft_skip unless the "ethtool --help" text mentions "--show-mm".
check_ethtool_mm_support()
{
	if ! ethtool --help 2>&1 | grep -- '--show-mm' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
		exit $ksft_skip
	fi
}
238
# Exit with $ksft_skip unless the "ethtool --help" text mentions "--all-groups".
check_ethtool_counter_group_support()
{
	if ! ethtool --help 2>&1 | grep -- '--all-groups' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing standard counter group support"
		exit $ksft_skip
	fi
}
247
# Succeed when the pMAC standard-stats dump of $1 has a non-empty group $2.
# $1 - netdevice, $2 - counter group name as it appears in the JSON output.
# Returns non-zero when the group is empty or no count could be obtained.
check_ethtool_pmac_std_stats_support()
{
	local dev=$1; shift
	local grp=$1; shift
	local len

	len=$(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
		| jq ".[].\"$grp\" | length")

	# When ethtool or jq fail the count is empty. Validate it first, so
	# that this reads as "not supported" instead of tripping a test(1)
	# syntax error on stderr.
	[[ $len =~ ^[0-9]+$ ]] && ((10#$len != 0))
}
256
# Return $ksft_skip unless "bridge -d link show" output contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
264
# Return $ksft_skip unless "bridge -d link show" output contains "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
272
# The library needs root; skip when run with any other uid.
[[ "$(id -u)" -eq 0 ]] || {
	echo "SKIP: need root privileges"
	exit $ksft_skip
}
277
# Exit with $ksft_skip unless the driver of netdevice $1 (per
# driver_name_get) matches $2.
check_driver()
{
	local dev=$1; shift
	local expected=$1; shift
	local actual

	actual=$(driver_name_get $dev)
	[[ $actual != $expected ]] || return 0

	echo "SKIP: expected driver $expected for $dev, got $actual instead"
	exit $ksft_skip
}
289
# Importers opt in to the tc JSON check by setting CHECK_TC=yes.
[[ "$CHECK_TC" != "yes" ]] || check_tc_version
293
# Exit with $ksft_skip unless "command -v $1" resolves to an executable file.
require_command()
{
	local cmd=$1; shift
	local cmd_path

	cmd_path=$(command -v "$cmd")
	if [[ -x $cmd_path ]]; then
		return 0
	fi

	echo "SKIP: $cmd not installed"
	exit $ksft_skip
}
303
# IPv6 support was added in v3.0
# Exit with $ksft_skip when the major version reported by "msend -v" is
# below 3.
check_mtools_version()
{
	local version
	local major

	version="$(msend -v)"
	# Drop the "msend version " banner, then keep what precedes the first dot.
	version=${version##msend version }
	major=$(echo $version | cut -d. -f1)

	[ $major -lt 3 ] || return 0

	echo "SKIP: expected mtools version 3.0, got $version"
	exit $ksft_skip
}
318
# Tool availability checks, each enabled by its REQUIRE_* switch.
[[ "$REQUIRE_JQ" != "yes" ]] || require_command jq
[[ "$REQUIRE_MZ" != "yes" ]] || require_command $MZ
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
	# https://github.com/troglobit/mtools
	require_command msend
	require_command mreceive
	check_mtools_version
fi
331
332##############################################################################
333# Command line options handling
334
count=0

# Interface names given on the command line replace the default NETIFS map;
# the positional parameters are consumed in the process.
if [[ $# -gt 0 ]]; then
	unset NETIFS
	declare -A NETIFS
fi

until [[ $# -eq 0 ]]; do
	count=$((count + 1))
	NETIFS[p$count]="$1"
	shift
done
346
347##############################################################################
348# Network interfaces configuration
349
# The importer must say how many interfaces it needs.
if [[ -z ${NUM_NETIFS+x} ]]; then
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
fi

if (( ${#NETIFS[@]} < NUM_NETIFS )); then
	echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
	exit $ksft_skip
fi

# Every index from p1 up to the number of entries must carry a name.
for i in $(seq ${#NETIFS[@]}); do
	[[ ${NETIFS[p$i]} ]] && continue

	echo "SKIP: NETIFS[p$i] not given"
	exit $ksft_skip
done
366
# Create veth pairs for the first NUM_NETIFS ports: p1 is paired with p2, p3
# with p4, and so on. A pair is only created when its odd-numbered end does
# not exist yet. Exits when a name is missing or the pair cannot be created.
create_netif_veth()
{
	local i
	local j

	# Step by two: each iteration handles the pair (p$i, p$j).
	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		j=$((i + 1))

		if [[ -z ${NETIFS[p$i]} ]]; then
			echo "SKIP: Cannot create interface. Name not specified"
			exit $ksft_skip
		fi

		if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
			if ! ip link add ${NETIFS[p$i]} type veth \
				peer name ${NETIFS[p$j]}; then
				echo "Failed to create netif"
				exit 1
			fi
		fi
	done
}
391
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# handled; any other type prints an error and exits with status 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth) create_netif_veth
	      ;;
	# Inside double quotes a single quote needs no backslash; escaping it
	# would print the backslash as well.
	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
	   exit 1
	   ;;
	esac
}
402
# Original MAC address of each netdevice touched by mac_addr_prepare.
declare -A MAC_ADDR_ORIG

# Save the current MAC address of the first NUM_NETIFS ports in MAC_ADDR_ORIG
# and give port N the address 00:01:02:03:04:NN (NN is N in hex).
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the counter local so a caller's own "i" is left alone.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		ip link set dev $dev address $new_addr
	done
}
419
# Put back the MAC addresses recorded in MAC_ADDR_ORIG on the first
# NUM_NETIFS ports.
mac_addr_restore()
{
	local dev=
	# Keep the counter local so a caller's own "i" is left alone.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
	done
}
429
[[ "$NETIF_CREATE" != "yes" ]] || create_netif

[[ "$STABLE_MAC_ADDRS" != "yes" ]] || mac_addr_prepare

# All ports the importer asked for must exist by now.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
445
446##############################################################################
447# Helpers
448
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0

# Fold kselftest status $1 into RET through ksft_status_merge. When that
# helper exits non-zero, $2 is stored in retmsg.
ret_set_ksft_status()
{
	local ksft_status=$1; shift
	local msg=$1; shift

	if ! RET=$(ksft_status_merge $RET $ksft_status); then
		retmsg=$msg
	fi
}
464
# Whether FAILs should be interpreted as XFAILs. Internal.
FAIL_TO_XFAIL=

# Record a failure with message $2 when $1 evaluates to non-zero. The
# failure is recorded as $ksft_xfail while FAIL_TO_XFAIL is "yes", and as
# $ksft_fail otherwise.
check_err()
{
	local err=$1
	local msg=$2
	local status=$ksft_fail

	((err)) || return 0

	if [[ $FAIL_TO_XFAIL = yes ]]; then
		status=$ksft_xfail
	fi
	ret_set_ksft_status $status "$msg"
}
481
# Inverse of check_err: record a failure with message $2 when $1 is zero.
check_fail()
{
	local err=$1
	local msg=$2
	local inverted=$((!err))

	check_err $inverted "$msg"
}
489
# Check status $2 of operation "$3": with a non-zero $1 the operation is
# expected to fail (check_fail), otherwise it is expected to succeed
# (check_err).
check_err_fail()
{
	local should_fail=$1; shift
	local err=$1; shift
	local what=$1; shift

	if ! ((should_fail)); then
		check_err $err "$what failed"
		return
	fi

	check_fail $err "$what succeeded, but should have failed"
}
502
# Run the given command with FAIL_TO_XFAIL=yes set for that command only.
xfail()
{
	FAIL_TO_XFAIL=yes "${@}"
}
507
# Run the given command; when KSFT_MACHINE_SLOW is "yes", run it with
# FAIL_TO_XFAIL=yes.
xfail_on_slow()
{
	if [[ $KSFT_MACHINE_SLOW != yes ]]; then
		"$@"
		return
	fi

	FAIL_TO_XFAIL=yes "$@"
}
516
# Run the given command unless KSFT_MACHINE_SLOW is "yes".
omit_on_slow()
{
	if [[ $KSFT_MACHINE_SLOW = yes ]]; then
		return 0
	fi

	"$@"
}
523
# Run the command that follows $1; when netdevice $1 reports link kind
# "veth", run it with FAIL_TO_XFAIL=yes.
xfail_on_veth()
{
	local dev=$1; shift
	local kind

	kind=$(ip -j -d link show dev $dev |
			jq -r '.[].linkinfo.info_kind')

	if [[ $kind != veth ]]; then
		"$@"
		return
	fi

	FAIL_TO_XFAIL=yes "$@"
}
537
# Print one result line for test "$1 $2" with result tag $3, followed by an
# indented line carrying message $4 when that is non-empty.
log_test_result()
{
	local test_name=$1; shift
	local opt_str=$1; shift
	local result=$1; shift
	local retmsg=$1; shift

	printf "TEST: %-60s  [%s]\n" "$test_name $opt_str" "$result"
	[[ -z $retmsg ]] || printf "\t%s\n" "$retmsg"
}
550
# When PAUSE_ON_FAIL is "yes", wait for a line of input; the answer "q"
# exits with status 1, anything else resumes. Returns 0 otherwise.
pause_on_fail()
{
	# Local, so the answer does not overwrite a caller's variable.
	local answer

	if [[ $PAUSE_ON_FAIL == yes ]]; then
		echo "Hit enter to continue, 'q' to quit"
		read -r answer
		if [[ $answer == q ]]; then
			exit 1
		fi
	fi
}
559
# Report test "$1 $2" with the " OK " tag.
handle_test_result_pass()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" " OK "
}
567
# Report test "$1 $2" as FAIL together with the current retmsg, then give
# pause_on_fail a chance to stop.
handle_test_result_fail()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" FAIL "$retmsg"
	pause_on_fail
}
576
# Report test "$1 $2" as XFAIL together with the current retmsg, then give
# pause_on_fail a chance to stop.
handle_test_result_xfail()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" XFAIL "$retmsg"
	pause_on_fail
}
585
# Report test "$1 $2" as SKIP together with the current retmsg.
handle_test_result_skip()
{
	local name=$1
	local opts=$2

	log_test_result "$name" "$opts" SKIP "$retmsg"
}
593
# Report the outcome of test $1 according to RET, merge RET into EXIT_STATUS
# via ksft_exit_status_merge, and return RET. A second argument, when given,
# is shown in parentheses after the test name.
log_test()
{
	local test_name=$1
	local opt_str=$2
	local handler=handle_test_result_fail

	if (($# == 2)); then
		opt_str="($opt_str)"
	fi

	# Anything that is not pass, xfail or skip is reported as a failure.
	if ((RET == ksft_pass)); then
		handler=handle_test_result_pass
	elif ((RET == ksft_xfail)); then
		handler=handle_test_result_xfail
	elif ((RET == ksft_skip)); then
		handler=handle_test_result_skip
	fi
	$handler "$test_name" "$opt_str"

	EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
	return $RET
}
616
# Log a test as skipped: call log_test with RET=$ksft_skip and an empty
# retmsg set for that call.
log_test_skip()
{
	RET=$ksft_skip retmsg= log_test "${@}"
}
621
# Log a test as an expected failure: call log_test with RET=$ksft_xfail and
# an empty retmsg set for that call.
log_test_xfail()
{
	RET=$ksft_xfail retmsg= log_test "${@}"
}
626
# Print message $1 prefixed with "INFO: ".
log_info()
{
	local text=$1

	printf '%s\n' "INFO: $text"
}
633
# Run the given command and invert its status: succeed when it fails.
not()
{
	! "$@"
}
639
# Print the largest of the integer arguments.
get_max()
{
	local arr=("$@")
	# Local, so that callers' "max" and "cur" variables are not clobbered.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo $max
}
653
# Usage: grep_bridge_fdb ADDR [self|master [-v]] COMMAND...
# Run COMMAND and keep the output lines containing ADDR. With "self" or
# "master" the lines are further filtered on that word; a following "-v"
# inverts that second filter.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case "$1" in
	self|master)
		word=$1; shift
		if [ "$1" == "-v" ]; then
			flag=$1; shift
		fi
		;;
	esac

	$@ | grep $addr | grep $flag "$word"
}
669
# Succeed when the output of the given command contains
# "Link detected: yes".
wait_for_port_up()
{
	local output

	output=$("$@")
	grep -q "Link detected: yes" <<< "$output"
}
674
# Succeed when the output of the given command contains "offload".
wait_for_offload()
{
	local output

	output=$("$@")
	grep -q offload <<< "$output"
}
679
# Succeed when the output of the given command contains "trap".
wait_for_trap()
{
	local output

	output=$("$@")
	grep -q trap <<< "$output"
}
684
# Wait for netdevice $1 to come up, allowing INTERFACE_TIMEOUT iterations of
# setup_wait_dev_with_timeout, then pause for $2 (default WAIT_TIME) seconds.
# Logs a failed "setup_wait_dev" test and exits with status 1 on timeout.
setup_wait_dev()
{
	local dev=$1; shift
	local wait_time=${1:-$WAIT_TIME}; shift

	if ! setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		check_err 1
		log_test setup_wait_dev ": Interface $dev does not come up."
		exit 1
	fi
}
698
# Poll once per second, at most $2 (default WAIT_TIMEOUT) times, until
# "ip link show dev $1 up" reports "state UP". On success sleep a further $3
# (default WAIT_TIME) seconds and return 0; return 1 when the polls run out.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local i

	for ((i = 1; i <= $max_iterations; ++i)); do
		if ip link show dev $dev up | grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi
		sleep 1
	done

	return 1
}
719
# Wait for the first $1 (default NUM_NETIFS) ports to come up, then sleep
# WAIT_TIME seconds once.
setup_wait()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i=1

	while ((i <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
732
# Wait, through slowwait with a limit of $2 (default WAIT_TIMEOUT), for
# "ip link show dev $1" to succeed. On failure log a failed "wait_for_dev"
# test and exit with EXIT_STATUS.
wait_for_dev()
{
	local dev=$1; shift
	local timeout=${1:-$WAIT_TIMEOUT}; shift

	if ! slowwait $timeout ip link show dev $dev &> /dev/null; then
		check_err 1
		log_test wait_for_dev "Interface $dev did not appear."
		exit $EXIT_STATUS
	fi
}
745
# Run command line $1 and filter its output through "jq -r $3 $2".
# Prints the filtered result. Returns the command's or jq's status when
# either fails; otherwise succeeds only when the result is non-empty.
cmd_jq()
{
	local cmd=$1
	local jq_exp=$2
	local jq_opts=$3
	local output

	# If the command fails, return its error right away.
	output="$($cmd)" || return
	output=$(echo $output | jq -r $jq_opts "$jq_exp") || return

	echo $output
	# return success only in case of non-empty output
	[ -n "$output" ]
}
769
# Hook run ahead of cleanup: optionally wait for input (PAUSE_ON_CLEANUP=yes)
# and restore the original MAC addresses (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ ${PAUSE_ON_CLEANUP} = yes ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	if [[ $STABLE_MAC_ADDRS = yes ]]; then
		mac_addr_restore
	fi
}
781
# For IPv4 and then IPv6: add a rule for table "local" at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
789
# Undo vrf_prepare, IPv6 first: add the rule for table "local" at
# preference 0 and delete the one at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
797
# Last table ID handed out, and the VRF-name-to-table-ID map.
__last_tb_id=0
declare -A __TB_IDS

# Assign the next table ID to VRF $1, remember it in __TB_IDS, and hand it
# back as the function's return status.
__vrf_td_id_assign()
{
	local vrf_name=$1

	((__last_tb_id += 1))
	__TB_IDS[$vrf_name]=$__last_tb_id
	return ${__TB_IDS[$vrf_name]}
}
809
# Hand back, as the return status, the table ID recorded in __TB_IDS for
# VRF $1.
__vrf_td_id_lookup()
{
	local vrf_name=$1
	local tb_id=${__TB_IDS[$vrf_name]}

	return $tb_id
}
816
# Create VRF device $1 bound to a freshly assigned table, and install an
# unreachable default route (metric 4278198272) in that table for IPv4 and
# IPv6.
vrf_create()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_assign $vrf_name
	tb_id=$?

	ip link add dev $vrf_name type vrf table $tb_id
	for family in -4 -6; do
		ip $family route add table $tb_id unreachable default metric 4278198272
	done
}
829
# Remove the unreachable default routes (IPv6, then IPv4) from the table of
# VRF $1 and delete the VRF device.
vrf_destroy()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_lookup $vrf_name
	tb_id=$?

	for family in -6 -4; do
		ip $family route del table $tb_id unreachable default metric 4278198272
	done
	ip link del dev $vrf_name
}
842
# Usage: __addr_add_del IF_NAME add|del [ADDR...]
# Run "ip address add|del" on IF_NAME for every ADDR given.
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	# Local, so the loop does not clobber a caller's "addrstr".
	local addrstr

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		ip address $add_del $addrstr dev $if_name
	done
}
857
# Usage: __simple_if_init IF_NAME VRF_NAME [ADDR...]
# Enslave IF_NAME to VRF_NAME, bring it up and add the given addresses.
__simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=$1; shift

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	__addr_add_del $if_name add "$@"
}
869
# Usage: __simple_if_fini IF_NAME [ADDR...]
# Remove the given addresses from IF_NAME, bring it down and detach it from
# its master.
__simple_if_fini()
{
	local if_name=$1; shift

	__addr_add_del $if_name del "$@"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
880
# Usage: simple_if_init IF_NAME [ADDR...]
# Create and bring up VRF "vIF_NAME", then enslave IF_NAME to it with the
# given addresses.
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}
895
# Usage: simple_if_fini IF_NAME [ADDR...]
# Tear down what simple_if_init set up: deconfigure IF_NAME and destroy VRF
# "vIF_NAME".
simple_if_fini()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	__simple_if_fini $if_name "$@"
	vrf_destroy $vrf_name
}
909
# Usage: tunnel_create NAME TYPE LOCAL REMOTE [EXTRA...]
# Add link NAME of the given TYPE with the local and remote endpoints, pass
# any EXTRA arguments on to "ip link add", and bring the link up.
tunnel_create()
{
	local name=$1; shift
	local type=$1; shift
	local local_addr=$1; shift
	local remote_addr=$1; shift

	ip link add name $name type $type \
	   local $local_addr remote $remote_addr "$@"
	ip link set dev $name up
}
921
# Delete link $1.
tunnel_destroy()
{
	local tunnel=$1; shift

	ip link del dev $tunnel
}
928
# Usage: vlan_create IF_NAME VID VRF [ADDR...]
# Create VLAN device IF_NAME.VID on top of IF_NAME, enslave it to VRF when
# that is non-empty, bring it up and add the given addresses.
vlan_create()
{
	local if_name=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	if [[ -n $vrf ]]; then
		ip link set dev $name master $vrf
	fi
	ip link set dev $name up
	__addr_add_del $name add "$@"
}
944
# Delete VLAN device $1.$2.
vlan_destroy()
{
	local if_name=$1; shift
	local vid=$1; shift

	ip link del dev $if_name.$vid
}
953
# Usage: team_create IF_NAME MODE [PORT...]
# Start $TEAMD as a daemon for team device IF_NAME with runner MODE, enslave
# every PORT to it (down, set master, up), and bring IF_NAME up.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Local, so the loop does not clobber a caller's "slave".
	local slave

	require_command $TEAMD
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
968
# Ask $TEAMD to kill the daemon instance of team device $1.
team_destroy()
{
	local team=$1; shift

	$TEAMD -t $team -k
}
975
# Print the "master" field of netdevice $1 from the JSON link dump.
master_name_get()
{
	local netdev=$1
	local filter='.[]["master"]'

	ip -j link show dev $netdev | jq -r "$filter"
}
982
# Usage: link_stats_get IF_NAME DIR STAT
# Print stats64.DIR.STAT of IF_NAME from the JSON link statistics dump.
link_stats_get()
{
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter='.[]["stats64"]["'$dir'"]["'$stat'"]'

	ip -j -s link show dev $if_name | jq "$filter"
}
992
# Print the transmitted-packets link counter of the given netdevice.
link_stats_tx_packets_get()
{
	link_stats_get $1 \
		tx packets
}
997
# Print the receive-errors link counter of the given netdevice.
link_stats_rx_errors_get()
{
	link_stats_get $1 \
		rx errors
}
1002
# Usage: ethtool_stats_get DEV STAT
# Print what follows the colon on the first "ethtool -S DEV" line whose
# counter name is exactly STAT.
ethtool_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift

	ethtool -S $dev \
		| grep "^ *$stat:" \
		| head -n 1 \
		| cut -d: -f2
}
1010
# Usage: ethtool_std_stats_get DEV GRP NAME SRC
# Print counter NAME of standard group GRP for DEV, read from source SRC,
# using ethtool's JSON output.
ethtool_std_stats_get()
{
	local dev=$1; shift
	local grp=$1; shift
	local name=$1; shift
	local src=$1; shift
	local filter='.[]."'"$grp"'"."'$name'"'

	ethtool --json -S $dev --groups $grp -- --src $src | \
		jq "$filter"
}
1021
# Usage: qdisc_stats_get DEV HANDLE SELECTOR
# From the JSON qdisc statistics of DEV, pick the qdisc with the given
# HANDLE and print the result of jq expression SELECTOR applied to it.
qdisc_stats_get()
{
	local dev=$1; shift
	local handle=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.handle == "'"$handle"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" \
	    | jq "$filter"
}
1031
# Usage: qdisc_parent_stats_get DEV PARENT SELECTOR
# Like qdisc_stats_get, but the qdisc is chosen by its PARENT and the dump
# is requested with the "invisible" keyword.
qdisc_parent_stats_get()
{
	local dev=$1; shift
	local parent=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.parent == "'"$parent"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" invisible \
	    | jq "$filter"
}
1041
# Usage: ipv6_stats_get DEV STAT
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/DEV that start with STAT.
ipv6_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	local snmp6_file=/proc/net/dev_snmp6/$dev

	cat $snmp6_file | grep "^$stat" | cut -f2
}
1049
# Usage: hw_stats_get SUITE IF_NAME DIR STAT
# Print stats64.DIR.STAT from the "offload" group, subgroup SUITE, of the
# JSON "ip stats" dump for IF_NAME.
hw_stats_get()
{
	local suite=$1; shift
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter=".[0].stats64.$dir.$stat"

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq "$filter"
}
1060
# Usage: __nh_stats_get KEY GROUP_ID MEMBER_ID
# From the detailed JSON dump of nexthop group GROUP_ID, print field KEY of
# the group_stats entry whose id equals MEMBER_ID.
__nh_stats_get()
{
	local key=$1; shift
	local group_id=$1; shift
	local member_id=$1; shift
	local filter='.[].group_stats[] | select(.id == $member_id) | .[$key]'

	ip -j -s -s nexthop show id $group_id |
	    jq --argjson member_id "$member_id" --arg key "$key" \
	       "$filter"
}
1071
# Usage: nh_stats_get GROUP_ID MEMBER_ID
# Print the "packets" counter of a nexthop group member.
nh_stats_get()
{
	local grp=$1
	local member=$2

	__nh_stats_get packets "$grp" "$member"
}
1079
# Usage: nh_stats_get_hw GROUP_ID MEMBER_ID
# Print the "packets_hw" counter of a nexthop group member.
nh_stats_get_hw()
{
	local grp=$1
	local member=$2

	__nh_stats_get packets_hw "$grp" "$member"
}
1087
# Print speed $1 scaled down by factors of 1024 (one decimal place, computed
# with bc) until it is below 1024 or the largest unit is reached, followed
# by the matching unit: bps, Kbps, Mbps or Gbps.
humanize()
{
	local speed=$1; shift
	# Local, so the loop does not clobber a caller's "unit".
	local unit

	for unit in bps Kbps Mbps Gbps; do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}
1102
# Usage: rate T0 T1 INTERVAL
# Print 8 * (T1 - T0) / INTERVAL using integer arithmetic.
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((8 * delta / interval))
}
1111
# Usage: packets_rate T0 T1 INTERVAL
# Print (T1 - T0) / INTERVAL using integer arithmetic.
packets_rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
1120
# Print the "address" field of netdevice $1 from the JSON link dump.
mac_get()
{
	local netdev=$1
	local filter='.[]["address"]'

	ip -j link show dev $netdev | jq -r "$filter"
}
1127
# Convert a colon-separated MAC address to a number printed as 0x<hex>.
# The first octet carries the weight 2^40; each following octet's weight is
# the previous one shifted right by 8 bits.
ether_addr_to_u64()
{
	local addr="$1"
	local weight="$((1 << 40))"
	local sum=0
	local octet

	for octet in ${addr//:/ }; do
		sum=$((sum + weight * 0x$octet))
		weight=$((weight >> 8))
	done

	printf "0x%x" $sum
}
1145
# Print the low 48 bits of number $1 as a colon-separated MAC address, most
# significant octet first. No trailing newline is printed.
u64_to_ether_addr()
{
	local val=$1
	local shift_by
	local sep=""

	for shift_by in 40 32 24 16 8 0; do
		printf "%s%02x" "$sep" $(((val >> shift_by) & 0xff))
		sep=":"
	done
}
1160
# Print the first address of netdevice $1 whose scope is "link", as found in
# the JSON address dump.
ipv6_lladdr_get()
{
	local netdev=$1
	local filter='.[]["addr_info"][] | select(.scope == "link").local'

	ip -j addr show dev $netdev | \
		jq -r "$filter" | \
		head -n 1
}
1169
# Print the ageing time of bridge $1 in seconds.
bridge_ageing_time_get()
{
	local bridge=$1
	local centisecs

	centisecs=$(ip -j -d link show dev $bridge \
		    | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
	# Need to divide by 100 to convert to seconds.
	echo $((centisecs / 100))
}
1180
# Values of sysctl keys as they were when sysctl_save last ran for them.
declare -A SYSCTL_ORIG

# Remember the current value of sysctl key $1 in SYSCTL_ORIG.
sysctl_save()
{
	local key=$1; shift
	local current

	current=$(sysctl -n $key)
	SYSCTL_ORIG[$key]=$current
}
1188
# Usage: sysctl_set KEY VALUE
# Save the current value of KEY with sysctl_save, then write VALUE to it.
sysctl_set()
{
	local key=$1
	local value=$2

	sysctl_save "$key"
	sysctl -qw $key="$value"
}
1197
# Write the value recorded in SYSCTL_ORIG back to sysctl key $1.
sysctl_restore()
{
	local key=$1; shift
	local saved=${SYSCTL_ORIG[$key]}

	sysctl -qw $key="$saved"
}
1204
# Turn on IPv4 and IPv6 forwarding for all interfaces through sysctl_set.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}
1210
# Restore the saved IPv6 and then IPv4 forwarding settings through
# sysctl_restore.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}
1216
# MTU of each netdevice as it was when mtu_set last ran for it.
declare -A MTU_ORIG

# Usage: mtu_set DEV MTU
# Remember the current MTU of DEV in MTU_ORIG, then set it to MTU.
mtu_set()
{
	local dev=$1; shift
	local mtu=$1; shift
	local current

	current=$(ip -j link show dev $dev | jq -e '.[].mtu')
	MTU_ORIG["$dev"]=$current
	ip link set dev $dev mtu $mtu
}
1226
# Set the MTU of netdevice $1 back to the value recorded in MTU_ORIG.
mtu_restore()
{
	local dev=$1; shift
	local saved=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved
}
1233
# Return 0 when "ethtool -k" reports "hw-tc-offload: on" for each of the
# first $1 (default NUM_NETIFS) ports, 1 as soon as one of them does not.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the counter local so a caller's own "i" is left alone.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
1248
# Usage: trap_install DEV DIRECTION
# Install a pref-1 flower filter with a skip_sw trap action on DEV; when
# that is rejected, install a pref-1 flower filter with action "continue".
trap_install()
{
	local dev=$1; shift
	local direction=$1; shift

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}
1264
# Usage: trap_uninstall DEV DIRECTION
# Delete the pref-1 flower filter from DEV in the given direction.
trap_uninstall()
{
	local dev=$1
	local direction=$2

	tc filter del dev $dev $direction pref 1 flower
}
1272
# Usage: __icmp_capture_add_del add|del PREF VSUF TUNDEV FILTER
# Add or delete an ingress flower filter on TUNDEV at preference PREF that
# matches protocol ip<VSUF> / ip_proto icmp<VSUF> plus the extra match terms
# in FILTER, with action pass. VSUF is "" or "v6".
__icmp_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local vsuf=$1; shift
	local tundev=$1; shift
	local filter=$1; shift
	local proto=ip$vsuf
	local ip_proto=icmp$vsuf

	tc filter $add_del dev "$tundev" ingress \
	   proto $proto pref $pref \
	   flower ip_proto $ip_proto $filter \
	   action pass
}
1286
# Usage: icmp_capture_install TUNDEV [FILTER]
# Add the pref-100 ICMP capture filter on TUNDEV.
icmp_capture_install()
{
	local tundev=$1
	local filter=$2

	__icmp_capture_add_del add 100 "" "$tundev" "$filter"
}
1294
# Usage: icmp_capture_uninstall TUNDEV [FILTER]
# Delete the pref-100 ICMP capture filter from TUNDEV.
icmp_capture_uninstall()
{
	local tundev=$1
	local filter=$2

	__icmp_capture_add_del del 100 "" "$tundev" "$filter"
}
1302
# Usage: icmp6_capture_install TUNDEV [FILTER]
# Add the pref-100 ICMPv6 capture filter on TUNDEV.
icmp6_capture_install()
{
	local tundev=$1
	local filter=$2

	__icmp_capture_add_del add 100 v6 "$tundev" "$filter"
}
1310
# Usage: icmp6_capture_uninstall TUNDEV [FILTER]
# Delete the pref-100 ICMPv6 capture filter from TUNDEV.
icmp6_capture_uninstall()
{
	local tundev=$1
	local filter=$2

	__icmp_capture_add_del del 100 v6 "$tundev" "$filter"
}
1318
# Usage: __vlan_capture_add_del add|del PREF DEV FILTER
# Add or delete an ingress flower filter on DEV at preference PREF for
# protocol 802.1q with the match terms in FILTER and action pass.
__vlan_capture_add_del()
{
	local add_del=$1
	local pref=$2
	local dev=$3
	local filter=$4

	tc filter $add_del dev "$dev" ingress \
	   proto 802.1q pref $pref \
	   flower $filter \
	   action pass
}
1331
# Usage: vlan_capture_install DEV [FILTER]
# Add the pref-100 802.1q capture filter on DEV.
vlan_capture_install()
{
	local dev=$1
	local filter=$2

	__vlan_capture_add_del add 100 "$dev" "$filter"
}
1339
# Usage: vlan_capture_uninstall DEV [FILTER]
# Delete the pref-100 802.1q capture filter from DEV.
vlan_capture_uninstall()
{
	local dev=$1
	local filter=$2

	__vlan_capture_add_del del 100 "$dev" "$filter"
}
1347
# Usage: __dscp_capture_add_del add|del DEV BASE
# Add or delete eight ICMP capture filters on DEV, one per DSCP value
# BASE..BASE+7. The filter for a DSCP value sits at preference DSCP + 100
# and matches ip_tos DSCP << 2 with skip_hw.
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp
	# Local, so the loop does not clobber a caller's "prio".
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1361
# Usage: dscp_capture_install DEV BASE
# Add the DSCP capture filters for BASE..BASE+7 on DEV.
dscp_capture_install()
{
	local dev=$1
	local base=$2

	__dscp_capture_add_del add $dev $base
}
1369
# Usage: dscp_capture_uninstall DEV BASE
# Delete the DSCP capture filters for BASE..BASE+7 from DEV.
dscp_capture_uninstall()
{
	local dev=$1
	local base=$2

	__dscp_capture_add_del del $dev $base
}
1377
# Usage: dscp_fetch_stats DEV BASE
# For each DSCP value BASE..BASE+7 print one "[DSCP]=COUNT " line, where
# COUNT is what tc_rule_stats_get reports for DEV at preference DSCP + 100.
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	local dscp
	local t
	# Local, so the loop does not clobber a caller's "prio".
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1389
# Add a clsact qdisc to netdevice $1 and an ingress matchall filter at
# preference 10000 that drops everything reaching it.
matchall_sink_create()
{
	local sink_dev=$1; shift

	tc qdisc add dev $sink_dev clsact
	tc filter add dev $sink_dev ingress pref 10000 matchall action drop
}
1400
# Run every test named in TESTS (or in ALL_TESTS when TESTS is empty), each
# one wrapped in in_defer_scope.
tests_run()
{
	local current_test
	local selected=${TESTS:-$ALL_TESTS}

	for current_test in $selected; do
		in_defer_scope $current_test
	done
}
1410
# Default cleanup: run the pre_cleanup hook, then defer_scopes_cleanup.
cleanup()
{
	pre_cleanup; defer_scopes_cleanup
}
1416
# Usage: multipath_eval DESC WEIGHT_RP12 WEIGHT_RP13 PACKETS_RP12 PACKETS_RP13
# Compare the ratio of packets seen on two paths with the ratio of their
# weights (larger weight over smaller, computed with bc at scale 2) and log
# the result as test DESC. The test fails when either packet count is zero
# or when the relative difference between the ratios exceeds 0.15.
# Resets RET to 0 before checking.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
	       check_err 1 "Packet difference is 0"
	       # Log under the caller's description, as the success path
	       # below does, so the failing case can be identified.
	       log_test "$desc"
	       log_info "Expected ratio $weights_ratio"
	       return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	# Absolute value: drop a leading minus sign.
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1459
# Usage: in_ns NAME COMMAND [ARG...]
# Start bash inside network namespace NAME, have it set NUM_NETIFS=0 and
# source lib.sh, then run COMMAND with each word shell-quoted.
in_ns()
{
	local name=$1; shift
	local cmdline

	# Quote every word with %q and separate the words with the first
	# character of IFS.
	cmdline=$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$cmdline
	EOF
}
1470
1471##############################################################################
1472# Tests
1473
# Ping $2 from the VRF that master_name_get reports for interface $1.
# $3 holds optional extra ping arguments (word-split on purpose). Sends
# $PING_COUNT probes 0.1 s apart, bounded by $PING_TIMEOUT; all output is
# discarded and the exit status of the command is returned.
ping_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf_name

	vrf_name=$(master_name_get $if_name)
	ip vrf exec $vrf_name $PING $args $dip \
		-c $PING_COUNT -i 0.1 -w $PING_TIMEOUT &> /dev/null
}
1486
# Test that pinging $2 from interface $1 succeeds. $3 is appended verbatim to
# the test name "ping".
ping_test()
{
	local if_name=$1 dip=$2 name_suffix=$3
	local ping_rc

	RET=0

	ping_do $if_name $dip
	ping_rc=$?
	check_err $ping_rc
	log_test "ping$name_suffix"
}
1495
# Test that pinging $2 from interface $1 does NOT succeed. $3 is appended
# verbatim to the test name "ping fails".
ping_test_fails()
{
	local if_name=$1 dip=$2 name_suffix=$3
	local ping_rc

	RET=0

	ping_do $if_name $dip
	ping_rc=$?
	check_fail $ping_rc
	log_test "ping fails$name_suffix"
}
1504
# IPv6 counterpart of ping_do: run $PING6 towards $2 from the VRF that
# master_name_get reports for interface $1. $3 holds optional extra arguments
# (word-split on purpose). Output is discarded; the exit status is returned.
ping6_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf_name

	vrf_name=$(master_name_get $if_name)
	ip vrf exec $vrf_name $PING6 $args $dip \
		-c $PING_COUNT -i 0.1 -w $PING_TIMEOUT &> /dev/null
}
1517
# Test that an IPv6 ping of $2 from interface $1 succeeds. $3 is appended
# verbatim to the test name "ping6".
ping6_test()
{
	local if_name=$1 dip=$2 name_suffix=$3
	local ping_rc

	RET=0

	ping6_do $if_name $dip
	ping_rc=$?
	check_err $ping_rc
	log_test "ping6$name_suffix"
}
1526
# Test that an IPv6 ping of $2 from interface $1 does NOT succeed. $3 is
# appended verbatim to the test name "ping6 fails".
ping6_test_fails()
{
	local if_name=$1 dip=$2 name_suffix=$3
	local ping_rc

	RET=0

	ping6_do $if_name $dip
	ping_rc=$?
	check_fail $ping_rc
	log_test "ping6 fails$name_suffix"
}
1535
# FDB learning test for bridge $1.
#
# Arguments:
#   $1 - bridge device
#   $2 - bridge port under test (connected to host1_if)
#   $3 - host interface behind that port
#   $4 - second host interface, used to inject traffic towards the test MAC
#
# Steps: check that no FDB entry exists for the test MAC, check that traffic
# to it is not forwarded with flooding off, let the bridge learn the MAC from
# host1_if and check forwarding, wait for the entry to age out, and finally
# check that nothing is learned while learning is off. The result is logged
# as "FDB learning".
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time
	# jq filter selecting an FDB entry for the test MAC.
	local fdb_match=".[] | select(.mac == \"$mac\")"
	# jq filter selecting tc filter 101 once it counted exactly one packet.
	local one_hit=".[] | select(.options.handle == 101)"
	one_hit+=" | select(.options.actions[0].stats.packets == 1)"

	RET=0

	bridge -j fdb show br $bridge brport $br_port1 |
		jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	# Count packets for the test MAC arriving at `host1_if`.
	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	# MAC not learned yet: nothing may reach `host1_if`.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress |
		jq -e "$one_hit" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Send from the test MAC so that the bridge learns it.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 |
		jq -e "$fdb_match" &> /dev/null
	check_err $? "Did not find FDB record when should"

	# MAC learned: the packet must now be counted on `host1_if`.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress |
		jq -e "$one_hit" &> /dev/null
	check_err $? "Packet did not reach second host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 |
		jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning off, the same frame must not create an entry.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 |
		jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning on

	# Undo the setup in reverse order.
	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1612
# Send one packet to MAC $2 / IP $3 out of $4 and check whether it arrives at
# $5, using a temporary flower drop filter on $5 as the packet counter.
#
# $1 is the expectation, "true" or "false". Returns 0 when the observation
# matches the expectation and 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local seen_rc

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# Status 0 means the filter counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	seen_rc=$?

	if [[ $should_flood == "true" ]]; then
		if [[ $seen_rc -ne 0 ]]; then
			err=1
		fi
	elif [[ $should_flood == "false" ]]; then
		if [[ $seen_rc -eq 0 ]]; then
			err=1
		fi
	fi

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1646
# Unknown unicast flooding test: with "flood off" on bridge port $1 a packet
# sent from $2 to an unlearned MAC must not reach $3; with "flood on" it must.
# The port is left with flooding enabled. Logged as "Unknown unicast flood".
flood_unicast_test()
{
	local br_port=$1 host1_if=$2 host2_if=$3
	local dmac=de:ad:be:ef:13:37
	local dip=192.0.2.100

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1669
# Unregistered multicast flooding test: with "mcast_flood off" on bridge port
# $1 a packet sent from $2 to 239.0.0.1 must not reach $3; with
# "mcast_flood on" it must. The port is left with mcast_flood enabled.
# Logged as "Unregistered multicast flood".
flood_multicast_test()
{
	local br_port=$1 host1_if=$2 host2_if=$3
	local dmac=01:00:5e:00:00:01
	local dip=239.0.0.1

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1692
# Run the unicast and then the multicast flooding test with the same
# arguments: bridge port $1, sending host interface $2, receiving host
# interface $3.
flood_test()
{
	# `br_port` is connected to `host2_if`
	local br_port=$1 host1_if=$2 host2_if=$3
	local flood_case

	for flood_case in flood_unicast_test flood_multicast_test; do
		$flood_case $br_port $host1_if $host2_if
	done
}
1703
# Start $MZ in the background and give it one second to get going.
#
# Arguments:
#   $1 - packet size (-p)
#   $2 - protocol type (-t)
#   $3 - interface the traffic is sent from
#   $4 - source IP (-A)
#   $5 - destination IP (-B)
#   $6 - destination MAC (-b)
#   $7... - extra arguments appended to the $MZ command line
#
# The job is left running with "-c 0"; see stop_traffic for stopping it.
__start_traffic()
{
	local pktsize=$1 proto=$2
	local h_in=$3    # Where the traffic egresses the host
	local sip=$4 dip=$5 dmac=$6

	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 -a own -b $dmac \
		-t "$proto" -q "${@:7}" &
	sleep 1
}
1718
# Start background UDP traffic with packet size $1 from interface $2, source
# IP $3, destination IP $4, destination MAC $5. Any further arguments are
# passed through to __start_traffic unchanged.
start_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5

	__start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" "${@:6}"
}
1731
# Start background TCP traffic with packet size $1 from interface $2, source
# IP $3, destination IP $4, destination MAC $5. Any further arguments are
# passed through to __start_traffic unchanged.
start_tcp_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5

	__start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" "${@:6}"
}
1744
# Start background UDP traffic with the default packet size of 8000 from
# interface $1, source IP $2, destination IP $3, destination MAC $4. Any
# further arguments are passed through unchanged.
start_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4

	start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" "${@:5}"
}
1756
# Start background TCP traffic with the default packet size of 8000 from
# interface $1, source IP $2, destination IP $3, destination MAC $4. Any
# further arguments are passed through unchanged.
start_tcp_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4

	start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" "${@:5}"
}
1768
# Kill a background job and wait for it to terminate. $1 is the PID or job
# spec to stop; when omitted, %% (the shell's current job) is used.
stop_traffic()
{
	local target=${1-%%}

	# Suppress noise from killing mausezahn.
	{
		kill $target &&
			wait $target
	} 2>/dev/null
}
1776
# Per-interface tcpdump state, keyed by interface name: PID of the running
# capture, path of the capture file, and path of the file collecting
# tcpdump's own output.
declare -A cappid capfile capout
1780
# Start a background tcpdump capturing ingress packets on an interface.
#
# Arguments:
#   $1 - interface to capture on
#   $2 - optional network namespace to run tcpdump in
#
# Globals:
#   capfile[$1], capout[$1] - set to fresh temporary files (capture file and
#                             tcpdump's stdout/stderr)
#   cappid[$1]              - set to the PID of the background tcpdump
#   ns_cmd                  - set to the "ip netns exec" prefix (or empty);
#                             left global because tcpdump_stop reuses it
#   SUDO_USER               - if non-empty, passed to tcpdump via -Z
#   TCPDUMP_EXTRA_FLAGS     - extra tcpdump options, word-split on purpose
#
# Sleeps one second after launching tcpdump before returning.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift
	# Kept local, and an array, so that nothing leaks into the caller's
	# scope and the user name survives as a single word.
	local -a capuser=()

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	if [[ -z $ns ]]; then
		ns_cmd=""
	else
		ns_cmd="ip netns exec ${ns}"
	fi

	if [[ -n $SUDO_USER ]]; then
		capuser=(-Z "$SUDO_USER")
	fi

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 "${capuser[@]}" -w "${capfile[$if_name]}" \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1808
# Stop the capture recorded in cappid[$1]: kill it (prefixed with the global
# ns_cmd left behind by tcpdump_start), wait for it to exit if the kill
# succeeded, then sleep one second.
tcpdump_stop()
{
	local if_name=$1
	local capture_pid

	capture_pid=${cappid[$if_name]}

	$ns_cmd kill "$capture_pid" &&
		wait "$capture_pid"
	sleep 1
}
1817
# Remove the capture file and the tcpdump output file recorded for
# interface $1 in capfile[] and capout[].
tcpdump_cleanup()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}
	local outlog=${capout[$if_name]}

	rm $pcap $outlog
}
1824
# Print the capture recorded for interface $1 by reading capfile[$1] back
# with "tcpdump -e -n -r"; tcpdump's stderr is merged into stdout.
tcpdump_show()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}

	tcpdump -e -n -r $pcap 2>&1
}
1831
# Send one UDP packet (sp=2048, dp=4096) from host1_if to MAC $1 / IP $3
# with source IP $2, and report whether it arrived at host2_if.
#
# Arguments: $1 - destination MAC, $2 - source IP, $3 - destination IP,
#            $4 - host1_if (sender), $5 - host2_if (receiver)
#
# return 0 if the packet wasn't seen on host2_if or 1 if it was
mcast_packet_test()
{
	local mac=$1
	local src_ip=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local seen=0
	local tc_proto="ip"
	local mz_v6arg=""
	local ipv4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'

	# basic check to see if we were passed an IPv4 address, if not assume IPv6
	if [[ $ip =~ $ipv4_re ]]; then
		: # IPv4 defaults set above apply
	else
		tc_proto="ipv6"
		mz_v6arg="-6"
	fi

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was received by it or not.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# jq succeeds only if the filter counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null \
		&& seen=1

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1871
# Check the MDB of br0 after a membership report.
#
# Arguments:
#   $1    - report name, used only in the error message
#   $2... - source addresses expected for group $TEST_GROUP
#
# Verifies that a $TEST_GROUP entry exists whose source list contains every
# given source, and that an S,G entry exists for each source. Failures are
# recorded through check_err.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so that a caller's `src` is not clobbered.
	local src sgent

	# Build one jq condition per expected source address.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1893
# Check data-plane forwarding for a list of S,G entries.
#
# Arguments:
#   $1    - 1 if traffic from the sources is expected to be forwarded,
#           anything else if it is expected to be blocked
#   $2... - source addresses
#
# For each source, one packet to $TEST_GROUP_MAC / $TEST_GROUP is sent from
# $h2 towards $h1 with mcast_packet_test, which returns 1 when the packet was
# seen. Failures are recorded through check_fail / check_err.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Loop variable is local so that a caller's `src` is not clobbered.
	local src
	local retval

	for src in "${sources[@]}"; do
		retval=0

		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1911
# Check the control-plane state of a list of sources of group $TEST_GROUP in
# the MDB of br0.
#
# Arguments:
#   $1    - 1 if the sources are expected to be blocked, anything else if not
#   $2... - source addresses
#
# For every source two properties are queried: its source-list timer being
# "0.00" and its S,G entry carrying the "blocked" flag. Both jq results are
# handed to check_err_fail together with should_fail (0 when $1 is 1,
# otherwise 1).
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Loop variable is local so that a caller's `src` is not clobbered.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1938
# Join multicast group $2 on interface $1 by starting mreceive in the
# background, inside the VRF that master_name_get reports for the interface.
# The PID is stored in the global mreceive_pid; the function then sleeps one
# second before returning.
mc_join()
{
	local if_name=$1 group=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	# We don't care about actual reception, just about joining the
	# IP multicast group and adding the L2 address to the device's
	# MAC filtering table
	ip vrf exec $vrf_name mreceive -g $group -I $if_name &> /dev/null &
	mreceive_pid=$!

	sleep 1
}
1954
# Stop the receiver started by mc_join: kill the process whose PID is stored
# in the global mreceive_pid and, if the signal could be sent, wait for it to
# exit.
mc_leave()
{
	kill "$mreceive_pid" && wait "$mreceive_pid"
}
1959
# Send one multicast packet (msend -c 1) to group(s) $2 out of interface $1,
# from within the VRF that master_name_get reports for the interface. All
# output is discarded.
mc_send()
{
	local if_name=$1 groups=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 &> /dev/null
}
1969
# Start "$ip monitor $mtype" in the background, writing to a temporary file.
#
# Arguments:
#   $1 - monitor object type
#   $2 - ip command to use (default: ip)
#
# Prints "<pid> <tmpfile>" on stdout, which is the argument pair that
# stop_ip_monitor expects first.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	# start the monitor in the background
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1981
# Stop a monitor started by start_ip_monitor and check how many events it
# recorded.
#
# Arguments:
#   $1 - PID of the monitor
#   $2 - file the monitor wrote to (removed afterwards)
#   $3 - expected number of event lines
#   $4 - description used in the error message
#
# Only lines starting with a word character are counted as events.
stop_ip_monitor()
{
	local mpid=$1; shift
	local tmpfile=$1; shift
	local el=$1; shift
	local what=$1; shift
	local lines

	sleep 0.2
	kill $mpid
	lines=$(grep '^\w' $tmpfile | wc -l)
	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"
	rm -rf $tmpfile
}
1996
# Check that exactly one "ip monitor stats" event is emitted for each of four
# steps: enabling ${type}_stats on the device, running $make_suitable,
# running $make_unsuitable, and disabling ${type}_stats again.
#
# Arguments:
#   $1 - device
#   $2 - stats type; "${type}_stats" is the attribute toggled
#   $3 - command making the device suitable for offload
#   $4 - command making the device unsuitable for offload
#   $5 - ip command to use (default: ip)
#
# Logged as "${type}_stats notifications".
hw_stats_monitor_test()
{
	local dev=$1; shift
	local type=$1; shift
	local make_suitable=$1; shift
	local make_unsuitable=$1; shift
	local ip=${1-ip}; shift
	local ipmout

	RET=0

	# Expect a notification about enablement.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats on
	stop_ip_monitor $ipmout 1 "${type}_stats enablement"

	# Expect a notification about offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "${type}_stats installation"

	# Expect a notification about loss of offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"

	# Expect a notification about disablement
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats off
	stop_ip_monitor $ipmout 1 "${type}_stats disablement"

	log_test "${type}_stats notifications"
}
2029
# Print dotted-quad IPv4 address $1 as colon-separated two-digit hex bytes,
# e.g. 192.0.2.1 -> c0:00:02:01. No trailing newline is printed.
ipv4_to_bytes()
{
	local IP=$1; shift
	local hex

	# One "%02x:" per octet; the format is reused for every argument.
	printf -v hex '%02x:' ${IP//./ }
	printf '%s' "${hex%:}"
}
2037
# Convert a given IPv6 address, `IP', such that the :: token, if present, is
# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
# digits. An optional `BYTESEP' parameter can be given to further separate
# individual bytes of each 16-bit group.
#
# Example: expand_ipv6 2001:db8::1 prints
# 2001:0db8:0000:0000:0000:0000:0000:0001
expand_ipv6()
{
	local IP=$1; shift
	local bytesep=$1; shift

	# Mark where :: was, then count the colons that remain.
	local marked=${IP/::/_}
	local seps=${marked//[^:]/}
	local fill=:::::::
	# IP where :: -> the appropriate number of colons:
	local full=${marked/_/${fill:${#seps}}}

	# One group per line: left-pad with zeroes, keep the last four digits
	# (split by `bytesep'), then join the lines back with colons.
	echo $full |
	    tr : '\n' |
	    sed -e 's/^/0000/' \
		-e 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
	    tr '\n' : |
	    sed 's/:$//'
}
2059
# Print IPv6 address $1 in expanded form with ":" as the byte separator,
# i.e. as colon-separated bytes. Thin wrapper around expand_ipv6.
ipv6_to_bytes()
{
	local addr=$1

	expand_ipv6 "$addr" :
}
2066
# Print 16-bit value $1 as two colon-separated hex bytes, high byte first,
# e.g. 258 -> 01:02. Only the last four hex digits are used. No trailing
# newline is printed.
u16_to_bytes()
{
	local u16=$1; shift
	local hex

	printf -v hex "%04x" $u16
	# Keep the last four hex digits only.
	hex=${hex: -4}
	printf '%s:%s' "${hex:0:2}" "${hex:2:2}"
}
2073
# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
# stands for 00:00.
#
# The payload is translated into a dc program that sums all 16-bit words; the
# result is printed by dc.
payload_template_calc_checksum()
{
	local payload=$1; shift

	{
		# Set input radix, then push zero for the initial checksum.
		printf '%s\n' "16i" 0

		# Pad the payload with a terminating 00: in case we get an odd
		# number of bytes.
		echo "${payload%:}:00:" |
			sed 's/CHECKSUM/00:00/g' |
			tr '[:lower:]' '[:upper:]' |
			# Add the word to the checksum.
			sed 's/\(..\):\(..\):/\1\2+\n/g' |
			# Strip the extra odd byte we pushed if left unconverted.
			sed 's/\(..\):$//'

		# Calculate and add carry, then bit-flip and print.
		printf '%s\n' "10000 ~ +" "FFFF r - p"
	} |
		dc |
		tr '[:upper:]' '[:lower:]'
}
2104
# Print payload template $1 with every CHECKSUM keyword replaced by the
# 16-bit value $2, rendered as two colon-separated bytes by u16_to_bytes.
payload_template_expand_checksum()
{
	local payload=$1; shift
	local checksum=$1; shift
	local ckbytes

	ckbytes=$(u16_to_bytes $checksum)

	echo "${payload//CHECKSUM/$ckbytes}"
}
2114
# Print the number of bytes in payload template $1. A trailing colon is
# ignored and a CHECKSUM keyword counts as the two bytes it expands to.
payload_template_nbytes()
{
	local payload=$1; shift

	# One byte per line, then count the lines.
	payload_template_expand_checksum "${payload%:}" 0 |
		tr : '\n' |
		wc -l
}
2122
# Print the payload of an IGMPv3 Membership Report carrying a single IS_IN
# group record for group $1 with the source addresses $2..., in mausezahn
# byte format and with the checksum filled in.
igmpv3_is_in_get()
{
	local GRP=$1; shift
	local sources=("$@")

	local igmpv3 src checksum
	local nsources=$(u16_to_bytes ${#sources[@]})

	# IS_IN ( $sources )
	igmpv3="22:"				# Type - Membership Report
	igmpv3+="00:"				# Reserved
	igmpv3+="CHECKSUM:"			# Checksum
	igmpv3+="00:00:"			# Reserved
	igmpv3+="00:01:"			# Number of Group Records
	igmpv3+="01:"				# Record Type - IS_IN
	igmpv3+="00:"				# Aux Data Len
	igmpv3+="${nsources}:"			# Number of Sources
	igmpv3+="$(ipv4_to_bytes $GRP):"	# Multicast Address
	for src in "${sources[@]}"; do		# Source Addresses
		igmpv3+="$(ipv4_to_bytes $src):"
	done

	checksum=$(payload_template_calc_checksum "$igmpv3")

	payload_template_expand_checksum "$igmpv3" $checksum
}
2151
# Print the payload of an IGMPv2 Leave Group message for group $1, in
# mausezahn byte format and with the checksum filled in.
igmpv2_leave_get()
{
	local GRP=$1; shift
	local payload checksum

	payload="17:"				# Type - Leave Group
	payload+="00:"				# Max Resp Time - not meaningful
	payload+="CHECKSUM:"			# Checksum
	payload+="$(ipv4_to_bytes $GRP)"	# Group Address

	checksum=$(payload_template_calc_checksum "$payload")

	payload_template_expand_checksum "$payload" $checksum
}
2166
# Print a Hop-by-Hop extension header followed by an MLDv2 Report carrying a
# single IS_IN record for group $2 with the source addresses $3..., in
# mausezahn byte format. $1 is the source IP; it only enters the checksum,
# which is calculated over an IPv6 pseudo-header plus the ICMPv6 message.
mldv2_is_in_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local sources=("$@")

	local hbh icmpv6 sudohdr
	local src len checksum
	local nsources=$(u16_to_bytes ${#sources[@]})

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="8f:"				# Type - MLDv2 Report
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Reserved
	icmpv6+="00:01:"			# Number of Group Records
	icmpv6+="01:"				# Record Type - IS_IN
	icmpv6+="00:"				# Aux Data Len
	icmpv6+="${nsources}:"			# Number of Sources
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address
	for src in "${sources[@]}"; do		# Source Addresses
		icmpv6+="$(ipv6_to_bytes $src):"
	done

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2210
# Print a Hop-by-Hop extension header followed by an MLDv1 Done message for
# group $2, in mausezahn byte format. $1 is the source IP; it only enters the
# checksum, which is calculated over an IPv6 pseudo-header plus the ICMPv6
# message.
mldv1_done_get()
{
	local SIP=$1; shift
	local GRP=$1; shift

	local hbh icmpv6 sudohdr
	local len checksum

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="84:"				# Type - MLDv1 Done
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Max Resp Delay - not meaningful
	icmpv6+="00:00:"			# Reserved
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2245
# Warn, and normally bail out, when lldpad is running.
#
# Arguments:
#   $1 - what lldpad is likely to do (completes "lldpad will likely ...")
#   $2 - what the test does (completes "this test will ...")
#
# If "systemctl is-active --quiet lldpad" succeeds, a warning is printed on
# stderr. Unless ALLOW_LLDPAD is non-empty, the test is then logged as
# skipped (as "<calling file>:<calling function>") and the script exits with
# $EXIT_STATUS. Returns 0 when lldpad is not active or when ALLOW_LLDPAD
# permits continuing.
bail_on_lldpad()
{
	local reason1="$1"; shift
	local reason2="$1"; shift
	local caller=${FUNCNAME[1]}
	local src=${BASH_SOURCE[1]}

	if systemctl is-active --quiet lldpad; then

		# Write through the already-open stderr (>&2). Re-opening
		# /dev/stderr would truncate stderr if it is a regular file.
		cat >&2 <<-EOF
		WARNING: lldpad is running

			lldpad will likely $reason1, and this test will
			$reason2. Both are not supported at the same time,
			one of them is arbitrarily going to overwrite the
			other. That will cause spurious failures (or, unlikely,
			passes) of this test.
		EOF

		if [[ -z $ALLOW_LLDPAD ]]; then
			cat >&2 <<-EOF

				If you want to run the test anyway, please set
				an environment variable ALLOW_LLDPAD to a
				non-empty string.
			EOF
			log_test_skip $src:$caller
			exit $EXIT_STATUS
		else
			# Explicit status: a bare return would hand back the
			# failed [[ -z ]] test above.
			return 0
		fi
	fi
}
2279
# Print the absolute value of integer $1.
absval()
{
	local v=$1; shift

	if ((v > 0)); then
		echo $((v))
	else
		echo $((-v))
	fi
}
2286
# Probe whether device $1 supports unicast filtering. A temporary macvlan
# (macvlan-tmp) with the device's MAC address plus one is brought up on top
# of it; if that leaves the device with promiscuity 1, "no" is printed,
# otherwise "yes". The device itself is set up as a side effect.
has_unicast_flt()
{
	local dev=$1; shift
	local mac_addr=$(mac_get $dev)
	local tmp=$(ether_addr_to_u64 $mac_addr)
	local promisc

	ip link set $dev up
	ip link add link $dev name macvlan-tmp type macvlan mode private
	ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1)))
	ip link set macvlan-tmp up

	promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')

	ip link del macvlan-tmp

	if [[ $promisc == 1 ]]; then
		echo "no"
	else
		echo "yes"
	fi
}
2305