xref: /linux/tools/testing/selftests/net/forwarding/lib.sh (revision 3d2c3d2eea9acdbee5b5742d15d021069b49d3f9)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
4
5##############################################################################
6# Topology description. p1 looped back to p2, p3 to p4 and so on.
7
# Default mapping from logical port names (p1, p2, ...) to netdevice names.
# The array may be rebuilt further down in this file (driver lookup, command
# line arguments or the driver-test environment).
declare -A NETIFS=(
    [p1]=veth0
    [p2]=veth1
    [p3]=veth2
    [p4]=veth3
    [p5]=veth4
    [p6]=veth5
    [p7]=veth6
    [p8]=veth7
    [p9]=veth8
    [p10]=veth9
)

# Port that does not have a cable connected.
: "${NETIF_NO_CABLE:=eth8}"

##############################################################################
# Defines
#
# Each setting below uses the ': "${VAR:=default}"' idiom: the default is
# assigned only when VAR is unset or empty, so a value that is already present
# is kept.

# Networking utilities.
: "${PING:=ping}"
: "${PING6:=ping6}"	# Some distros just use ping.
: "${ARPING:=arping}"
: "${TROUTE6:=traceroute6}"

# Packet generator.
: "${MZ:=mausezahn}"	# Some distributions use 'mz'.
: "${MZ_DELAY:=0}"

# Host configuration tools.
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${MCD_TABLE_NAME:=selftests}"

# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
# and bailing out. Used during initial setup.
: "${INTERFACE_TIMEOUT:=600}"
# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
: "${WAIT_TIMEOUT:=20}"
# Time to wait after interfaces participating in the test are all UP.
: "${WAIT_TIME:=5}"

# Whether to pause before cleanup (see pre_cleanup).
: "${PAUSE_ON_CLEANUP:=no}"

# Whether to create virtual interfaces, and what netdevice type they should be.
: "${NETIF_CREATE:=yes}"
: "${NETIF_TYPE:=veth}"

# Constants for ping tests:
# How many packets should be sent.
: "${PING_COUNT:=10}"
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
: "${PING_TIMEOUT:=5}"

# Minimum ageing_time (in centiseconds) supported by hardware
: "${LOW_AGEING_TIME:=1000}"

# Whether to check for availability of certain tools.
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${REQUIRE_TEAMD:=no}"

# Whether to override MAC addresses on interfaces participating in the test.
: "${STABLE_MAC_ADDRS:=no}"

# Flags for tcpdump
: "${TCPDUMP_EXTRA_FLAGS:=}"

# Flags for TC filters.
: "${TC_FLAG:=skip_hw}"

# Whether the machine is "slow" -- i.e. might be incapable of running tests
# involving heavy traffic. This might be the case on a debug kernel, a VM, or
# e.g. a low-power board.
: "${KSFT_MACHINE_SLOW:=no}"
88
89##############################################################################
90# Find netifs by test-specified driver name
91
# Print the name of the driver bound to a netdevice, taken from the
# /sys/class/net/<dev>/device/driver symlink. Prints nothing when that
# symlink does not exist.
# $1 - netdevice name
driver_name_get()
{
	local netdev=$1; shift
	local link="/sys/class/net/$netdev/device/driver"

	# Nothing to report without a driver symlink.
	[[ -L $link ]] || return 0

	basename $(realpath $link)
}
101
# Populate NETIFS[p1], NETIFS[p2], ... with every netdevice whose driver
# matches $NETIF_FIND_DRIVER, in the order reported by "ip -j link show".
# Globals: NETIF_FIND_DRIVER (read), NETIFS (written)
netif_find_driver()
{
	local ifnames
	local ifname
	local driver_name
	local count=0

	ifnames=$(ip -j link show | jq -r ".[].ifname")

	# ifname and driver_name are local so that the caller's variables of
	# the same name are not clobbered.
	for ifname in $ifnames
	do
		driver_name=$(driver_name_get $ifname)
		if [[ -n $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
			count=$((count + 1))
			NETIFS[p$count]="$ifname"
		fi
	done
}
116
# Whether to find netdevices according to the driver specified by the
# importer. When non-empty, NETIFS is rebuilt by netif_find_driver.
: "${NETIF_FIND_DRIVER:=}"

if [[ $NETIF_FIND_DRIVER ]]; then
	unset NETIFS
	declare -A NETIFS
	netif_find_driver
fi

# Directory that holds this file, with symlinks resolved.
net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")

# Optional configuration file next to this library; sourced only if present.
if [[ -f $net_forwarding_dir/forwarding.config ]]; then
	source "$net_forwarding_dir/forwarding.config"
fi

# Pull in lib.sh from the parent directory.
source "$net_forwarding_dir/../lib.sh"
133
134##############################################################################
135# Sanity checks
136
# Exit with $ksft_skip unless "tc -j" succeeds, i.e. tc has JSON support.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
145
# Probe whether tc-flower accepts erspan_opts by installing and then removing
# a throw-away filter. Returns $ksft_skip when the filter cannot be added.
# $1 - netdevice to install the probe filter on
check_tc_erspan_support()
{
	local netdev=$1; shift

	if ! tc filter add dev $netdev ingress pref 1 handle 1 flower \
		erspan_opts 1:0:0:0 &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing erspan support"
		return $ksft_skip
	fi

	# Remove the probe filter again.
	tc filter del dev $netdev ingress pref 1 handle 1 flower \
		erspan_opts 1:0:0:0 &> /dev/null
}
159
# Old versions of tc don't understand "mpls_uc". Probe for it by installing
# and removing a throw-away matchall filter; returns $ksft_skip when the
# filter cannot be added.
# $1 - netdevice to install the probe filter on
check_tc_mpls_support()
{
	local netdev=$1; shift

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	# Remove the probe filter again.
	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall
}
174
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install a flower filter matching on "mpls lse depth 2", check that the JSON
# dump of the filters parses with jq, and remove the filter again.
# Returns $ksft_skip if the filter cannot be added or the dump does not parse.
# $1 - netdevice to install the probe filter on
check_tc_mpls_lse_stats()
{
	local netdev=$1; shift
	local json_rc

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2                                    \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	# Remember the jq verdict, the filter has to be removed either way.
	tc -j filter show dev $netdev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?
	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower

	[[ $json_rc -eq 0 ]] && return 0

	echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
	return $ksft_skip
}
200
# Exit with $ksft_skip unless the "tc filter help" text mentions "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}
209
# Exit with $ksft_skip unless the "tc help" text mentions "chain".
check_tc_chain_support()
{
	tc help 2>&1 | grep chain &> /dev/null && return 0

	echo "SKIP: iproute2 too old; tc is missing chain support"
	exit $ksft_skip
}
218
# Exit with $ksft_skip unless the "tc actions help" text mentions "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}
227
# Exit with $ksft_skip unless the mqprio help text of tc mentions "fp ".
check_tc_fp_support()
{
	tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp " && return 0

	echo "SKIP: iproute2 too old; tc is missing frame preemption support"
	exit $ksft_skip
}
236
# Exit with $ksft_skip unless the "ethtool --help" text mentions "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
245
# Exit with $ksft_skip unless the "ethtool --help" text lists "--show-mm".
check_ethtool_mm_support()
{
	ethtool --help 2>&1 | grep -- '--show-mm' &> /dev/null && return 0

	echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
	exit $ksft_skip
}
254
# Exit with $ksft_skip unless the "ethtool --help" text lists "--all-groups".
check_ethtool_counter_group_support()
{
	if ! ethtool --help 2>&1 | grep -- '--all-groups' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing standard counter group support"
		exit $ksft_skip
	fi
}
263
# Succeed if ethtool reports a non-empty pMAC statistics group for a device.
# $1 - netdevice
# $2 - name of the standard statistics group to look up
# Returns 0 when the group has at least one entry, non-zero otherwise
# (including when ethtool fails or the output cannot be interpreted).
check_ethtool_pmac_std_stats_support()
{
	local dev=$1; shift
	local grp=$1; shift
	local count

	count=$(ethtool --json -S "$dev" --all-groups --src pmac 2>/dev/null \
		| jq --arg grp "$grp" '.[][$grp] | length')

	# An empty or multi-line result would make a plain "[ 0 -ne ... ]"
	# test print a syntax error; treat anything that is not a single
	# number as "not supported".
	[[ $count =~ ^[0-9]+$ ]] && (( count != 0 ))
}
272
# Return $ksft_skip unless "bridge -d link show" output contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
280
# Return $ksft_skip unless "bridge -d link show" output contains "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
288
# Skip unless running with an effective UID of 0.
[[ "$(id -u)" -eq 0 ]] || {
	echo "SKIP: need root privileges"
	exit $ksft_skip
}
293
# Exit with $ksft_skip unless a netdevice is bound to the expected driver.
# $1 - netdevice
# $2 - expected driver name (compared as a [[ ]] pattern)
check_driver()
{
	local netdev=$1; shift
	local want=$1; shift
	local have=$(driver_name_get $netdev)

	[[ $have == $want ]] && return 0

	echo "SKIP: expected driver $want for $netdev, got $have instead"
	exit $ksft_skip
}
305
# Run the tc JSON-support check only when the importer asked for it.
case "$CHECK_TC" in
yes)
	check_tc_version
	;;
esac
309
# IPv6 support was added in v3.0. Exit with $ksft_skip when "msend -v"
# reports a major version below 3.
check_mtools_version()
{
	local reported="$(msend -v)"
	local major_num

	# Drop the "msend version " banner, then keep what precedes the first dot.
	reported=${reported##msend version }
	major_num=$(echo $reported | cut -d. -f1)

	[ $major_num -lt 3 ] || return 0

	echo "SKIP: expected mtools version 3.0, got $reported"
	exit $ksft_skip
}
324
# Check for the tools that the REQUIRE_* settings ask for.
[[ "$REQUIRE_JQ" != "yes" ]] || require_command jq
[[ "$REQUIRE_MZ" != "yes" ]] || require_command $MZ
[[ "$REQUIRE_TEAMD" != "yes" ]] || require_command $TEAMD
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
	# https://github.com/troglobit/mtools
	require_command msend
	require_command mreceive
	check_mtools_version
fi
340
341##############################################################################
342# Command line options handling
343
# Validate the environment of a driver test: at least one complete address
# pair (LOCAL_V4 + REMOTE_V4 or LOCAL_V6 + REMOTE_V6) plus REMOTE_TYPE and
# REMOTE_ARGS must be non-empty. Exits with $ksft_skip otherwise.
check_env() {
	local required

	# Neither the IPv4 pair nor the IPv6 pair is complete.
	if [[ ( -z "$LOCAL_V4" || -z "$REMOTE_V4" ) &&
	      ( -z "$LOCAL_V6" || -z "$REMOTE_V6" ) ]]; then
		echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6"
		echo "Please see tools/testing/selftests/drivers/net/README.rst"
		exit "$ksft_skip"
	fi

	for required in REMOTE_TYPE REMOTE_ARGS; do
		if [[ -z "${!required}" ]]; then
			echo "SKIP: Invalid environment, missing $required"
			exit "$ksft_skip"
		fi
	done
}
362
# Run a command on a target described as "<type>:<args>".
# $1 - target string; the part before the first ':' selects how to run
# $@ - command and arguments
__run_on()
{
	local target=$1; shift
	local kind where

	IFS=':' read -r kind where <<< "$target"

	if [[ $kind == netns ]]; then
		# $where is the namespace name.
		ip netns exec "$where" "$@"
	elif [[ $kind == ssh ]]; then
		# $where is the user@host to connect to.
		ssh -n "$where" "$@"
	else
		# "local" and every other type, including an empty target (e.g.
		# an interface that is not in the TARGETS array): run locally.
		"$@"
	fi
}
388
# Run a command on the target that an interface resides on.
# $1 - interface name, used to look up TARGETS when
#      DRIVER_TEST_CONFORMANT is "yes"; otherwise the command runs locally
# $@ - command and arguments
run_on()
{
	local iface=$1; shift
	local where

	case "${DRIVER_TEST_CONFORMANT}" in
	yes)
		where="${TARGETS[$iface]}"
		;;
	*)
		where="local:"
		;;
	esac

	__run_on "$where" "$@"
}
400
# Print the name(s) of the interface(s) on a target that carry an address.
# $1 - target string as understood by __run_on
# $2 - IP address to look for
get_ifname_by_ip()
{
	local where=$1; shift
	local addr=$1; shift

	__run_on "$where" ip -j addr show to "$addr" \
		| jq -r '.[].ifname'
}
408
# Whether the test is conforming to the requirements and usage described in
# drivers/net/README.rst.
: "${DRIVER_TEST_CONFORMANT:=no}"

# Maps an interface name to the target it resides on, as "<type>:<args>".
declare -A TARGETS

# Based on DRIVER_TEST_CONFORMANT, decide if to source drivers/net/net.config
# or not. In the "yes" case, the test expects to pass the arguments through the
# variables specified in drivers/net/README.rst file. If not, fallback on
# parsing the script arguments for interface names.
if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then
	if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then
		source "$net_forwarding_dir/../../drivers/net/net.config"
	fi

	if (( NUM_NETIFS > 2)); then
		echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2"
		exit "$ksft_skip"
	fi

	check_env

	# Populate the NETIFS and TARGETS arrays automatically based on the
	# environment variables. The TARGETS array is indexed by the network
	# interface name keeping track of the target on which the interface
	# resides. Values will be strings of the following format -
	# <type>:<args>.
	#
	# TARGETS[eth0]="local:" - meaning that the eth0 interface is
	# accessible locally
	# TARGETS[eth1]="netns:foo" - eth1 is in the foo netns
	# TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is accessible through
	# running the 'ssh root@10.0.0.2' command.

	unset NETIFS
	declare -A NETIFS

	NETIFS[p1]="$NETIF"
	TARGETS[$NETIF]="local:"

	# Locate the name of the remote interface. Test for a non-empty
	# REMOTE_V4, like check_env does: a REMOTE_V4 that is set but empty
	# must fall through to the IPv6 address instead of looking up "".
	remote_target="$REMOTE_TYPE:$REMOTE_ARGS"
	if [[ -n "$REMOTE_V4" ]]; then
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4")
	else
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6")
	fi
	if [[ -z "$remote_netif" ]]; then
		echo "SKIP: cannot find remote interface"
		exit "$ksft_skip"
	fi

	# TARGETS is keyed by interface name, so the names must differ.
	if [[ "$NETIF" == "$remote_netif" ]]; then
		echo "SKIP: local and remote interfaces cannot have the same name"
		exit "$ksft_skip"
	fi

	NETIFS[p2]="$remote_netif"
	TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS"
else
	count=0
	# Prime NETIFS from the command line, but retain if none given.
	if [[ $# -gt 0 ]]; then
		unset NETIFS
		declare -A NETIFS

		while [[ $# -gt 0 ]]; do
			count=$((count + 1))
			NETIFS[p$count]="$1"
			TARGETS[$1]="local:"
			shift
		done
	fi
fi
483
484##############################################################################
485# Network interfaces configuration
486
# The importing script has to define how many interfaces it needs.
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}

# NETIFS has to provide at least that many entries.
if (( ${#NETIFS[@]} < NUM_NETIFS )); then
	echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
	exit $ksft_skip
fi

# With N entries defined, each of p1..pN must be non-empty.
for i in $(seq ${#NETIFS[@]}); do
	[[ ${NETIFS[p$i]} ]] && continue

	echo "SKIP: NETIFS[p$i] not given"
	exit $ksft_skip
done
503
# Create the veth pairs (p1,p2), (p3,p4), ... for the first NUM_NETIFS ports.
# A pair is only created when its odd-numbered interface does not exist yet.
# Exits with $ksft_skip if a name is missing and with 1 if creation fails.
create_netif_veth()
{
	local i

	# Step over the ports two at a time: p$i is one end, p$j its peer.
	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		local j=$((i + 1))

		if [[ -z ${NETIFS[p$i]} ]]; then
			echo "SKIP: Cannot create interface. Name not specified"
			exit $ksft_skip
		fi

		if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
			if ! ip link add ${NETIFS[p$i]} type veth \
				peer name ${NETIFS[p$j]}; then
				echo "Failed to create netif"
				exit 1
			fi
		fi
	done
}
528
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# handled; any other type prints an error and exits with status 1.
create_netif()
{
	if [[ $NETIF_TYPE == veth ]]; then
		create_netif_veth
		return
	fi

	echo "Can not create interfaces of type \'$NETIF_TYPE\'"
	exit 1
}
539
# Original MAC address of each interface, keyed by interface name.
declare -A MAC_ADDR_ORIG

# Save the MAC address of the first NUM_NETIFS interfaces in MAC_ADDR_ORIG
# and give port number N the address 00:01:02:03:04:<N in hex>.
# Globals: NUM_NETIFS, NETIFS (read), MAC_ADDR_ORIG (written)
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the loop index local so that a caller's "i" is left untouched.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \
			ip -j link show dev "$dev" | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		run_on "$dev" ip link set dev "$dev" address $new_addr
	done
}
557
# Restore the MAC addresses saved in MAC_ADDR_ORIG on the first NUM_NETIFS
# interfaces.
# Globals: NUM_NETIFS, NETIFS, MAC_ADDR_ORIG (read)
mac_addr_restore()
{
	local dev=
	# Keep the loop index local so that a caller's "i" is left untouched.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		run_on "$dev" \
			ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]}
	done
}
568
# Create the interfaces and pin their MAC addresses, if requested.
[[ "$NETIF_CREATE" != "yes" ]] || create_netif

[[ "$STABLE_MAC_ADDRS" != "yes" ]] || mac_addr_prepare

# Every interface that the test needs has to exist on its target.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	int="${NETIFS[p$i]}"

	if ! run_on "$int" ip link show dev "$int" &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
586
587##############################################################################
588# Helpers
589
# Run a command and invert its result: succeed iff the command failed.
not()
{
	! "$@"
}
595
# Print the largest of the integers given as arguments. With no arguments an
# empty line is printed.
get_max()
{
	local arr=("$@")
	# max and cur are local so that globals of the same name survive.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo $max
}
609
# Run a command and filter its output for an address.
# $1 - address pattern to grep for
# $2 - optional "self" or "master": additionally grep for that word
# $3 - optional "-v" (only after self/master): invert the second grep
# $@ - the command whose output is filtered
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case "$1" in
	self|master)
		word=$1; shift
		[[ $1 != -v ]] || { flag=$1; shift; }
		;;
	esac

	# Without a word the second grep has an empty pattern and passes
	# every line through.
	$@ | grep $addr | grep $flag "$word"
}
625
# Run a command and succeed iff its output contains "Link detected: yes".
wait_for_port_up()
{
	local needle="Link detected: yes"

	"$@" | grep -q "$needle"
}
630
# Run a command and succeed iff its output contains "offload".
wait_for_offload()
{
	local needle=offload

	"$@" | grep -q "$needle"
}
635
# Run a command and succeed iff its output contains "trap".
wait_for_trap()
{
	local needle=trap

	"$@" | grep -q "$needle"
}
640
# Wait up to $INTERFACE_TIMEOUT iterations for an interface to come up; on
# timeout log a failed test and exit with status 1.
# $1 - interface
# $2 - settle time passed on to setup_wait_dev_with_timeout
#      (default: $WAIT_TIME)
setup_wait_dev()
{
	local dev=$1; shift
	local settle=${1:-$WAIT_TIME}; shift

	setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $settle && return 0

	check_err 1
	log_test setup_wait_dev ": Interface $dev does not come up."
	exit 1
}
654
# Poll once per second until "ip link show" reports an interface in
# "state UP", then sleep for the settle time.
# $1 - interface
# $2 - maximum number of polls (default: $WAIT_TIMEOUT)
# $3 - settle time in seconds after the link is up (default: $WAIT_TIME)
# Returns 0 once the interface is up, 1 when the polls are exhausted.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local budget=${1:-$WAIT_TIMEOUT}; shift
	local settle=${1:-$WAIT_TIME}; shift
	local tries_left

	for ((tries_left = budget; tries_left > 0; --tries_left)); do
		if run_on "$dev" ip link show dev "$dev" up \
			| grep 'state UP' &> /dev/null; then
			sleep $settle
			return 0
		fi

		sleep 1
	done

	return 1
}
675
# Wait for ports p1..pN to come up, then sleep for $WAIT_TIME.
# $1 - N, the number of ports to wait for
setup_wait_n()
{
	local num_netifs=$1; shift
	local idx=1

	while ((idx <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$idx]} 0
		idx=$((idx + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
688
# Wait for all NUM_NETIFS ports of the test to come up.
setup_wait()
{
	setup_wait_n "${NUM_NETIFS}"
}
693
# Wait for a netdevice to appear, using slowwait around "ip link show".
# On timeout a failed test is logged and the script exits with $EXIT_STATUS.
# $1 - netdevice
# $2 - timeout handed to slowwait (default: $WAIT_TIMEOUT)
wait_for_dev()
{
	local dev=$1; shift
	local timeout=${1:-$WAIT_TIMEOUT}; shift

	if ! slowwait $timeout ip link show dev $dev &> /dev/null; then
		check_err 1
		log_test wait_for_dev "Interface $dev did not appear."
		exit $EXIT_STATUS
	fi
}
706
# Steps to run before the cleanup proper: optionally wait for a line of
# input (PAUSE_ON_CLEANUP=yes) and restore the original MAC addresses
# (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ ${PAUSE_ON_CLEANUP} == yes ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	[[ "$STABLE_MAC_ADDRS" != "yes" ]] || mac_addr_restore
}
718
# For both IPv4 and IPv6, add a rule for the local table at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
726
# Undo vrf_prepare: for IPv6, then IPv4, re-add the local table rule at
# preference 0 and delete the one at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
734
# Run vrf_prepare and register vrf_cleanup with defer.
adf_vrf_prepare()
{
	vrf_prepare
	defer vrf_cleanup
}
740
# Last routing table ID handed out, and the table ID assigned to each VRF.
__last_tb_id=0
declare -A __TB_IDS

# Assign the next table ID to a VRF and record it in __TB_IDS.
# $1 - VRF name
# The ID is handed back as the function's return status, so the caller reads
# it from $?.
__vrf_td_id_assign()
{
	local vrf=$1

	__TB_IDS[$vrf]=$((++__last_tb_id))
	return $__last_tb_id
}
752
# Look up the table ID recorded for a VRF in __TB_IDS.
# $1 - VRF name
# The ID is handed back as the function's return status.
__vrf_td_id_lookup()
{
	local vrf=$1
	local tb_id=${__TB_IDS[$vrf]}

	return $tb_id
}
759
# Create a VRF device bound to a freshly assigned routing table, and install
# an unreachable default route (metric 4278198272) for IPv4 and IPv6 in it.
# $1 - VRF name
vrf_create()
{
	local vrf_name=$1
	local tb_id
	local family

	# The table ID comes back as the return status.
	__vrf_td_id_assign $vrf_name
	tb_id=$?

	ip link add dev $vrf_name type vrf table $tb_id
	for family in -4 -6; do
		ip $family route add table $tb_id unreachable default metric 4278198272
	done
}
772
# Remove the unreachable default routes from a VRF's table (IPv6 first, then
# IPv4) and delete the VRF device.
# $1 - VRF name
vrf_destroy()
{
	local vrf_name=$1
	local tb_id
	local family

	# The table ID comes back as the return status.
	__vrf_td_id_lookup $vrf_name
	tb_id=$?

	for family in -6 -4; do
		ip $family route del table $tb_id unreachable default metric 4278198272
	done
	ip link del dev $vrf_name
}
785
# Add or delete a list of addresses on an interface.
# $1 - interface
# $2 - "add" or "del", passed to "ip address"
# $@ - addresses
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	# Loop variable is local so that a caller's "addrstr" survives.
	local addrstr

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		# NOTE(review): $addrstr is expanded unquoted as before, in case
		# a caller passes a multi-word address specification.
		ip address $add_del $addrstr dev $if_name
	done
}
800
# Enslave an interface to a VRF, bring it up and add addresses to it.
# $1 - interface
# $2 - VRF device
# $@ - addresses
__simple_if_init()
{
	local port=$1; shift
	local master=$1; shift

	ip link set dev $port master $master
	ip link set dev $port up

	__addr_add_del $port add "$@"
}
812
# Remove addresses from an interface, bring it down and release it from its
# master.
# $1 - interface
# $@ - addresses
__simple_if_fini()
{
	local port=$1; shift

	__addr_add_del $port del "$@"

	ip link set dev $port down
	ip link set dev $port nomaster
}
823
# Create a VRF named "v<interface>", bring it up, and put the interface into
# it with the given addresses.
# $1 - interface
# $@ - addresses
simple_if_init()
{
	local port=$1; shift
	local vrf=v$port

	vrf_create $vrf
	ip link set dev $vrf up
	__simple_if_init $port $vrf "$@"
}
838
# Undo simple_if_init: deconfigure the interface and destroy the VRF named
# "v<interface>".
# $1 - interface
# $@ - addresses
simple_if_fini()
{
	local port=$1; shift

	__simple_if_fini $port "$@"
	vrf_destroy v$port
}
852
# Run simple_if_init and register the matching simple_if_fini with defer.
# Arguments are the same as for simple_if_init.
adf_simple_if_init()
{
	simple_if_init "$@"
	defer simple_if_fini "$@"
}
858
# Create a tunnel device and bring it up.
# $1 - device name
# $2 - link type
# $3 - local endpoint address
# $4 - remote endpoint address
# $@ - further arguments appended to "ip link add"
tunnel_create()
{
	local tun=$1; shift
	local kind=$1; shift
	local src=$1; shift
	local dst=$1; shift

	ip link add name $tun type $kind local $src remote $dst "$@"
	ip link set dev $tun up
}
870
# Delete a tunnel device.
# $1 - device name
tunnel_destroy()
{
	ip link del dev $1
}
877
# Create the VLAN device "<interface>.<vid>", optionally enslave it to a VRF,
# bring it up and add addresses to it.
# $1 - lower interface
# $2 - VLAN ID
# $3 - VRF to enslave to, or "" for none
# $@ - addresses
vlan_create()
{
	local parent=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local vlan_dev=$parent.$vid

	ip link add name $vlan_dev link $parent type vlan id $vid
	[[ -z $vrf ]] || ip link set dev $vlan_dev master $vrf
	ip link set dev $vlan_dev up
	__addr_add_del $vlan_dev add "$@"
}
893
# Delete the VLAN device "<interface>.<vid>".
# $1 - lower interface
# $2 - VLAN ID
vlan_destroy()
{
	ip link del dev $1.$2
}
902
# Create a team device with $TEAMD and enslave the given ports to it.
# $1 - team device name
# $2 - runner name, placed into the teamd JSON configuration
# $@ - ports to enslave
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Loop variable is local so that a caller's "slave" survives.
	local slave

	require_command $TEAMD
	# $mode is quoted so that the configuration stays a single argument.
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'"$mode"'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
917
# Stop the teamd instance of a team device ("$TEAMD -t <dev> -k").
# $1 - team device name
team_destroy()
{
	local team=$1; shift

	$TEAMD -t $team -k
}
924
# Print the "master" field of an interface from "ip -j link show".
# $1 - interface
master_name_get()
{
	local port=$1
	local filter='.[]["master"]'

	ip -j link show dev $port | jq -r "$filter"
}
931
# Print one 64-bit link statistic of an interface.
# $1 - interface
# $2 - key under "stats64" (callers in this file pass "rx" or "tx")
# $3 - counter name below that key
link_stats_get()
{
	local port=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter='.[]["stats64"]["'$dir'"]["'$stat'"]'

	ip -j -s link show dev $port | jq "$filter"
}
941
# Print the number of packets transmitted by an interface.
# $1 - interface
link_stats_tx_packets_get()
{
	local direction=tx counter=packets

	link_stats_get $1 $direction $counter
}
946
# Print the number of receive errors of an interface.
# $1 - interface
link_stats_rx_errors_get()
{
	local direction=rx counter=errors

	link_stats_get $1 $direction $counter
}
951
# Print the value of one "ethtool -S" counter: the text after the first ':'
# on the first line that matches "<counter>:" after optional leading spaces.
# $1 - netdevice
# $2 - counter name
ethtool_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	local pattern="^ *$stat:"

	ethtool -S $dev \
		| grep "$pattern" \
		| head -n 1 \
		| cut -d: -f2
}
959
# Print one standard ethtool statistic of a device, queried on the target
# that the device resides on.
# $1 - netdevice
# $2 - statistics group; "pause" is read via "ethtool -I -a", every other
#      group via "ethtool -S --groups"
# $3 - counter name within the group
# $4 - value for ethtool's --src option
ethtool_std_stats_get()
{
	local dev=$1; shift
	local grp=$1; shift
	local name=$1; shift
	local src=$1; shift

	if [[ "$grp" != "pause" ]]; then
		run_on "$dev" \
			ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \
			jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]'
	else
		run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \
			jq --arg name "$name" '.[].statistics[$name]'
	fi
}
977
# Print a statistic of the qdisc with a given handle.
# $1 - netdevice
# $2 - qdisc handle to select
# $3 - jq expression applied to the selected qdisc
qdisc_stats_get()
{
	local dev=$1; shift
	local handle=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.handle == "'"$handle"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" | jq "$filter"
}
987
# Print a statistic of the qdisc attached under a given parent. The dump
# includes "invisible" qdiscs.
# $1 - netdevice
# $2 - parent to select
# $3 - jq expression applied to the selected qdisc
qdisc_parent_stats_get()
{
	local dev=$1; shift
	local parent=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.parent == "'"$parent"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" invisible | jq "$filter"
}
997
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/<dev> that start with a counter name.
# $1 - netdevice
# $2 - counter name (matched as a line prefix)
ipv6_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	local snmp_file=/proc/net/dev_snmp6/$dev

	cat $snmp_file | grep "^$stat" | cut -f2
}
1005
# Print one counter from "ip stats show ... group offload subgroup <suite>".
# $1 - offload subgroup to query
# $2 - interface
# $3 - key under "stats64"
# $4 - counter name below that key
hw_stats_get()
{
	local suite=$1; shift
	local port=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter=".[0].stats64.$dir.$stat"

	ip -j stats show dev $port group offload subgroup $suite |
		jq "$filter"
}
1016
# Print one field of the group_stats entry of a nexthop group member.
# $1 - field name to print
# $2 - nexthop group ID
# $3 - member ID to select (passed to jq as JSON)
__nh_stats_get()
{
	local field=$1; shift
	local group=$1; shift
	local member=$1; shift

	ip -j -s -s nexthop show id $group |
	    jq --argjson member_id "$member" --arg key "$field" \
	       '.[].group_stats[] | select(.id == $member_id) | .[$key]'
}
1027
# Print the "packets" counter of a nexthop group member.
# $1 - nexthop group ID
# $2 - member ID
nh_stats_get()
{
	__nh_stats_get packets "$1" "$2"
}
1035
# Print the "packets_hw" counter of a nexthop group member.
# $1 - nexthop group ID
# $2 - member ID
nh_stats_get_hw()
{
	__nh_stats_get packets_hw "$1" "$2"
}
1043
# Print a rate in bits per second with a unit, scaling by 1024 per step
# through bps, Kbps, Mbps and Gbps.
# $1 - rate in bits per second
humanize()
{
	local speed=$1; shift
	# Loop variable is local so that a caller's "unit" survives.
	local unit

	for unit in bps Kbps Mbps Gbps; do
		# Gbps is the largest unit: stop scaling once it is reached,
		# otherwise a value of 1024 Gbps or more would be divided once
		# more while still being labelled Gbps.
		if [[ $unit == Gbps ]] || (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}
1058
# Print 8 * (t1 - t0) / interval, using integer arithmetic.
# $1 - first counter sample (t0)
# $2 - second counter sample (t1)
# $3 - interval between the two samples
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((8 * delta / interval))
}
1067
# Print (t1 - t0) / interval, using integer arithmetic.
# $1 - first counter sample (t0)
# $2 - second counter sample (t1)
# $3 - interval between the two samples
packets_rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
1076
# Print a colon-separated MAC address as a hexadecimal number ("0x...").
# The first byte is weighted with 1 << 40, each following byte with the
# previous weight shifted right by 8 bits.
# $1 - MAC address
ether_addr_to_u64()
{
	local mac="$1"
	local weight=$((1 << 40))
	local total=0
	local octet

	for octet in ${mac//:/ }; do
		total=$((total + weight * 0x$octet))
		weight=$((weight >> 8))
	done

	printf "0x%x" $total
}
1094
# Print the low 48 bits of a number as a colon-separated MAC address, most
# significant byte first and without a trailing newline.
# $1 - number (any form accepted by shell arithmetic)
u64_to_ether_addr()
{
	local val=$1
	local bits
	local sep=""

	for ((bits = 40; bits >= 0; bits -= 8)); do
		printf "%s%02x" "$sep" $(((val >> bits) & 0xff))
		sep=":"
	done
}
1109
# Print the first address with scope "link" that is configured on an
# interface.
# $1 - interface
ipv6_lladdr_get()
{
	local port=$1
	local filter='.[]["addr_info"][] | select(.scope == "link").local'

	ip -j addr show dev $port | \
		jq -r "$filter" | \
		head -1
}
1118
# Print the ageing time of a bridge in seconds.
# $1 - bridge device
bridge_ageing_time_get()
{
	local bridge=$1
	local raw
	local filter='.[]["linkinfo"]["info_data"]["ageing_time"]'

	raw=$(ip -j -d link show dev $bridge | jq "$filter")

	# Need to divide by 100 to convert to seconds.
	echo $((raw / 100))
}
1129
# Saved sysctl values, keyed by sysctl name.
declare -A SYSCTL_ORIG

# Record the current value of a sysctl in SYSCTL_ORIG.
# $1 - sysctl name
sysctl_save()
{
	local knob=$1; shift

	SYSCTL_ORIG[$knob]=$(sysctl -n $knob)
}
1137
# Save the current value of a sysctl, then write a new one.
# $1 - sysctl name
# $2 - new value
sysctl_set()
{
	local knob=$1; shift
	local new_value=$1; shift

	sysctl_save "$knob"
	sysctl -qw $knob="$new_value"
}
1146
# Write back the value that was recorded for a sysctl in SYSCTL_ORIG.
# $1 - sysctl name
sysctl_restore()
{
	local knob=$1; shift
	local saved=${SYSCTL_ORIG[$knob]}

	sysctl -qw $knob="$saved"
}
1153
# Set net.ipv4.conf.all.forwarding and net.ipv6.conf.all.forwarding to 1
# through sysctl_set.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}
1159
# Restore the forwarding sysctls changed by forwarding_enable, IPv6 first.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}
1165
# Run forwarding_enable and register forwarding_restore with defer.
adf_forwarding_enable()
{
	forwarding_enable
	defer forwarding_restore
}
1171
# Saved MTU values, keyed by device name.
declare -A MTU_ORIG

# Record the current MTU of a device in MTU_ORIG, then set a new one.
# $1 - netdevice
# $2 - new MTU
mtu_set()
{
	local netdev=$1; shift
	local new_mtu=$1; shift

	MTU_ORIG["$netdev"]=$(ip -j link show dev $netdev | jq -e '.[].mtu')
	ip link set dev $netdev mtu $new_mtu
}
1181
# Set the MTU of a device back to the value recorded in MTU_ORIG.
# $1 - netdevice
mtu_restore()
{
	local netdev=$1; shift
	local saved=${MTU_ORIG["$netdev"]}

	ip link set dev $netdev mtu $saved
}
1188
# Check that "ethtool -k" reports "hw-tc-offload: on" for ports p1..pN.
# $1 - N, the number of ports to check (default: $NUM_NETIFS)
# Returns 0 if all checked ports have it on, 1 otherwise.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the loop index local so that a caller's "i" is left untouched.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
1203
# Install a flower filter at pref 1 that traps traffic in hardware, falling
# back to a "continue" filter when the trap cannot be installed.
# $1 - netdevice
# $2 - direction keyword handed to tc (e.g. ingress)
trap_install()
{
	local dev=$1; shift
	local direction=$1; shift

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}
1219
# Remove the pref 1 flower filter installed by trap_install.
# $1 - netdevice
# $2 - direction keyword handed to tc
trap_uninstall()
{
	tc filter del dev $1 $2 pref 1 flower
}
1227
# Add or delete an ingress flower filter that matches ICMP or ICMPv6 packets
# and passes them.
# $1 - "add" or "del"
# $2 - filter preference
# $3 - protocol suffix: "" for IPv4/ICMP, "v6" for IPv6/ICMPv6
# $4 - netdevice
# $5 - additional flower match keys; expanded unquoted so that it may hold
#      several words
__icmp_capture_add_del()
{
	local op=$1; shift
	local pref=$1; shift
	local suffix=$1; shift
	local dev=$1; shift
	local match=$1; shift

	tc filter $op dev "$dev" ingress \
	   proto ip$suffix pref $pref \
	   flower ip_proto icmp$suffix $match \
	   action pass
}
1241
# Install the ICMP capture filter at pref 100.
# $1 - netdevice
# $2 - additional flower match keys
icmp_capture_install()
{
	__icmp_capture_add_del add 100 "" "$1" "$2"
}
1249
# Remove the ICMP capture filter at pref 100.
# $1 - netdevice
# $2 - additional flower match keys
icmp_capture_uninstall()
{
	__icmp_capture_add_del del 100 "" "$1" "$2"
}
1257
# Install the ICMPv6 capture filter at pref 100.
# $1 - netdevice
# $2 - additional flower match keys
icmp6_capture_install()
{
	__icmp_capture_add_del add 100 v6 "$1" "$2"
}
1265
# Remove the ICMPv6 capture filter at pref 100.
# $1 - netdevice
# $2 - additional flower match keys
icmp6_capture_uninstall()
{
	__icmp_capture_add_del del 100 v6 "$1" "$2"
}
1273
# Add or delete an ingress flower filter for 802.1q packets that passes them.
# $1 - "add" or "del"
# $2 - filter preference
# $3 - netdevice
# $4 - flower match keys; expanded unquoted so that it may hold several
#      words
__vlan_capture_add_del()
{
	local op=$1; shift
	local pref=$1; shift
	local dev=$1; shift
	local match=$1; shift

	tc filter $op dev "$dev" ingress \
	   proto 802.1q pref $pref \
	   flower $match \
	   action pass
}
1286
# Install the VLAN capture filter at pref 100.
# $1 - netdevice
# $2 - flower match keys
vlan_capture_install()
{
	__vlan_capture_add_del add 100 "$1" "$2"
}
1294
# Remove the VLAN capture filter at pref 100.
# $1 - netdevice
# $2 - flower match keys
vlan_capture_uninstall()
{
	__vlan_capture_add_del del 100 "$1" "$2"
}
1302
# Add or delete eight ICMP capture filters, one per DSCP value in
# base..base+7. The filter for DSCP value D uses pref D + 100 and matches
# ip_tos D << 2 with skip_hw.
# $1 - "add" or "del"
# $2 - netdevice
# $3 - first DSCP value
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp;
	# Loop variable is local so that a caller's "prio" survives.
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1316
# Install the DSCP capture filters for base..base+7.
# $1 - netdevice
# $2 - first DSCP value
dscp_capture_install()
{
	__dscp_capture_add_del add $1 $2
}
1324
# Remove the DSCP capture filters for base..base+7.
# $1 - netdevice
# $2 - first DSCP value
dscp_capture_uninstall()
{
	__dscp_capture_add_del del $1 $2
}
1332
# Print one "[<dscp>]=<count> " line for each DSCP value in base..base+7,
# where the count comes from tc_rule_stats_get for pref <dscp> + 100.
# $1 - netdevice
# $2 - first DSCP value
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	# Loop variable is local so that a caller's "prio" survives.
	local prio
	local dscp
	local t

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1344
# Add a clsact qdisc to a device and an ingress matchall filter at pref
# 10000 that drops every packet.
# $1 - netdevice
matchall_sink_create()
{
	local sink=$1; shift

	tc qdisc add dev $sink clsact
	tc filter add dev $sink ingress pref 10000 matchall action drop
}
1355
# Default cleanup: run pre_cleanup, then defer_scopes_cleanup.
cleanup()
{
	pre_cleanup
	defer_scopes_cleanup
}
1361
# Compare the measured packet distribution between two paths with the
# configured weights, and log the result as test "$desc". The test fails if
# either path saw no packets, or if the measured ratio deviates from the
# weight ratio by more than 15% of the weight ratio.
# $1 - test description
# $2 - weight of the first path
# $3 - weight of the second path
# $4 - packets seen on the first path
# $5 - packets seen on the second path
# Globals: RET (reset to 0 here)
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	# Ratios are always computed as larger weight over smaller weight.
	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
	       check_err 1 "Packet difference is 0"
	       # Log under the caller's description, like the regular path
	       # below, so that pass and fail share one test name.
	       log_test "$desc"
	       log_info "Expected ratio $weights_ratio"
	       return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	# Absolute difference between the two ratios.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1404
# Run a command inside a network namespace, with this library loaded.
# $1 - namespace name
# $2... - command and its arguments
# The command is fed to a bash started by `ip netns exec'. That bash first
# sets NUM_NETIFS=0 and sources lib.sh. Each argument is %q-quoted so that it
# survives being re-parsed by the inner shell.
in_ns()
{
	local name=$1; shift
	local quoted_cmd

	quoted_cmd=$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$quoted_cmd
	EOF
}
1415
1416##############################################################################
1417# Tests
1418
# Ping an address from the VRF that an interface is enslaved to.
# $1 - interface; its master (from master_name_get) is used for `ip vrf exec'
# $2 - destination address
# $3 - optional extra $PING arguments (word-split on purpose)
# Sends PING_COUNT packets 0.1 s apart with a PING_TIMEOUT deadline. All
# output is discarded; the return status is the only result.
ping_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf

	vrf=$(master_name_get $if_name)
	ip vrf exec $vrf $PING $args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT \
		$dip &> /dev/null
}
1431
# Test that an IPv4 ping succeeds.
# $1 - interface, $2 - destination address
# $3 - optional suffix appended to the logged test name "ping"
# Resets RET before running.
ping_test()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping_do $if_name $dip
	check_err $?
	log_test "ping$suffix"
}
1440
# Test that an IPv4 ping does NOT succeed.
# $1 - interface, $2 - destination address
# $3 - optional suffix appended to the logged test name "ping fails"
# Resets RET before running.
ping_test_fails()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping_do $if_name $dip
	check_fail $?
	log_test "ping fails$suffix"
}
1449
# Ping an IPv6 address from the VRF that an interface is enslaved to.
# $1 - interface; its master (from master_name_get) is used for `ip vrf exec'
# $2 - destination address
# $3 - optional extra $PING6 arguments (word-split on purpose)
# Sends PING_COUNT packets 0.1 s apart with a PING_TIMEOUT deadline. All
# output is discarded; the return status is the only result.
ping6_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf

	vrf=$(master_name_get $if_name)
	ip vrf exec $vrf $PING6 $args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT \
		$dip &> /dev/null
}
1462
# Test that an IPv6 ping succeeds.
# $1 - interface, $2 - destination address
# $3 - optional suffix appended to the logged test name "ping6"
# Resets RET before running.
ping6_test()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping6_do $if_name $dip
	check_err $?
	log_test "ping6$suffix"
}
1471
# Test that an IPv6 ping does NOT succeed.
# $1 - interface, $2 - destination address
# $3 - optional suffix appended to the logged test name "ping6 fails"
# Resets RET before running.
ping6_test_fails()
{
	local if_name=$1 dip=$2 suffix=$3

	RET=0

	ping6_do $if_name $dip
	check_fail $?
	log_test "ping6 fails$suffix"
}
1480
# Exercise FDB learning on a bridge port and log it as "FDB learning".
# $1 - bridge
# $2 - bridge port connected to `host1_if'
# $3 - host interface behind that bridge port
# $4 - second host interface, used to send traffic towards the test MAC
# Steps: the test MAC must be absent; with flooding off a packet to it must
# not reach host1; after host1 sends from that MAC the entry must exist and
# the packet must reach host1; the entry must age out; with learning off it
# must not be re-learned. Port and host settings are restored at the end.
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time
	# jq programs: FDB entry for the test MAC / exactly one hit on the
	# capture filter (handle 101).
	local fdb_query=".[] | select(.mac == \"$mac\")"
	local hit_query=".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)"

	RET=0

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_query" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With unknown unicast flooding disabled on `br_port1`, a packet can
	# only leave through the port once a matching FDB entry exists.
	bridge link set dev $br_port1 flood off

	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$hit_query" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Let the bridge learn the MAC from host1.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_query" &> /dev/null
	check_err $? "Did not find FDB record when should"

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$hit_query" &> /dev/null
	check_err $? "Packet did not reach second host when should"

	# Sleep 10 seconds past the ageing time so that the FDB record is
	# certain to have aged out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_query" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning disabled the same packet must not create an entry.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_query" &> /dev/null
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning on

	# Undo the capture setup and the flood setting.
	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1557
# Send one packet and check whether it was flooded to a host interface.
# $1 - "true" if the packet is expected on `host2_if', "false" if it is not
# $2 - destination MAC, $3 - destination IP
# $4 - sending host interface
# $5 - host interface on which flooding is observed
# Returns 1 when the observation contradicts the expectation, 0 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local seen_rc

	# An ingress filter on `host2_if` counts packets sent to `mac`, which
	# tells us whether the packet was flooded there.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# jq -e succeeds only if the filter counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	seen_rc=$?

	if [[ $should_flood == "true" ]]; then
		[[ $seen_rc -ne 0 ]] && err=1
	elif [[ $should_flood == "false" ]]; then
		[[ $seen_rc -eq 0 ]] && err=1
	fi

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1591
# Test the per-port unknown unicast flood control; logged as
# "Unknown unicast flood".
# $1 - bridge port whose `flood' flag is toggled
# $2 - sending host interface
# $3 - host interface on which flooding is observed
# Flooding is left enabled on the port afterwards.
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	# Destination MAC and IP, followed by sender and observer.
	local -a probe=(de:ad:be:ef:13:37 192.0.2.100 $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1614
# Test the per-port unregistered multicast flood control; logged as
# "Unregistered multicast flood".
# $1 - bridge port whose `mcast_flood' flag is toggled
# $2 - sending host interface
# $3 - host interface on which flooding is observed
# Multicast flooding is left enabled on the port afterwards.
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	# Destination MAC and IP, followed by sender and observer.
	local -a probe=(01:00:5e:00:00:01 239.0.0.1 $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1637
# Run the unknown-unicast and the unregistered-multicast flood tests.
# $1 - bridge port, connected to `host2_if`
# $2 - sending host interface
# $3 - host interface on which flooding is observed
flood_test()
{
	local subtest

	for subtest in flood_unicast_test flood_multicast_test; do
		$subtest $1 $2 $3
	done
}
1648
# Start an endless $MZ packet stream in the background.
# $1 - packet size
# $2 - $MZ packet type (passed to -t)
# $3 - interface where the traffic egresses the host
# $4 - source IP, $5 - destination IP, $6 - destination MAC
# $7... - additional $MZ arguments
# The source MAC is the interface's own (-a own) and the count is 0. Sleeps
# one second after launching the generator.
__start_traffic()
{
	local pktsize=$1 proto=$2 h_in=$3 sip=$4 dip=$5 dmac=$6
	local -a mz_args=("${@:7}")

	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
		-a own -b $dmac -t "$proto" -q "${mz_args[@]}" &
	sleep 1
}
1663
# Start background UDP traffic with a given packet size.
# $1 - packet size
# $2 - egress interface, $3 - source IP, $4 - destination IP,
# $5 - destination MAC, $6... - additional generator arguments
start_traffic_pktsize()
{
	local pktsize=$1; shift

	__start_traffic $pktsize udp "$1" "$2" "$3" "$4" "${@:5}"
}
1676
# Start background TCP traffic with a given packet size.
# $1 - packet size
# $2 - egress interface, $3 - source IP, $4 - destination IP,
# $5 - destination MAC, $6... - additional generator arguments
start_tcp_traffic_pktsize()
{
	local pktsize=$1; shift

	__start_traffic $pktsize tcp "$1" "$2" "$3" "$4" "${@:5}"
}
1689
# Start background UDP traffic with the default packet size of 8000.
# $1 - egress interface, $2 - source IP, $3 - destination IP,
# $4 - destination MAC, $5... - additional generator arguments
start_traffic()
{
	start_traffic_pktsize 8000 "$1" "$2" "$3" "$4" "${@:5}"
}
1701
# Start background TCP traffic with the default packet size of 8000.
# $1 - egress interface, $2 - source IP, $3 - destination IP,
# $4 - destination MAC, $5... - additional generator arguments
start_tcp_traffic()
{
	start_tcp_traffic_pktsize 8000 "$1" "$2" "$3" "$4" "${@:5}"
}
1713
# Stop a traffic generator started in the background.
# $1 - optional PID or job spec; when no argument is given at all, the
#      current job (%%) is used.
stop_traffic()
{
	kill_process "${1-%%}"
}
1720
# Per-interface tcpdump bookkeeping, keyed by interface name: PID of the
# running capture, path of the capture file, path of the tcpdump output file.
declare -A cappid capfile capout
1724
# Start a background tcpdump capturing ingress traffic on an interface.
# $1 - interface; also the key under which the capture file, the tcpdump
#      output file and the tcpdump PID are stored in capfile[], capout[]
#      and cappid[]
# $2 - optional network namespace to run tcpdump in
# Sets the global ns_cmd (reused by tcpdump_stop) and capuser. When SUDO_USER
# is set, tcpdump is asked to drop privileges to that user (-Z). Sleeps one
# second after launching tcpdump.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	# Quote the operands: an unquoted empty value turns the test into the
	# one-argument form `[ -z ]', which is true only by accident.
	if [ -z "$ns" ]; then
		ns_cmd=""
	else
		ns_cmd="ip netns exec ${ns}"
	fi

	if [ -z "$SUDO_USER" ] ; then
		capuser=""
	else
		capuser="-Z $SUDO_USER"
	fi

	# ns_cmd, TCPDUMP_EXTRA_FLAGS and capuser stay unquoted on purpose:
	# each of them may be empty or expand to several words.
	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w "${capfile[$if_name]}" \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1752
# Stop the capture recorded in cappid[] for an interface.
# $1 - interface
# The kill is prefixed with the global ns_cmd. If the kill succeeds, the
# process is waited for. Sleeps one second afterwards.
tcpdump_stop()
{
	local pid=${cappid[$1]}

	if $ns_cmd kill "$pid"; then
		wait "$pid"
	fi
	sleep 1
}
1761
# Remove the capture file and the tcpdump output file of an interface.
# $1 - interface, used as the key into capfile[] and capout[]
tcpdump_cleanup()
{
	local key=$1
	local -a stale=(${capfile[$key]} ${capout[$key]})

	rm "${stale[@]}"
}
1768
# Decode the capture file of an interface to stdout (link-level headers
# shown, no name resolution). tcpdump's stderr is merged into stdout.
# $1 - interface, used as the key into capfile[]
tcpdump_show()
{
	local key=$1

	tcpdump -e -n -r ${capfile[$key]} 2>&1
}
1775
# Send one UDP packet (sp=2048, dp=4096) and report whether it showed up.
# $1 - destination MAC
# $2 - source IP
# $3 - destination IP; anything that is not a dotted quad is taken as IPv6
# $4 - sending host interface
# $5 - host interface on which reception is checked
# Returns 0 if the packet wasn't seen on host2_if or 1 if it was.
mcast_packet_test()
{
	local mac=$1
	local src_ip=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local seen=0
	# Assume IPv6 and switch to IPv4 on a basic dotted-quad match.
	local tc_proto="ipv6"
	local mz_v6arg="-6"

	if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
		tc_proto="ip"
		mz_v6arg=""
	fi

	# An ingress filter on `host2_if` counts the packets that arrive, so
	# its statistics tell us whether ours was received.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# jq -e succeeds only if the filter counted exactly one packet.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1815
# Check the MDB of br0 after a membership report.
# $1 - report name, only used in the error message
# $2... - source addresses expected for $TEST_GROUP
# Verifies that an entry for $TEST_GROUP with a source list containing every
# given source exists, and that an S,G entry exists for each source. Failures
# are recorded through check_err.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local src_terms=""

	# One extra jq condition per expected source address.
	for src in "${slist[@]}"; do
		src_terms+=" and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $src_terms)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1837
# Check whether traffic from a list of sources to $TEST_GROUP is forwarded
# from $h2 to $h1.
# $1 - 1 if the traffic is expected to be forwarded, anything else if not
# $2... - source addresses to try
# mcast_packet_test returns 1 when the packet was seen, so forwarding is
# asserted with check_fail and blocking with check_err.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	local verdict why

	if [ $should_fwd -eq 1 ]; then
		verdict=check_fail
		why="Didn't forward traffic from S,G"
	else
		verdict=check_err
		why="Forwarded traffic for blocked S,G"
	fi

	for src in "${sources[@]}"; do
		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		$verdict $? "$why ($src, $TEST_GROUP)"
	done
}
1855
# Check the blocked/unblocked state of S,G entries of $TEST_GROUP on br0.
# $1 - 1 if the sources are expected to be blocked, anything else if not
# $2... - source addresses to check
# For each source, looks for a zero ("0.00") source-list timer and for the
# "blocked" flag on the S,G entry. Whether finding them is an error is passed
# to check_err_fail as its first argument.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=0

	[ $is_blocked -eq 1 ] || should_fail=1

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1882
# Join a multicast group on an interface by running mreceive in the
# background, inside the VRF reported by master_name_get.
# $1 - interface
# $2 - multicast group
# The PID of mreceive is stored in the global mreceive_pid. Sleeps one second
# after launching it.
mc_join()
{
	local if_name=$1
	local group=$2
	local vrf
	vrf=$(master_name_get $if_name)

	# Actual reception does not matter here; the point is joining the IP
	# multicast group and getting the L2 address added to the device's
	# MAC filtering table.
	ip vrf exec $vrf mreceive -g $group -I $if_name > /dev/null 2>&1 &
	mreceive_pid=$!

	sleep 1
}
1898
# Leave the multicast group by terminating the background process recorded
# in the global mreceive_pid, then reap it.
mc_leave()
{
	local pid=$mreceive_pid

	kill "$pid" && wait "$pid"
}
1903
# Send one multicast packet with msend, inside the VRF reported by
# master_name_get.
# $1 - interface
# $2 - group(s), passed to msend -g (word-split on purpose)
# All msend output is discarded.
mc_send()
{
	local if_name=$1
	local groups=$2
	local vrf
	vrf=$(master_name_get $if_name)

	ip vrf exec $vrf msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
1913
# Start the multicast routing daemon ($MCD) with automatic teardown.
# $@ - additional interfaces to enable besides the NUM_NETIFS test ports
# Returns 1 if $MCD or $MC_CLI is not available. The temporary configuration
# directory, any MULTICAST flag turned on here and the daemon itself are all
# undone through `defer'.
adf_mcd_start()
{
	local ifs=("$@")

	local table_name="$MCD_TABLE_NAME"
	local smcroutedir
	local conf
	local pidfile
	local pid
	local ifname
	local i

	check_command "$MCD" || return 1
	check_command "$MC_CLI" || return 1

	smcroutedir=$(mktemp -d)
	defer rm -rf "$smcroutedir"
	conf="$smcroutedir/$table_name.conf"
	pidfile="$smcroutedir/$table_name.pid"

	# Enable every port of the test topology...
	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		echo "phyint ${NETIFS[p$i]} enable" >> "$conf"
	done

	# ...and every extra interface, switching on multicast where needed.
	for ifname in "${ifs[@]}"; do
		if ! ip_link_has_flag "$ifname" MULTICAST; then
			ip link set dev "$ifname" multicast on
			defer ip link set dev "$ifname" multicast off
		fi

		echo "phyint $ifname enable" >> "$conf"
	done

	"$MCD" -N -I "$table_name" -f "$conf" -P "$pidfile"
	# Wait for the PID file so that the daemon can be killed on cleanup.
	busywait "$BUSYWAIT_TIMEOUT" test -e "$pidfile"
	pid=$(cat "$pidfile")
	defer kill_process "$pid"
}
1951
# Run $MC_CLI with the given arguments, with -I $MCD_TABLE_NAME prepended.
mc_cli()
{
	"$MC_CLI" -I "$MCD_TABLE_NAME" "$@"
}
1958
# Start `ip monitor' in the background, writing events to a temporary file.
# $1 - monitor type (e.g. what follows `ip monitor')
# $2 - optional ip command to use, default "ip" (word-split on purpose)
# Prints "<monitor pid> <temporary file>", which is what stop_ip_monitor
# takes as its first two arguments.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	# The monitor is launched from a subshell so that only its PID ends
	# up on stdout.
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1970
# Stop a monitor started by start_ip_monitor and check the number of events.
# $1 - monitor PID
# $2 - file the monitor wrote to; removed at the end
# $3 - expected number of event lines
# $4 - description used in the error message
# Only lines starting with a word character are counted. A mismatch is
# recorded through check_err.
stop_ip_monitor()
{
	local mpid=$1 tmpfile=$2 el=$3 what=$4
	local lines

	sleep 0.2
	kill $mpid
	lines=$(grep '^\w' $tmpfile | wc -l)
	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"
	rm -rf $tmpfile
}
1985
# Test that `ip monitor stats' emits exactly one notification for each of
# four steps: enabling the statistics, running $make_suitable, running
# $make_unsuitable and disabling the statistics.
# $1 - device
# $2 - statistics type; "<type>_stats" is what gets switched on and off
# $3 - command run for the "installation" step
# $4 - command run for the "deinstallation" step
# $5 - optional ip command to use, default "ip"
# Logged as "<type>_stats notifications".
hw_stats_monitor_test()
{
	local dev=$1; shift
	local type=$1; shift
	local make_suitable=$1; shift
	local make_unsuitable=$1; shift
	local ip=${1-ip}; shift
	local monitor

	RET=0

	# Expect a notification about enablement.
	monitor=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats on
	stop_ip_monitor $monitor 1 "${type}_stats enablement"

	# Expect a notification about offload.
	monitor=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $monitor 1 "${type}_stats installation"

	# Expect a notification about loss of offload.
	monitor=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $monitor 1 "${type}_stats deinstallation"

	# Expect a notification about disablement.
	monitor=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats off
	stop_ip_monitor $monitor 1 "${type}_stats disablement"

	log_test "${type}_stats notifications"
}
2018
# Convert a dotted-quad IPv4 address to colon-separated two-digit
# hexadecimal bytes, e.g. 192.0.2.1 -> c0:00:02:01. No newline is printed.
ipv4_to_bytes()
{
	local IP=$1
	local hex

	# One "%02x:" per octet; the format is reused for every argument.
	printf -v hex '%02x:' ${IP//./ }
	printf '%s' "${hex%:}"
}
2026
# Print the fully expanded form of an IPv6 address.
# $1 - the address; a :: token, if present, is expanded
# $2 - optional separator inserted between the two bytes of each group
# Each 16-bit group is zero-padded to 4 hexadecimal digits, e.g.
# 2001:db8::1 -> 2001:0db8:0000:0000:0000:0000:0000:0001.
expand_ipv6()
{
	local IP=$1; shift
	local bytesep=$1; shift

	# Mark the position of :: with a placeholder, then count the colons
	# that are left.
	local marked=${IP/::/_}
	local present=${marked//[^:]/}
	local seven=:::::::
	# Substitute the colons that are missing from the full seven.
	local padded=${marked/_/${seven:${#present}}}

	# One group per line: left-pad with zeroes, keep the last four digits
	# (split by the separator), and join the lines with colons again.
	echo $padded | tr : '\n' |
	    sed -e 's/^/0000/' \
		-e 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
	    tr '\n' : |
	    sed 's/:$//'
}
2048
# Convert an IPv6 address to colon-separated bytes by calling expand_ipv6
# with ":" as the byte separator.
ipv6_to_bytes()
{
	expand_ipv6 "$1" :
}
2055
# Print a number as two colon-separated hexadecimal bytes, high byte first,
# e.g. 258 -> 01:02. Only the lowest four hexadecimal digits are kept. No
# newline is printed.
u16_to_bytes()
{
	local u16=$1
	local hex

	printf -v hex '%04x' $u16
	# Keep just the last four digits, whatever the width.
	hex=000$hex
	hex=${hex: -4}
	printf '%s' "${hex:0:2}:${hex:2}"
}
2062
# Calculate the RFC 1071 checksum of a mausezahn-formatted payload.
# $1 - payload: colon-separated bytes given as %02x. The keyword CHECKSUM may
#      stand where a 16-bit checksum belongs and is counted as 00:00.
# Prints the checksum as printed by dc (input radix 16, default output
# radix). The arithmetic is done by feeding a generated program to dc.
payload_template_calc_checksum()
{
	local payload=$1; shift

	{
		# Hexadecimal input, and a zero to start the sum from.
		printf '%s\n' "16i" 0

		# The appended "00:" completes the last word when the payload
		# has an odd number of bytes.
		printf '%s\n' "${payload%:}:00:" |
		    sed 's/CHECKSUM/00:00/g' |
		    tr '[:lower:]' '[:upper:]' |
		    # Each pair of bytes becomes "<word>+" on its own line.
		    sed 's/\(..\):\(..\):/\1\2+\n/g' |
		    # Drop the padding byte if it was left unconverted.
		    sed 's/\(..\):$//'

		# Fold the carry back in, then complement and print.
		printf '%s\n' "10000 ~ +" "FFFF r - p"
	} |
	    dc |
	    tr '[:upper:]' '[:lower:]'
}
2093
# Print a payload template with every CHECKSUM keyword replaced by the
# given checksum, rendered as two bytes by u16_to_bytes.
# $1 - payload template
# $2 - checksum value
payload_template_expand_checksum()
{
	local payload=$1
	local checksum=$2
	local ckbytes

	ckbytes=$(u16_to_bytes $checksum)
	echo "${payload//CHECKSUM/$ckbytes}"
}
2103
# Print the number of bytes a payload template expands to.
# $1 - payload template; one trailing colon is ignored
# The CHECKSUM keyword is expanded first, so that it counts as two bytes.
payload_template_nbytes()
{
	local payload=$1

	# One byte per line, then count the lines.
	payload_template_expand_checksum "${payload%:}" 0 |
		tr ':' '\n' | wc -l
}
2111
# Print the payload of an IGMPv3 Membership Report with one IS_IN group
# record, in mausezahn format and with the checksum filled in.
# $1 - multicast group address
# $2... - source addresses of the record
igmpv3_is_in_get()
{
	local GRP=$1; shift
	local sources=("$@")

	local igmpv3
	local nsources
	local checksum
	local src

	nsources=$(u16_to_bytes ${#sources[@]})

	# IS_IN ( $sources )
	igmpv3="22:"				# Type - Membership Report
	igmpv3+="00:"				# Reserved
	igmpv3+="CHECKSUM:"			# Checksum
	igmpv3+="00:00:"			# Reserved
	igmpv3+="00:01:"			# Number of Group Records
	igmpv3+="01:"				# Record Type - IS_IN
	igmpv3+="00:"				# Aux Data Len
	igmpv3+="${nsources}:"			# Number of Sources
	igmpv3+="$(ipv4_to_bytes $GRP):"	# Multicast Address
	for src in "${sources[@]}"; do		# Source Addresses
		igmpv3+="$(ipv4_to_bytes $src):"
	done

	checksum=$(payload_template_calc_checksum "$igmpv3")

	payload_template_expand_checksum "$igmpv3" $checksum
}
2140
# Print the payload of an IGMPv2 Leave Group message, in mausezahn format
# and with the checksum filled in.
# $1 - multicast group address
igmpv2_leave_get()
{
	local GRP=$1; shift

	local payload
	local checksum

	payload="17:"				# Type - Leave Group
	payload+="00:"				# Max Resp Time - not meaningful
	payload+="CHECKSUM:"			# Checksum
	payload+="$(ipv4_to_bytes $GRP)"	# Group Address

	checksum=$(payload_template_calc_checksum "$payload")

	payload_template_expand_checksum "$payload" $checksum
}
2155
# Print the payload of an MLDv2 Report with one IS_IN multicast address
# record, preceded by a hop-by-hop header, in mausezahn format.
# $1 - source IPv6 address, used only for the checksum pseudo-header
# $2 - multicast group address
# $3... - source addresses of the record
# The checksum covers the pseudo-header and the ICMPv6 message. The
# hop-by-hop header is part of the output but not of the checksum.
mldv2_is_in_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local sources=("$@")

	local hbh
	local icmpv6
	local nsources
	local len
	local sudohdr
	local checksum
	local src

	nsources=$(u16_to_bytes ${#sources[@]})

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="8f:"				# Type - MLDv2 Report
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Reserved
	icmpv6+="00:01:"			# Number of Group Records
	icmpv6+="01:"				# Record Type - IS_IN
	icmpv6+="00:"				# Aux Data Len
	icmpv6+="${nsources}:"			# Number of Sources
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address
	for src in "${sources[@]}"; do		# Source Addresses
		icmpv6+="$(ipv6_to_bytes $src):"
	done

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2199
# Print the payload of an MLDv1 Done message, preceded by a hop-by-hop
# header, in mausezahn format.
# $1 - source IPv6 address, used only for the checksum pseudo-header
# $2 - multicast group address
# The checksum covers the pseudo-header and the ICMPv6 message. The
# hop-by-hop header is part of the output but not of the checksum.
mldv1_done_get()
{
	local SIP=$1; shift
	local GRP=$1; shift

	local hbh
	local icmpv6
	local len
	local sudohdr
	local checksum

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="84:"				# Type - MLDv1 Done
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Max Resp Delay - not meaningful
	icmpv6+="00:00:"			# Reserved
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2234
# Warn, and by default skip the calling test, when lldpad is running.
# $1 - what lldpad is likely to do (completes "lldpad will likely ...")
# $2 - what the test does (completes "this test will ...")
# Returns 0 when lldpad is not active, or when it is active but the
# environment variable ALLOW_LLDPAD is non-empty (a warning is still printed
# to stderr). Otherwise logs the test as skipped under
# "<calling file>:<calling function>" and exits with $EXIT_STATUS.
bail_on_lldpad()
{
	local reason1="$1"; shift
	local reason2="$1"; shift
	local caller=${FUNCNAME[1]}
	local src=${BASH_SOURCE[1]}

	if ! systemctl is-active --quiet lldpad; then
		return 0
	fi

	cat >/dev/stderr <<-EOF
	WARNING: lldpad is running

		lldpad will likely $reason1, and this test will
		$reason2. Both are not supported at the same time,
		one of them is arbitrarily going to overwrite the
		other. That will cause spurious failures (or, unlikely,
		passes) of this test.
	EOF

	# Return success explicitly: a bare `return' here would hand back the
	# status of the ALLOW_LLDPAD check instead.
	if [[ -n $ALLOW_LLDPAD ]]; then
		return 0
	fi

	cat >/dev/stderr <<-EOF

		If you want to run the test anyway, please set
		an environment variable ALLOW_LLDPAD to a
		non-empty string.
	EOF
	log_test_skip $src:$caller
	exit $EXIT_STATUS
}
2268
# Print the absolute value of an integer (evaluated arithmetically).
absval()
{
	local v=$1

	if ((v > 0)); then
		echo $((v))
	else
		echo $((-v))
	fi
}
2275
# Probe whether a device filters unicast addresses.
# $1 - device; it is brought up as a side effect
# A temporary macvlan (macvlan-tmp) with the device's MAC address plus one
# is stacked on top and brought up. Prints "no" if that leaves the device
# with a promiscuity count of 1, "yes" otherwise.
has_unicast_flt()
{
	local dev=$1; shift
	local base_mac
	local base_u64
	local promisc

	base_mac=$(mac_get $dev)
	base_u64=$(ether_addr_to_u64 $base_mac)

	ip link set $dev up
	ip link add link $dev name macvlan-tmp type macvlan mode private
	ip link set macvlan-tmp address $(u64_to_ether_addr $((base_u64 + 1)))
	ip link set macvlan-tmp up

	promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')

	ip link del macvlan-tmp

	if [[ $promisc == 1 ]]; then
		echo "no"
	else
		echo "yes"
	fi
}
2294