xref: /linux/tools/testing/selftests/net/forwarding/lib.sh (revision dfecb0c5af3b07ebfa84be63a7a21bfc9e29a872)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
4
5##############################################################################
6# Topology description. p1 looped back to p2, p3 to p4 and so on.
7
# Default port map: consecutive entries form a looped-back pair
# (p1<->p2, p3<->p4, ...).
declare -A NETIFS=(
	[p1]=veth0  [p2]=veth1
	[p3]=veth2  [p4]=veth3
	[p5]=veth4  [p6]=veth5
	[p7]=veth6  [p8]=veth7
	[p9]=veth8  [p10]=veth9
)

# Name of a port that has no cable attached; kept if already set by the
# environment.
: "${NETIF_NO_CABLE:=eth8}"
23
24##############################################################################
25# Defines
26
# Every setting below is a default: a non-empty value already present in the
# environment is left untouched.

# Networking utilities.
: "${PING:=ping}"
: "${PING6:=ping6}"	# Some distros just use ping.
: "${ARPING:=arping}"
: "${TROUTE6:=traceroute6}"

# Packet generator.
: "${MZ:=mausezahn}"	# Some distributions use 'mz'.
: "${MZ_DELAY:=0}"

# Host configuration tools.
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${MCD_TABLE_NAME:=selftests}"

# Netdevice bring-up timing.
# Seconds to wait for an interface to come up during initial setup before
# giving up and bailing out.
: "${INTERFACE_TIMEOUT:=600}"
# Same idea as INTERFACE_TIMEOUT, but the default for ad-hoc waits inside
# test scripts.
: "${WAIT_TIMEOUT:=20}"
# Seconds to wait once all interfaces participating in the test are UP.
: "${WAIT_TIME:=5}"

# Whether to pause before cleanup (consulted by pre_cleanup).
: "${PAUSE_ON_CLEANUP:=no}"

# Whether to create virtual interfaces, and of which netdevice type.
: "${NETIF_CREATE:=yes}"
: "${NETIF_TYPE:=veth}"

# Ping tests.
# Number of packets to send.
: "${PING_COUNT:=10}"
# Seconds after which ping exits, regardless of how many packets have been
# sent or received.
: "${PING_TIMEOUT:=5}"

# Minimum ageing_time (in centiseconds) supported by hardware.
: "${LOW_AGEING_TIME:=1000}"

# Which tools must be available.
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${REQUIRE_TEAMD:=no}"

# Whether to override the MAC addresses of the interfaces under test.
: "${STABLE_MAC_ADDRS:=no}"

# Extra flags for tcpdump.
: "${TCPDUMP_EXTRA_FLAGS:=}"

# Flags for TC filters.
: "${TC_FLAG:=skip_hw}"

# Whether the machine is "slow", i.e. possibly unable to run tests involving
# heavy traffic: a debug kernel, a VM, or e.g. a low-power board.
: "${KSFT_MACHINE_SLOW:=no}"
88
89##############################################################################
90# Find netifs by test-specified driver name
91
# Print the name of the driver bound to netdevice $1, taken from the target of
# its sysfs "device/driver" symlink. Prints nothing when there is no such
# symlink.
driver_name_get()
{
	local dev=$1; shift
	local link="/sys/class/net/$dev/device/driver"

	[[ -L $link ]] || return 0
	basename $(realpath $link)
}
101
# Fill NETIFS[p1], NETIFS[p2], ... with the names of all interfaces whose
# driver (see driver_name_get) is exactly $NETIF_FIND_DRIVER, in the order
# reported by "ip link show".
netif_find_driver()
{
	local ifnames
	local ifname
	local driver_name
	local count=0

	ifnames=$(ip -j link show | jq -r ".[].ifname")

	# Word splitting is intended: jq prints one interface name per line.
	for ifname in $ifnames; do
		driver_name=$(driver_name_get "$ifname")
		# The right-hand side is quoted so the driver name is compared
		# literally rather than as a glob pattern.
		if [[ -n $driver_name &&
		      $driver_name == "$NETIF_FIND_DRIVER" ]]; then
			count=$((count + 1))
			NETIFS[p$count]="$ifname"
		fi
	done
}
116
# Whether to find netdevices according to the driver specified by the importer.
: "${NETIF_FIND_DRIVER:=}"

# A driver name replaces the default port map with the interfaces found for
# that driver.
if [[ -n $NETIF_FIND_DRIVER ]]; then
	unset NETIFS
	declare -A NETIFS
	netif_find_driver
fi

# Directory holding this library, with symlinks resolved.
net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")

# forwarding.config is optional and sourced only when present.
if [[ -f $net_forwarding_dir/forwarding.config ]]; then
	source "$net_forwarding_dir/forwarding.config"
fi

source "$net_forwarding_dir/../lib.sh"
133
134##############################################################################
135# Sanity checks
136
# Exit with $ksft_skip when "tc -j" fails, i.e. tc has no JSON support.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
145
# Probe for flower "erspan_opts" support by installing a throw-away filter on
# the ingress of $1 and removing it again. Returns $ksft_skip when the filter
# cannot be added.
check_tc_erspan_support()
{
	local dev=$1; shift

	if ! tc filter add dev $dev ingress pref 1 handle 1 flower \
		erspan_opts 1:0:0:0 &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing erspan support"
		return $ksft_skip
	fi

	tc filter del dev $dev ingress pref 1 handle 1 flower \
		erspan_opts 1:0:0:0 &> /dev/null
}
159
# Old versions of tc don't understand "mpls_uc". Probe by installing a
# throw-away matchall filter for that protocol on the ingress of $1; returns
# $ksft_skip when the filter cannot be added.
check_tc_mpls_support()
{
	local dev=$1; shift

	if ! tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		matchall action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		matchall
}
174
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install a throw-away flower filter with an MPLS LSE match on the ingress of
# $1, check that its JSON dump parses with jq, and remove the filter again.
# Returns $ksft_skip when the filter cannot be added or the dump is not valid
# JSON.
check_tc_mpls_lse_stats()
{
	local dev=$1; shift
	local json_rc

	if ! tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2                                 \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?

	# Remove the probe filter before reporting, whatever the verdict.
	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		flower

	if ((json_rc != 0)); then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
200
# The four checks below grep tc's help output for a keyword and exit with
# $ksft_skip when it is missing.

# Shared block support: "block" in "tc filter help".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}

# Chain support: "chain" in "tc help".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}

# Action hw_stats support: "hw_stats" in "tc actions help".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}

# Frame preemption support: "fp " in the mqprio qdisc help.
check_tc_fp_support()
{
	if ! tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "; then
		echo "SKIP: iproute2 too old; tc is missing frame preemption support"
		exit $ksft_skip
	fi
}
236
# The three checks below grep "ethtool --help" for a keyword and exit with
# $ksft_skip when it is missing.

# Lanes support: "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}

# MAC Merge layer support: "--show-mm".
check_ethtool_mm_support()
{
	if ! ethtool --help 2>&1 | grep -- '--show-mm' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
		exit $ksft_skip
	fi
}

# Standard counter group support: "--all-groups".
check_ethtool_counter_group_support()
{
	if ! ethtool --help 2>&1 | grep -- '--all-groups' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing standard counter group support"
		exit $ksft_skip
	fi
}
263
# Succeed when "ethtool --json -S $1 --all-groups --src pmac" reports a
# non-empty statistics group named $2.
#
# The length is captured first and validated as a number, so that an empty or
# multi-line result yields a plain failure instead of a malformed "[" test.
check_ethtool_pmac_std_stats_support()
{
	local dev=$1; shift
	local grp=$1; shift
	local len

	len=$(ethtool --json -S "$dev" --all-groups --src pmac 2>/dev/null \
		| jq --arg grp "$grp" '.[][$grp] | length' \
		| head -n 1)

	[[ $len =~ ^[0-9]+$ ]] && ((len != 0))
}
272
# Return $ksft_skip unless "bridge -d link show" mentions " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}

# Return $ksft_skip unless "bridge -d link show" mentions "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
288
# Everything below needs root: skip otherwise.
if (($(id -u) != 0)); then
	echo "SKIP: need root privileges"
	exit $ksft_skip
fi
293
# Exit with $ksft_skip unless the driver of netdevice $1 (as reported by
# driver_name_get) matches $2.
check_driver()
{
	local dev=$1; shift
	local expected=$1; shift
	local driver_name

	driver_name=$(driver_name_get $dev)

	[[ $driver_name != $expected ]] || return 0

	echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
	exit $ksft_skip
}
305
# Importers opt in to the tc JSON check by setting CHECK_TC=yes.
if [[ $CHECK_TC == yes ]]; then
	check_tc_version
fi
309
# IPv6 support was added in mtools v3.0: exit with $ksft_skip when the major
# version printed by "msend -v" is lower than 3.
check_mtools_version()
{
	local version
	local major

	version=$(msend -v)
	# Drop the "msend version " banner, then keep what precedes the
	# first dot.
	version=${version##msend version }
	major=$(echo $version | cut -d. -f1)

	[ $major -lt 3 ] || return 0

	echo "SKIP: expected mtools version 3.0, got $version"
	exit $ksft_skip
}
324
# Check for the tools that the importer (or the defaults above) asked for.
if [[ $REQUIRE_JQ == yes ]]; then
	require_command jq
fi

if [[ $REQUIRE_MZ == yes ]]; then
	require_command $MZ
fi

if [[ $REQUIRE_TEAMD == yes ]]; then
	require_command $TEAMD
fi

if [[ $REQUIRE_MTOOLS == yes ]]; then
	# https://github.com/troglobit/mtools
	require_command msend
	require_command mreceive
	check_mtools_version
fi
340
341##############################################################################
342# Command line options handling
343
# Validate the environment used when DRIVER_TEST_CONFORMANT=yes: at least one
# complete LOCAL/REMOTE address pair (IPv4 or IPv6), plus REMOTE_TYPE and
# REMOTE_ARGS. Exits with $ksft_skip on the first missing piece.
check_env() {
	if [[ ( -z "$LOCAL_V4" || -z "$REMOTE_V4" ) &&
	      ( -z "$LOCAL_V6" || -z "$REMOTE_V6" ) ]]; then
		echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6"
		echo "Please see tools/testing/selftests/drivers/net/README.rst"
		exit "$ksft_skip"
	fi

	[[ -n "$REMOTE_TYPE" ]] || {
		echo "SKIP: Invalid environment, missing REMOTE_TYPE"
		exit "$ksft_skip"
	}

	[[ -n "$REMOTE_ARGS" ]] || {
		echo "SKIP: Invalid environment, missing REMOTE_ARGS"
		exit "$ksft_skip"
	}
}
362
# Run a command on a target. $1 is "<type>:<args>"; the remaining arguments
# are the command:
#   netns:<name>  - run inside network namespace <name>
#   ssh:<dest>    - run through "ssh -n <dest>"
#   anything else - run locally (also the fallback when an interface has no
#                   entry in the TARGETS array)
__run_on()
{
	local target=$1; shift
	local kind rest

	# Only the first colon separates; the rest stays in $rest.
	IFS=':' read -r kind rest <<< "$target"

	if [[ $kind == netns ]]; then
		ip netns exec "$rest" "$@"
	elif [[ $kind == ssh ]]; then
		ssh -n "$rest" "$@"
	else
		"$@"
	fi
}
388
# Run a command where interface $1 lives. With DRIVER_TEST_CONFORMANT=yes the
# target is looked up in TARGETS; otherwise the command always runs locally.
run_on()
{
	local iface=$1; shift

	if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then
		__run_on "${TARGETS[$iface]}" "$@"
	else
		__run_on "local:" "$@"
	fi
}
400
# Print the name(s) of the interface(s) on target $1 (a "<type>:<args>"
# string as understood by __run_on) that carry address $2.
get_ifname_by_ip()
{
	local target=$1 ip_addr=$2

	__run_on "$target" ip -j addr show to "$ip_addr" |
		jq -r '.[].ifname'
}
408
# Whether the test is conforming to the requirements and usage described in
# drivers/net/README.rst.
: "${DRIVER_TEST_CONFORMANT:=no}"

# TARGETS is indexed by interface name and records where that interface
# lives, as a "<type>:<args>" string:
#
# TARGETS[eth0]="local:" - eth0 is accessible locally
# TARGETS[eth1]="netns:foo" - eth1 is in the foo netns
# TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is reached by running the
# 'ssh root@10.0.0.2' command.
declare -A TARGETS

if [ "${DRIVER_TEST_CONFORMANT}" != "yes" ]; then
	# Non-conformant tests name their interfaces on the command line.
	# The default port map is dropped only if at least one name is given.
	count=0

	while (($# > 0)); do
		if ((count == 0)); then
			unset NETIFS
			declare -A NETIFS
		fi
		count=$((count + 1))
		NETIFS[p$count]="$1"
		TARGETS[$1]="local:"
		shift
	done
else
	# Conformant tests are configured through the variables specified in
	# drivers/net/README.rst, optionally preloaded from net.config.
	if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then
		source "$net_forwarding_dir/../../drivers/net/net.config"
	fi

	if (( NUM_NETIFS > 2)); then
		echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2"
		exit "$ksft_skip"
	fi

	check_env

	# Rebuild NETIFS from the environment: p1 is the local interface,
	# p2 the remote one.
	unset NETIFS
	declare -A NETIFS

	NETIFS[p1]="$NETIF"
	TARGETS[$NETIF]="local:"

	# Locate the name of the remote interface
	remote_target="$REMOTE_TYPE:$REMOTE_ARGS"
	if [[ -v REMOTE_V4 ]]; then
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4")
	else
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6")
	fi

	if [[ -z "$remote_netif" ]]; then
		echo "SKIP: cannot find remote interface"
		exit "$ksft_skip"
	fi

	# TARGETS is keyed by name, so the two ends must be named differently.
	if [[ "$NETIF" == "$remote_netif" ]]; then
		echo "SKIP: local and remote interfaces cannot have the same name"
		exit "$ksft_skip"
	fi

	NETIFS[p2]="$remote_netif"
	TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS"
fi
482
483##############################################################################
484# Network interfaces configuration
485
# The importer must say how many interfaces it needs...
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}

# ...and that many must have been defined...
if (( ${#NETIFS[@]} < NUM_NETIFS )); then
	echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
	exit $ksft_skip
fi

# ...as a gap-free run p1..pN with non-empty names.
for i in $(seq ${#NETIFS[@]}); do
	if [[ -z ${NETIFS[p$i]} ]]; then
		echo "SKIP: NETIFS[p$i] not given"
		exit $ksft_skip
	fi
done
502
# Create the veth pairs backing NETIFS[p1]..NETIFS[p$NUM_NETIFS]. Ports are
# handled in pairs (p1/p2, p3/p4, ...): when the odd-numbered interface does
# not exist yet, it is created with the next port as its veth peer.
#
# Exits with $ksft_skip when the odd-numbered port of a pair has no name, and
# with 1 when "ip link add" fails.
create_netif_veth()
{
	local i
	local j

	# Step by two: each iteration covers port i and its peer j = i + 1.
	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		j=$((i + 1))

		if [[ -z ${NETIFS[p$i]} ]]; then
			echo "SKIP: Cannot create interface. Name not specified"
			exit $ksft_skip
		fi

		# An interface that already exists is used as it is.
		if ip link show dev "${NETIFS[p$i]}" &> /dev/null; then
			continue
		fi

		if ! ip link add "${NETIFS[p$i]}" type veth \
			peer name "${NETIFS[p$j]}"; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
527
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# supported; any other type prints an error and exits with 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth) create_netif_veth
	      ;;
	# A backslash before a single quote is kept literally inside double
	# quotes, so the quotes are written plainly.
	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
	   exit 1
	   ;;
	esac
}
538
# Original MAC address of each interface, saved by mac_addr_prepare and
# consumed by mac_addr_restore.
declare -A MAC_ADDR_ORIG

# Give ports p1..p$NUM_NETIFS the stable addresses 00:01:02:03:04:<port number
# in hex>, remembering each original address in MAC_ADDR_ORIG.
mac_addr_prepare()
{
	local new_addr=
	local dev=
	local i		# kept local so a caller's loop counter is not clobbered

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \
			ip -j link show dev "$dev" | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		run_on "$dev" ip link set dev "$dev" address $new_addr
	done
}

# Put back the addresses saved by mac_addr_prepare.
mac_addr_restore()
{
	local dev=
	local i		# kept local so a caller's loop counter is not clobbered

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		run_on "$dev" \
			ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]}
	done
}
567
# Create the virtual interfaces unless the user opted out.
if [[ $NETIF_CREATE == yes ]]; then
	create_netif
fi

if [[ $STABLE_MAC_ADDRS == yes ]]; then
	mac_addr_prepare
fi

# Every port the importer asked for must exist by now, on its own target.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	int="${NETIFS[p$i]}"

	if ! run_on "$int" ip link show dev "$int" &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
585
586##############################################################################
587# Helpers
588
# Run the given command and invert its result: succeed exactly when the
# command fails.
not()
{
	local rc

	"$@"
	rc=$?
	((rc != 0))
}
594
# Print the largest of the integers given as arguments.
get_max()
{
	local arr=("$@")
	# Both are local so that no "max" or "cur" global is left behind in,
	# or overwritten in, the caller's shell.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo "$max"
}
608
# Usage: grep_bridge_fdb ADDR [self|master [-v]] COMMAND...
#
# Run COMMAND and keep the output lines that contain ADDR and, when given,
# the word "self" or "master". A "-v" after the word inverts that second
# match. Without a word, the second grep matches every line.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word=
	local flag=

	if [[ $1 == self || $1 == master ]]; then
		word=$1; shift
		if [[ $1 == -v ]]; then
			flag=$1; shift
		fi
	fi

	# "$@" keeps the command's arguments exactly as the caller passed
	# them; the flag is only passed to grep when it was given.
	"$@" | grep "$addr" | grep ${flag:+"$flag"} "$word"
}
624
# Run the command that follows pattern $1 and succeed when its output has a
# line matching that pattern.
__wait_for_output_match()
{
	local pattern=$1; shift

	"$@" | grep -q "$pattern"
}

# Succeed when the command's output reports "Link detected: yes".
wait_for_port_up()
{
	__wait_for_output_match "Link detected: yes" "$@"
}

# Succeed when the command's output mentions "offload".
wait_for_offload()
{
	__wait_for_output_match offload "$@"
}

# Succeed when the command's output mentions "trap".
wait_for_trap()
{
	__wait_for_output_match trap "$@"
}
639
# Wait up to $INTERFACE_TIMEOUT iterations for interface $1 to come up, then
# an extra $2 seconds (default $WAIT_TIME). Logs a failed test and exits with
# 1 if the interface does not come up.
setup_wait_dev()
{
	local dev=$1; shift
	local wait_time=${1:-$WAIT_TIME}; shift

	if ! setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time
	then
		check_err 1
		log_test setup_wait_dev ": Interface $dev does not come up."
		exit 1
	fi
}
653
# Poll once per second, at most $2 times (default $WAIT_TIMEOUT), until
# "ip link show dev $1 up" reports "state UP". On success sleep another $3
# seconds (default $WAIT_TIME) and return 0; return 1 when the polls run out.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local i

	for ((i = 1; i <= $max_iterations; ++i)); do
		if run_on "$dev" ip link show dev "$dev" up \
			| grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi
		sleep 1
	done

	return 1
}
674
# Wait for ports p1..p$1 to come up (no per-port settle time), then sleep
# $WAIT_TIME seconds once.
setup_wait_n()
{
	local num_netifs=$1; shift
	local i=1

	while ((i <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}

# Wait for all $NUM_NETIFS ports used by the test.
setup_wait()
{
	setup_wait_n "$NUM_NETIFS"
}
692
# Wait (through slowwait) up to $2 (default $WAIT_TIMEOUT) for interface $1
# to appear. Logs a failed test and exits with $EXIT_STATUS if it does not.
wait_for_dev()
{
	local dev=$1; shift
	local timeout=${1:-$WAIT_TIMEOUT}; shift

	if ! slowwait $timeout ip link show dev $dev &> /dev/null; then
		check_err 1
		log_test wait_for_dev "Interface $dev did not appear."
		exit $EXIT_STATUS
	fi
}
705
# Run before cleanup: optionally wait for a keypress (PAUSE_ON_CLEANUP=yes),
# and put original MAC addresses back (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	case "${PAUSE_ON_CLEANUP}" in
	yes)
		echo "Pausing before cleanup, hit any key to continue"
		read
		;;
	esac

	case "$STABLE_MAC_ADDRS" in
	yes)
		mac_addr_restore
		;;
	esac
}
717
# Move the "table local" rule from preference 0 to preference 32765, for
# IPv4 and then IPv6.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}

# Undo vrf_prepare, IPv6 first: the "table local" rule goes back to
# preference 0.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}

# vrf_prepare with vrf_cleanup deferred automatically.
adf_vrf_prepare()
{
	vrf_prepare
	defer vrf_cleanup
}
739
# Last table ID handed out, and the table ID assigned to each VRF name.
__last_tb_id=0
declare -A __TB_IDS

# Assign the next table ID to VRF $1 and hand it back as the return status.
# NOTE(review): because the ID travels in the exit status, only small values
# can be conveyed this way.
__vrf_td_id_assign()
{
	local vrf_name=$1

	__TB_IDS[$vrf_name]=$((++__last_tb_id))
	return $__last_tb_id
}

# Hand back, as the return status, the table ID previously assigned to VRF $1.
__vrf_td_id_lookup()
{
	local vrf_name=$1

	return ${__TB_IDS[$vrf_name]}
}
758
# Create VRF device $1 on a freshly assigned table, and add IPv4 and IPv6
# "unreachable default" routes with metric 4278198272 to that table.
vrf_create()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_assign $vrf_name
	tb_id=$?

	ip link add dev $vrf_name type vrf table $tb_id
	for family in -4 -6; do
		ip $family route add table $tb_id unreachable default \
			metric 4278198272
	done
}

# Undo vrf_create for VRF $1: remove the unreachable default routes (IPv6
# first), then delete the device.
vrf_destroy()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_lookup $vrf_name
	tb_id=$?

	for family in -6 -4; do
		ip $family route del table $tb_id unreachable default \
			metric 4278198272
	done
	ip link del dev $vrf_name
}
784
# Usage: __addr_add_del IF_NAME add|del [ADDR...]
#
# Run "ip address <add|del> ADDR dev IF_NAME" for every ADDR given.
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local addrstr	# local, so the caller's namespace is left alone

	shift 2

	for addrstr in "$@"; do
		# Left unquoted on purpose: the original splits an address
		# string into several words for ip.
		ip address $add_del $addrstr dev $if_name
	done
}
799
# Enslave interface $1 to VRF $2, bring it up and add the addresses given as
# the remaining arguments.
__simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=$1; shift

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	__addr_add_del $if_name add "$@"
}

# Reverse of __simple_if_init: remove the addresses given after interface $1,
# take the interface down and release it from its master.
__simple_if_fini()
{
	local if_name=$1; shift

	__addr_add_del $if_name del "$@"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
822
# Put interface $1 into a dedicated VRF named "v$1": create the VRF, bring it
# up, then enslave the interface and add the addresses that follow $1.
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}

# Reverse of simple_if_init, to be called with the same arguments.
simple_if_fini()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	__simple_if_fini $if_name "$@"
	vrf_destroy $vrf_name
}

# simple_if_init with the matching simple_if_fini deferred automatically.
adf_simple_if_init()
{
	simple_if_init "$@"
	defer simple_if_fini "$@"
}
857
# Usage: tunnel_create NAME TYPE LOCAL REMOTE [EXTRA...]
#
# Add a tunnel device of the given type between LOCAL and REMOTE, passing any
# EXTRA arguments on to "ip link add", and bring it up.
tunnel_create()
{
	local name=$1; shift
	local type=$1; shift
	local local_addr=$1; shift
	local remote_addr=$1; shift

	ip link add name $name type $type \
	   local $local_addr remote $remote_addr "$@"
	ip link set dev $name up
}

# Delete tunnel device $1.
tunnel_destroy()
{
	local name=$1

	ip link del dev $name
}
876
# Usage: vlan_create IF_NAME VID VRF [ADDR...]
#
# Create VLAN device IF_NAME.VID on top of IF_NAME, enslave it to VRF unless
# VRF is empty, bring it up and add the given addresses.
vlan_create()
{
	local if_name=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	[[ -z $vrf ]] || ip link set dev $name master $vrf
	ip link set dev $name up
	__addr_add_del $name add "$@"
}

# Delete VLAN device $1.$2.
vlan_destroy()
{
	local if_name=$1 vid=$2

	ip link del dev $if_name.$vid
}
901
# Usage: team_create IF_NAME MODE [PORT...]
#
# Start a $TEAMD daemon for team device IF_NAME using runner MODE, enslave
# each PORT (which is taken down first and brought up afterwards), and bring
# the team device up.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	local slave	# local, so the caller's namespace is left alone

	require_command $TEAMD
	# $mode is quoted so the JSON stays a single argument.
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'"$mode"'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}

# Stop the $TEAMD daemon of team device $1.
team_destroy()
{
	local if_name=$1; shift

	$TEAMD -t $if_name -k
}
923
# Print the "master" field of interface $1 as reported by "ip -j link show".
master_name_get()
{
	local if_name=$1

	ip -j link show dev $if_name |
		jq -r '.[]["master"]'
}
930
# Usage: link_stats_get IF_NAME DIR STAT
#
# Print the stats64 counter STAT of direction DIR for IF_NAME, as reported by
# "ip -j -s link show".
link_stats_get()
{
	local if_name=$1 dir=$2 stat=$3

	ip -j -s link show dev $if_name |
		jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
}

# Print the number of packets transmitted by interface $1.
link_stats_tx_packets_get()
{
	link_stats_get $1 tx packets
}

# Print the number of receive errors on interface $1.
link_stats_rx_errors_get()
{
	link_stats_get $1 rx errors
}
950
# Print the value of counter $2 from "ethtool -S $1": whatever follows the
# colon on the first line starting with "<optional spaces>$2:".
ethtool_stats_get()
{
	local dev=$1 stat=$2

	ethtool -S $dev |
		grep "^ *$stat:" |
		head -n 1 |
		cut -d: -f2
}
958
# Usage: ethtool_std_stats_get DEV GRP NAME SRC
#
# Print standard statistic NAME of group GRP for DEV, using statistics source
# SRC. The ethtool command runs where DEV lives (see run_on). The "pause"
# group is read from "ethtool -I -a" rather than from "ethtool -S --groups".
ethtool_std_stats_get()
{
	local dev=$1 grp=$2 name=$3 src=$4

	if [[ "$grp" == "pause" ]]; then
		run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \
			jq --arg name "$name" '.[].statistics[$name]'
	else
		run_on "$dev" \
			ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \
			jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]'
	fi
}
976
# Usage: qdisc_stats_get DEV HANDLE SELECTOR
#
# Apply jq SELECTOR to the qdisc on DEV whose handle is HANDLE.
qdisc_stats_get()
{
	local dev=$1 handle=$2 selector=$3
	local filter=".[] | select(.handle == \"$handle\") | $selector"

	tc -j -s qdisc show dev "$dev" |
		jq "$filter"
}

# Usage: qdisc_parent_stats_get DEV PARENT SELECTOR
#
# Apply jq SELECTOR to the qdisc on DEV whose parent is PARENT; the listing
# includes "invisible" qdiscs.
qdisc_parent_stats_get()
{
	local dev=$1 parent=$2 selector=$3
	local filter=".[] | select(.parent == \"$parent\") | $selector"

	tc -j -s qdisc show dev "$dev" invisible |
		jq "$filter"
}
996
# Print the second tab-separated field of the line(s) in
# /proc/net/dev_snmp6/$1 that start with counter name $2.
ipv6_stats_get()
{
	local dev=$1 stat=$2

	cat /proc/net/dev_snmp6/$dev |
		grep "^$stat" |
		cut -f2
}
1004
# Usage: hw_stats_get SUITE IF_NAME DIR STAT
#
# Print counter STAT of direction DIR from the offload statistics subgroup
# SUITE of IF_NAME, as reported by "ip -j stats show".
hw_stats_get()
{
	local suite=$1 if_name=$2 dir=$3 stat=$4

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq ".[0].stats64.$dir.$stat"
}
1015
# Usage: __nh_stats_get KEY GROUP_ID MEMBER_ID
#
# Print field KEY of the group_stats entry with id MEMBER_ID in nexthop group
# GROUP_ID, as reported by "ip -j -s -s nexthop show".
__nh_stats_get()
{
	local key=$1 group_id=$2 member_id=$3

	ip -j -s -s nexthop show id $group_id |
	    jq --argjson member_id "$member_id" --arg key "$key" \
	       '.[].group_stats[] | select(.id == $member_id) | .[$key]'
}

# Print the "packets" counter of member $2 of nexthop group $1.
nh_stats_get()
{
	local group_id=$1 member_id=$2

	__nh_stats_get packets "$group_id" "$member_id"
}

# Print the "packets_hw" counter of member $2 of nexthop group $1.
nh_stats_get_hw()
{
	local group_id=$1 member_id=$2

	__nh_stats_get packets_hw "$group_id" "$member_id"
}
1042
# Print rate $1 (in bits per second) scaled by powers of 1024 to the largest
# fitting unit among bps, Kbps, Mbps and Gbps, e.g. "2.0Kbps". Needs bc.
humanize()
{
	local speed=$1; shift
	local units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local idx

	# Stop scaling once the largest unit is reached, so that a value of
	# 1024 Gbps or more is not divided again while still being labelled
	# Gbps.
	for ((idx = 0; idx < last; idx++)); do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${units[idx]}"
}
1057
# Usage: rate T0 T1 INTERVAL
#
# Print 8 * (T1 - T0) / INTERVAL using integer arithmetic, i.e. a bit rate
# when T0 and T1 are byte counters sampled INTERVAL apart.
rate()
{
	local t0=$1 t1=$2 interval=$3
	local delta=$((t1 - t0))

	echo $((8 * delta / interval))
}

# Usage: packets_rate T0 T1 INTERVAL
#
# Print (T1 - T0) / INTERVAL using integer arithmetic.
packets_rate()
{
	local t0=$1 t1=$2 interval=$3
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
1075
# Print colon-separated MAC address $1 as a hexadecimal number prefixed with
# "0x", the first byte being the most significant.
ether_addr_to_u64()
{
	local addr="$1"
	local weight="$((1 << 40))"
	local sum=0
	local octet

	# The first byte is weighted 2^40; every following byte 2^8 less.
	for octet in ${addr//:/ }; do
		sum=$((sum + weight * 0x$octet))
		weight=$((weight >> 8))
	done

	printf "0x%x" $sum
}
1093
# Print the low 48 bits of number $1 as a colon-separated MAC address, most
# significant byte first, without a trailing newline.
u64_to_ether_addr()
{
	local val=$1
	local bit
	local sep=""

	for ((bit = 40; bit >= 0; bit -= 8)); do
		printf "%s%02x" "$sep" $(((val >> bit) & 0xff))
		sep=":"
	done
}
1108
# Print the first link-scope address of interface $1.
ipv6_lladdr_get()
{
	local if_name=$1

	ip -j addr show dev $if_name |
		jq -r '.[]["addr_info"][] | select(.scope == "link").local' |
		head -1
}
1117
# Print the ageing time of bridge $1 in seconds.
bridge_ageing_time_get()
{
	local bridge=$1
	local centisecs

	centisecs=$(ip -j -d link show dev $bridge |
		    jq '.[]["linkinfo"]["info_data"]["ageing_time"]')

	# Need to divide by 100 to convert to seconds.
	echo $((centisecs / 100))
}
1128
# Saved sysctl values, keyed by sysctl name.
declare -A SYSCTL_ORIG

# Remember the current value of sysctl $1 in SYSCTL_ORIG.
sysctl_save()
{
	local key=$1

	SYSCTL_ORIG[$key]=$(sysctl -n $key)
}

# Save the current value of sysctl $1, then set it to $2.
sysctl_set()
{
	local key=$1 value=$2

	sysctl_save "$key"
	sysctl -qw $key="$value"
}

# Write the value saved for sysctl $1 back.
sysctl_restore()
{
	local key=$1

	sysctl -qw $key="${SYSCTL_ORIG[$key]}"
}
1152
# Enable IPv4 and IPv6 forwarding on all interfaces; sysctl_set saves the
# previous values.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}

# Restore the forwarding settings saved by forwarding_enable, IPv6 first.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}

# forwarding_enable with forwarding_restore deferred automatically.
adf_forwarding_enable()
{
	forwarding_enable
	defer forwarding_restore
}
1170
# Saved MTU of each device touched by mtu_set.
declare -A MTU_ORIG

# Remember the current MTU of device $1 in MTU_ORIG, then set it to $2.
mtu_set()
{
	local dev=$1 mtu=$2

	MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
	ip link set dev $dev mtu $mtu
}

# Set the MTU of device $1 back to the value saved by mtu_set.
mtu_restore()
{
	local dev=$1

	ip link set dev $dev mtu ${MTU_ORIG["$dev"]}
}
1187
# Succeed when "ethtool -k" reports "hw-tc-offload: on" for each of ports
# p1..p$1 ($1 defaults to $NUM_NETIFS); return 1 at the first port that does
# not.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i		# kept local so a caller's loop counter is not clobbered

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k "${NETIFS[p$i]}" \
			| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
1202
# Install a pref-1 flower filter on DEV ($1) in DIRECTION ($2) that traps
# traffic in hardware.
trap_install()
{
	local dev=$1 direction=$2

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}

# Remove the filter installed by trap_install.
trap_uninstall()
{
	local dev=$1 direction=$2

	tc filter del dev $dev $direction pref 1 flower
}
1226
# Usage: __icmp_capture_add_del add|del PREF VSUF TUNDEV FILTER
#
# Add or delete an ingress flower filter on TUNDEV at preference PREF that
# matches ICMP (VSUF empty) or ICMPv6 (VSUF "v6") and passes the packet.
# FILTER holds extra flower match keywords and is split into words on
# purpose.
__icmp_capture_add_del()
{
	local add_del=$1 pref=$2 vsuf=$3 tundev=$4 filter=$5

	tc filter $add_del dev "$tundev" ingress \
	   proto ip$vsuf pref $pref \
	   flower ip_proto icmp$vsuf $filter \
	   action pass
}

# Install the ICMP capture filter (preference 100) on device $1, with extra
# flower matches $2.
icmp_capture_install()
{
	local tundev=$1 filter=$2

	__icmp_capture_add_del add 100 "" "$tundev" "$filter"
}

# Remove the filter installed by icmp_capture_install.
icmp_capture_uninstall()
{
	local tundev=$1 filter=$2

	__icmp_capture_add_del del 100 "" "$tundev" "$filter"
}

# Install the ICMPv6 capture filter (preference 100) on device $1, with extra
# flower matches $2.
icmp6_capture_install()
{
	local tundev=$1 filter=$2

	__icmp_capture_add_del add 100 v6 "$tundev" "$filter"
}

# Remove the filter installed by icmp6_capture_install.
icmp6_capture_uninstall()
{
	local tundev=$1 filter=$2

	__icmp_capture_add_del del 100 v6 "$tundev" "$filter"
}
1272
# Usage: __vlan_capture_add_del add|del PREF DEV FILTER
#
# Add or delete an ingress 802.1q flower filter on DEV at preference PREF
# that passes the packet. FILTER holds extra flower match keywords and is
# split into words on purpose.
__vlan_capture_add_del()
{
	local add_del=$1 pref=$2 dev=$3 filter=$4

	tc filter $add_del dev "$dev" ingress \
	   proto 802.1q pref $pref \
	   flower $filter \
	   action pass
}

# Install the VLAN capture filter (preference 100) on device $1, with extra
# flower matches $2.
vlan_capture_install()
{
	local dev=$1 filter=$2

	__vlan_capture_add_del add 100 "$dev" "$filter"
}

# Remove the filter installed by vlan_capture_install.
vlan_capture_uninstall()
{
	local dev=$1 filter=$2

	__vlan_capture_add_del del 100 "$dev" "$filter"
}
1301
# Usage: __dscp_capture_add_del add|del DEV BASE
#
# Add or delete eight software-only ICMP capture filters on DEV, one for each
# DSCP value BASE..BASE+7. The filter for DSCP value d sits at preference
# d + 100 and matches ip_tos d << 2.
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp
	local prio	# local, so the caller's namespace is left alone

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}

# Install the DSCP capture filters for DSCP values $2..$2+7 on device $1.
dscp_capture_install()
{
	local dev=$1; shift
	local base=$1; shift

	__dscp_capture_add_del add $dev $base
}

# Remove the filters installed by dscp_capture_install.
dscp_capture_uninstall()
{
	local dev=$1; shift
	local base=$1; shift

	__dscp_capture_add_del del $dev $base
}

# Print one "[<dscp>]=<count> " line for each DSCP value $2..$2+7, the count
# coming from tc_rule_stats_get for the rule at preference dscp + 100 on
# device $1.
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	local prio	# local, so the caller's namespace is left alone
	local dscp
	local t

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1343
# Attach a clsact qdisc to device $1 and add an ingress matchall filter at
# preference 10000 that drops every packet reaching it.
matchall_sink_create()
{
	local dev=$1

	tc qdisc add dev $dev clsact
	tc filter add dev $dev ingress pref 10000 matchall action drop
}
1354
# Default cleanup handler: run the pre-cleanup steps, then the cleanups that
# were registered through defer.
cleanup()
{
	pre_cleanup; defer_scopes_cleanup
}
1360
# Usage: multipath_eval DESC WEIGHT_RP12 WEIGHT_RP13 PACKETS_RP12 PACKETS_RP13
#
# Compare the ratio of packets seen on two paths with the ratio of the
# weights configured for them, always dividing the side with the larger
# weight by the other. The test named DESC fails when either packet count is
# zero, or when the measured ratio deviates from the expected one by more
# than 15%. Needs bc.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
		check_err 1 "Packet difference is 0"
		# Report under the caller's description, like the normal path
		# below, rather than under a fixed name.
		log_test "$desc"
		log_info "Expected ratio $weights_ratio"
		return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	# Absolute difference between the two ratios.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1403
# Run a command inside a network namespace, in a fresh bash that has sourced
# lib.sh with NUM_NETIFS set to 0.
# $1 - name of the network namespace
# $2... - the command and its arguments
# Each argument is shell-quoted with printf %q so that it reaches the inner
# shell as a single word.
in_ns()
{
	local name=$1; shift
	local quoted_cmd=""

	# printf re-applies the format to every argument; guard the empty
	# case, where it would otherwise emit a quoted empty word.
	if (($# > 0)); then
		quoted_cmd=$(printf "%q${IFS:0:1}" "$@")
	fi

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$quoted_cmd
	EOF
}
1414
1415##############################################################################
1416# Tests
1417
# Ping an address with $PING from the VRF that an interface is enslaved to.
# $1 - interface whose master (as given by master_name_get) selects the VRF
# $2 - destination address
# $3 - optional extra ping arguments (subject to word splitting)
# Sends $PING_COUNT packets 0.1s apart with a deadline of $PING_TIMEOUT.
# All output is discarded; the return status is that of the ping run.
ping_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name
	local -a ping_cmd

	vrf_name=$(master_name_get $if_name)
	ping_cmd=($PING $args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT $dip)

	ip vrf exec $vrf_name "${ping_cmd[@]}" &> /dev/null
}
1430
# Test that an IPv4 ping succeeds.
# $1 - interface to ping from
# $2 - destination address
# $3 - optional suffix appended to the test name "ping"
ping_test()
{
	local ping_rc

	RET=0

	ping_do $1 $2
	ping_rc=$?

	check_err $ping_rc
	log_test "ping$3"
}
1439
# Test that an IPv4 ping does NOT succeed.
# $1 - interface to ping from
# $2 - destination address
# $3 - optional suffix appended to the test name "ping fails"
ping_test_fails()
{
	local ping_rc

	RET=0

	ping_do $1 $2
	ping_rc=$?

	check_fail $ping_rc
	log_test "ping fails$3"
}
1448
# Ping an address with $PING6 from the VRF that an interface is enslaved to.
# $1 - interface whose master (as given by master_name_get) selects the VRF
# $2 - destination address
# $3 - optional extra ping arguments (subject to word splitting)
# Sends $PING_COUNT packets 0.1s apart with a deadline of $PING_TIMEOUT.
# All output is discarded; the return status is that of the ping run.
ping6_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name
	local -a ping_cmd

	vrf_name=$(master_name_get $if_name)
	ping_cmd=($PING6 $args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT $dip)

	ip vrf exec $vrf_name "${ping_cmd[@]}" &> /dev/null
}
1461
# Test that an IPv6 ping succeeds.
# $1 - interface to ping from
# $2 - destination address
# $3 - optional suffix appended to the test name "ping6"
ping6_test()
{
	local ping_rc

	RET=0

	ping6_do $1 $2
	ping_rc=$?

	check_err $ping_rc
	log_test "ping6$3"
}
1470
# Test that an IPv6 ping does NOT succeed.
# $1 - interface to ping from
# $2 - destination address
# $3 - optional suffix appended to the test name "ping6 fails"
ping6_test_fails()
{
	local ping_rc

	RET=0

	ping6_do $1 $2
	ping_rc=$?

	check_fail $ping_rc
	log_test "ping6 fails$3"
}
1479
# Succeed if the FDB of bridge $1 holds an entry for MAC $3 on port $2.
__learning_test_fdb_has()
{
	bridge -j fdb show br $1 brport $2 \
		| jq -e ".[] | select(.mac == \"$3\")" &> /dev/null
}

# Succeed if the filter with handle 101 on the ingress of device $1 has
# counted exactly one packet.
__learning_test_filter_hit()
{
	tc -j -s filter show dev $1 ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
}

# Test FDB learning on a bridge port.
# $1 - bridge device
# $2 - bridge port under test, connected to host1_if
# $3 - host interface behind the port under test (host1_if)
# $4 - host interface behind another bridge port (host2_if)
# The test verifies, in order, that: the test MAC is initially unknown; with
# flooding off a packet to the unknown MAC does not reach host1_if; a packet
# sourced from host1_if makes the bridge learn the MAC; traffic to the MAC
# then reaches host1_if; the entry ages out; and nothing is learned while
# learning is disabled on the port. Port and host settings changed along the
# way are restored before the result is logged as "FDB learning".
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time

	RET=0

	__learning_test_fdb_has $bridge $br_port1 $mac
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	# Count packets destined to the test MAC that arrive at `host1_if`.
	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	__learning_test_filter_hit $host1_if
	check_fail $? "Packet reached first host when should not"

	# Send a packet with the test MAC as source so the bridge learns it.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	__learning_test_fdb_has $bridge $br_port1 $mac
	check_err $? "Did not find FDB record when should"

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	# The counter lives on `host1_if`, i.e. on the first host.
	__learning_test_filter_hit $host1_if
	check_err $? "Packet did not reach first host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	__learning_test_fdb_has $bridge $br_port1 $mac
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	__learning_test_fdb_has $bridge $br_port1 $mac
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning on

	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1556
# Send one packet from host1_if and check whether it shows up on host2_if.
# $1 - "true" if the packet is expected to be flooded, "false" if not
# $2 - destination MAC of the probe packet
# $3 - destination IP of the probe packet
# $4 - interface the packet is sent from (host1_if)
# $5 - interface the packet is looked for on (host2_if)
# Returns 0 when the observation matches the expectation, 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local seen_rc

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# seen_rc is 0 iff the ACL counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	seen_rc=$?

	case "$should_flood" in
	true)
		((seen_rc != 0)) && err=1
		;;
	false)
		((seen_rc == 0)) && err=1
		;;
	esac

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1590
# Test that unknown unicast flooding on a bridge port follows the port's
# "flood" setting: no flooding with it off, flooding with it on.
# $1 - bridge port whose flood flag is toggled
# $2 - interface the probe is sent from
# $3 - interface the probe is looked for on
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=de:ad:be:ef:13:37
	local ip=192.0.2.100
	local -a probe=($mac $ip $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1613
# Test that unregistered multicast flooding on a bridge port follows the
# port's "mcast_flood" setting: no flooding with it off, flooding with it on.
# $1 - bridge port whose mcast_flood flag is toggled
# $2 - interface the probe is sent from
# $3 - interface the probe is looked for on
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=01:00:5e:00:00:01
	local ip=239.0.0.1
	local -a probe=($mac $ip $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1636
# Run the unknown-unicast and the unregistered-multicast flood tests, in
# that order.
# $1 - bridge port connected to host2_if
# $2 - interface the probes are sent from (host1_if)
# $3 - interface the probes are looked for on (host2_if)
flood_test()
{
	# `br_port` is connected to `host2_if`
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local flood_kind

	for flood_kind in unicast multicast; do
		flood_${flood_kind}_test $br_port $host1_if $host2_if
	done
}
1647
# Start an endless $MZ packet stream in the background, then wait a second.
# $1 - packet size
# $2 - packet type passed to -t (e.g. udp, tcp)
# $3 - interface where the traffic egresses the host
# $4 - source IP
# $5 - destination IP
# $6 - destination MAC
# $7... - extra arguments appended to the $MZ command line
# The source MAC is the interface's own (-a own); -c 0 asks for an unbounded
# packet count.
__start_traffic()
{
	local pktsize=$1 proto=$2 h_in=$3 sip=$4 dip=$5 dmac=$6
	local -a mz_args=("${@:7}")
	local -a mz_cmd

	mz_cmd=($MZ $h_in -p $pktsize -A $sip -B $dip -c 0
		-a own -b $dmac -t "$proto" -q "${mz_args[@]}")

	"${mz_cmd[@]}" &
	sleep 1
}
1662
# Start a background UDP stream with a given packet size.
# $1 - packet size
# $2 - interface where the traffic egresses the host
# $3 - source IP
# $4 - destination IP
# $5 - destination MAC
# $6... - extra arguments handed through to __start_traffic
start_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5
	local -a mz_args=("${@:6}")

	__start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \
			"${mz_args[@]}"
}
1675
# Start a background TCP stream with a given packet size.
# $1 - packet size
# $2 - interface where the traffic egresses the host
# $3 - source IP
# $4 - destination IP
# $5 - destination MAC
# $6... - extra arguments handed through to __start_traffic
start_tcp_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5
	local -a mz_args=("${@:6}")

	__start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \
			"${mz_args[@]}"
}
1688
# Start a background UDP stream of 8000-byte packets.
# $1 - interface where the traffic egresses the host
# $2 - source IP
# $3 - destination IP
# $4 - destination MAC
# $5... - extra arguments handed through to start_traffic_pktsize
start_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4
	local -a mz_args=("${@:5}")

	start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
			      "${mz_args[@]}"
}
1700
# Start a background TCP stream of 8000-byte packets.
# $1 - interface where the traffic egresses the host
# $2 - source IP
# $3 - destination IP
# $4 - destination MAC
# $5... - extra arguments handed through to start_tcp_traffic_pktsize
start_tcp_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4
	local -a mz_args=("${@:5}")

	start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
				  "${mz_args[@]}"
}
1712
# Stop a traffic generator through kill_process.
# $1 - optional PID or job spec to stop; when no argument is given at all,
#      the job spec %% is used.
stop_traffic()
{
	local pid=%%

	if (($# > 0)); then
		pid=$1
		shift
	fi

	kill_process "$pid"
}
1719
# Per-interface tcpdump state, keyed by interface name: PID of the capture
# process, path of the capture file, and path of the file that collects
# tcpdump's own stdout/stderr.
declare -A cappid capfile capout
1723
# Start a background tcpdump capturing ingress traffic of an interface.
# $1 - interface to capture on; also the key into cappid/capfile/capout
# $2 - optional network namespace to run tcpdump in
# The capture is written to a fresh temporary file (capfile), tcpdump's own
# output to another one (capout), and the PID is stored in cappid.
# $TCPDUMP_EXTRA_FLAGS is spliced into the command line. When $SUDO_USER is
# set, it is passed to tcpdump via -Z.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	# Neither variable is local; tcpdump_stop reads ns_cmd later on.
	ns_cmd=${ns:+ip netns exec ${ns}}
	capuser=${SUDO_USER:+-Z $SUDO_USER}

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1751
# Stop the tcpdump started for an interface and wait for it to exit, then
# pause for a second.
# $1 - interface name used as key into cappid
# The kill command is prefixed with $ns_cmd as set by tcpdump_start.
tcpdump_stop()
{
	local if_name=$1
	local pid=${cappid[$if_name]}

	if $ns_cmd kill "$pid"; then
		wait "$pid"
	fi

	sleep 1
}
1760
# Remove the capture file and the tcpdump output file of an interface.
# $1 - interface name used as key into capfile/capout
tcpdump_cleanup()
{
	local if_name=$1
	local -a stale=(${capfile[$if_name]} ${capout[$if_name]})

	rm "${stale[@]}"
}
1767
# Decode the capture file of an interface with tcpdump -e -n and print the
# result, with tcpdump's stderr folded into stdout.
# $1 - interface name used as key into capfile
tcpdump_show()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}

	tcpdump -e -n -r $pcap 2>&1
}
1774
# Send one UDP packet (sp=2048, dp=4096) from host1_if and report whether it
# arrived at host2_if.
# $1 - destination MAC
# $2 - source IP
# $3 - destination IP; anything that does not look like a dotted-quad IPv4
#      address is treated as IPv6
# $4 - interface the packet is sent from (host1_if)
# $5 - interface the packet is looked for on (host2_if)
# Returns 0 if the packet wasn't seen on host2_if or 1 if it was.
mcast_packet_test()
{
	local mac=$1
	local src_ip=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local seen=0
	local tc_proto="ipv6"
	local mz_v6arg="-6"
	local ipv4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'

	# Assume IPv6 unless the destination passes a basic IPv4 shape check.
	if [[ $ip =~ $ipv4_re ]]; then
		tc_proto="ip"
		mz_v6arg=""
	fi

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was received by it or not.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# The packet counts as seen iff the ACL counted exactly one packet.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1814
# Check the MDB of br0 after a membership report: the *,G entry of
# $TEST_GROUP must have a source list containing every given source, and an
# S,G entry must exist for each of them.
# $1 - name of the report, used in the error message
# $2... - source addresses expected in the MDB
# Failures are recorded through check_err.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so that the caller's `src'/`sgent' are not
	# overwritten.
	local src
	local sgent

	# Build one "and ..." clause per expected source for the jq filter.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1836
# Check whether traffic from the given sources to $TEST_GROUP is forwarded
# from $h2 to $h1.
# $1 - 1 if the traffic is expected to be forwarded, anything else if it is
#      expected to be blocked
# $2... - source addresses to probe
# mcast_packet_test returns 1 when the packet was seen, hence check_fail is
# used for the "should forward" case and check_err for the blocked one.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Local, so that the caller's `src' is not overwritten by the loop.
	local src
	local retval

	for src in "${sources[@]}"; do
		retval=0

		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1854
# Check the blocked/unblocked state of S,G entries of $TEST_GROUP in the MDB
# of br0.
# $1 - 1 if the sources are expected to be blocked, anything else otherwise
# $2... - source addresses to check
# For each source two things are probed: whether its source-list timer reads
# "0.00", and whether its S,G entry carries the "blocked" flag. Both probes
# are expected to match when the source is blocked and to not match
# otherwise; the verdict is recorded through check_err_fail.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Local, so that the caller's `src' is not overwritten by the loop.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1881
# Join an IP multicast group on an interface by running mreceive in the
# background, inside the VRF given by master_name_get.
# $1 - interface to join on
# $2 - multicast group
# The PID of mreceive is left in the global mreceive_pid, which mc_leave
# reads.
mc_join()
{
	local if_name=$1
	local group=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	# We don't care about actual reception, just about joining the
	# IP multicast group and adding the L2 address to the device's
	# MAC filtering table
	ip vrf exec $vrf_name mreceive -g $group -I $if_name \
		> /dev/null 2>&1 &
	mreceive_pid=$!

	sleep 1
}
1897
# Leave the multicast group joined by mc_join: kill the process recorded in
# $mreceive_pid and, if the kill succeeded, wait for it to exit.
# Returns the failing status of kill, or else the status reported by wait.
mc_leave()
{
	kill "$mreceive_pid" || return
	wait "$mreceive_pid"
}
1902
# Send one multicast packet with msend (-c 1), inside the VRF given by
# master_name_get for the interface.
# $1 - interface to send from
# $2 - value for msend's -g option
# All output of the command is discarded.
mc_send()
{
	local if_name=$1
	local groups=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 \
		> /dev/null 2>&1
}
1912
# Start the multicast routing daemon $MCD with a generated configuration and
# register deferred actions that undo the setup.
# $1... - extra interfaces to enable in the daemon, besides NETIFS[p1..pN]
# Returns 1 if $MCD or $MC_CLI fail check_command.
# Deferred (via defer): removal of the temporary config directory, switching
# multicast back off on interfaces where it had to be enabled here, and
# kill_process of the daemon PID read from its PID file.
adf_mcd_start()
{
	local ifs=("$@")

	local table_name="$MCD_TABLE_NAME"
	local smcroutedir
	local mcd_conf
	local mcd_pidfile
	local pid
	local ifname
	local i

	check_command "$MCD" || return 1
	check_command "$MC_CLI" || return 1

	smcroutedir=$(mktemp -d)
	defer rm -rf "$smcroutedir"
	mcd_conf="$smcroutedir/$table_name.conf"
	mcd_pidfile="$smcroutedir/$table_name.pid"

	# One "phyint" line per test interface...
	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		echo "phyint ${NETIFS[p$i]} enable" >> "$mcd_conf"
	done

	# ...and one per extra interface, which must have MULTICAST set.
	for ifname in "${ifs[@]}"; do
		if ! ip_link_has_flag "$ifname" MULTICAST; then
			ip link set dev "$ifname" multicast on
			defer ip link set dev "$ifname" multicast off
		fi

		echo "phyint $ifname enable" >> "$mcd_conf"
	done

	"$MCD" -N -I "$table_name" -f "$mcd_conf" -P "$mcd_pidfile"
	# Wait for the PID file to show up before reading it.
	busywait "$BUSYWAIT_TIMEOUT" test -e "$mcd_pidfile"
	pid=$(cat "$mcd_pidfile")
	defer kill_process "$pid"
}
1950
# Run $MC_CLI with -I $MCD_TABLE_NAME followed by the given arguments.
# $@ - arguments passed on to $MC_CLI
mc_cli()
{
	"$MC_CLI" -I "$MCD_TABLE_NAME" "$@"
}
1957
# Start "ip monitor" for one object type in the background, logging to a
# temporary file.
# $1 - object type to monitor
# $2 - optional command to use instead of "ip"
# Prints "<pid> <tmpfile>" on stdout, for handing over to stop_ip_monitor.
# Note that tmpfile and mpid are not declared local here.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	# start the monitor in the background
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1969
# Stop a monitor started by start_ip_monitor and check how many events it
# logged.
# $1 - PID of the monitor
# $2 - file the monitor was logging to; removed at the end
# $3 - expected number of events
# $4 - description used in the error message
# An event is a line of the log that starts with a word character. A
# mismatch is recorded through check_err.
stop_ip_monitor()
{
	local mpid=$1; shift
	local tmpfile=$1; shift
	local el=$1; shift
	local what=$1; shift
	local lines

	sleep 0.2
	kill $mpid

	lines=$(grep '^\w' $tmpfile | wc -l)
	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"

	rm -rf $tmpfile
}
1984
# Test that "ip monitor stats" emits exactly one notification for each of
# four transitions of a netdevice's HW stats: enablement, installation,
# deinstallation and disablement.
# $1 - netdevice
# $2 - stats type; "<type>_stats" is what gets toggled
# $3 - command that triggers the installation notification
# $4 - command that triggers the deinstallation notification
# $5 - optional command to use instead of "ip"
# The result is logged as "<type>_stats notifications".
hw_stats_monitor_test()
{
	local dev=$1; shift
	local type=$1; shift
	local make_suitable=$1; shift
	local make_unsuitable=$1; shift
	local ip=${1-ip}; shift
	local ipmout
	local __hsm_step

	# The action to run at each step, and the matching notification name.
	# Actions are expanded unquoted below, so they word-split into a
	# command and its arguments.
	local -a __hsm_actions=(
		"$ip stats set dev $dev ${type}_stats on"
		"$make_suitable"
		"$make_unsuitable"
		"$ip stats set dev $dev ${type}_stats off"
	)
	local -a __hsm_events=(
		enablement
		installation
		deinstallation
		disablement
	)

	RET=0

	# Expect one notification per step.
	for __hsm_step in 0 1 2 3; do
		ipmout=$(start_ip_monitor stats "$ip")
		${__hsm_actions[__hsm_step]}
		stop_ip_monitor $ipmout 1 "${type}_stats ${__hsm_events[__hsm_step]}"
	done

	log_test "${type}_stats notifications"
}
2017
# Convert a dotted-quad IPv4 address to colon-separated bytes, each printed
# as two hexadecimal digits (e.g. 192.0.2.1 -> c0:00:02:01).
# $1 - IPv4 address
# No trailing newline is printed.
ipv4_to_bytes()
{
	local IP=$1; shift
	local hex

	# Splitting the octets on the substituted spaces is intended; printf
	# re-applies the format to each of them.
	printf -v hex '%02x:' ${IP//./ }
	printf '%s' "${hex%:}"
}
2025
# Print an IPv6 address in fully expanded form: the :: token, if any, is
# replaced by the zero groups it stands for, and every 16-bit group is
# zero-padded to four hexadecimal digits.
# $1 - IPv6 address
# $2 - optional separator inserted between the two bytes of each group
# No trailing newline is printed.
expand_ipv6()
{
	local IP=$1; shift
	local bytesep=$1; shift

	# Mark the position of :: with a placeholder, count the colons that
	# are left, and put back as many colons as are missing from the seven
	# that a full address has.
	local marked=${IP/::/_}
	local present=${marked//[^:]/}
	local seven=:::::::
	local filled=${marked/_/${seven:${#present}}}

	# One group per line: left-pad with zeroes, keep the last four digits
	# (split by the byte separator), then join the lines with colons.
	printf '%s\n' "$filled" |
	    tr : '\n' |
	    sed -e 's/^/0000/' \
		-e 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
	    tr '\n' : |
	    sed 's/:$//'
}
2047
# Convert an IPv6 address to colon-separated bytes by running expand_ipv6
# with ":" as the byte separator.
# $1 - IPv6 address
ipv6_to_bytes()
{
	local addr=$1

	expand_ipv6 "$addr" :
}
2054
# Print a number as two colon-separated bytes, most significant first, each
# as two hexadecimal digits (e.g. 258 -> 01:02). Only the low four
# hexadecimal digits of larger values are kept.
# $1 - the number
# No trailing newline is printed.
u16_to_bytes()
{
	local u16=$1; shift
	local hex

	printf -v hex "%04x" $u16
	printf '%s:%s' "${hex: -4:2}" "${hex: -2}"
}
2061
# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
# stands for 00:00.
# $1 - the payload template
# The checksum is printed as a decimal number: only dc's input radix is
# switched to 16, its output radix stays at the default.
payload_template_calc_checksum()
{
	local payload=$1; shift

	(
	    # Set input radix.
	    echo "16i"
	    # Push zero for the initial checksum.
	    echo 0

	    # Pad the payload with a terminating 00: in case we get an odd
	    # number of bytes.
	    echo "${payload%:}:00:" |
		sed 's/CHECKSUM/00:00/g' |
		tr '[:lower:]' '[:upper:]' |
		# Add the word to the checksum.
		sed 's/\(..\):\(..\):/\1\2+\n/g' |
		# Strip the extra odd byte we pushed if left unconverted.
		sed 's/\(..\):$//'

	    # Fold the carry back into the low 16 bits. Adding the carry can
	    # itself overflow 16 bits (e.g. a sum of 1FFFF folds to 10000),
	    # so fold a second time; that settles any sum below 2^32.
	    echo "10000 ~ +"
	    echo "10000 ~ +"
	    echo "FFFF r - p"	# Bit-flip and print.
	) |
	    dc |
	    tr '[:upper:]' '[:lower:]'
}
2092
# Print a payload template with every CHECKSUM keyword replaced by the given
# checksum, rendered as two colon-separated bytes by u16_to_bytes.
# $1 - the payload template
# $2 - the checksum value
payload_template_expand_checksum()
{
	local payload=$1; shift
	local checksum=$1; shift
	local ckbytes

	ckbytes=$(u16_to_bytes $checksum)

	echo "${payload//CHECKSUM/$ckbytes}"
}
2102
# Print the number of bytes that a payload template describes. A trailing
# colon is ignored and CHECKSUM counts as the two bytes it expands to.
# $1 - the payload template
payload_template_nbytes()
{
	local payload=$1; shift

	# One byte per line once the colons are turned into newlines.
	payload_template_expand_checksum "${payload%:}" 0 |
		tr ':' '\n' |
		wc -l
}
2110
# Print the bytes of an IGMPv3 Membership Report that carries a single
# IS_IN group record, with the checksum filled in.
# $1 - multicast group address
# $2... - source addresses to list in the record
igmpv3_is_in_get()
{
	local GRP=$1; shift
	local sources=("$@")

	local igmpv3
	local nsources
	local checksum
	local src

	nsources=$(u16_to_bytes ${#sources[@]})

	# IS_IN ( $sources )
	igmpv3="22:"				# Type - Membership Report
	igmpv3+="00:"				# Reserved
	igmpv3+="CHECKSUM:"			# Checksum
	igmpv3+="00:00:"			# Reserved
	igmpv3+="00:01:"			# Number of Group Records
	igmpv3+="01:"				# Record Type - IS_IN
	igmpv3+="00:"				# Aux Data Len
	igmpv3+="${nsources}:"			# Number of Sources
	igmpv3+="$(ipv4_to_bytes $GRP):"	# Multicast Address
	for src in "${sources[@]}"; do		# Source Addresses
		igmpv3+="$(ipv4_to_bytes $src):"
	done

	checksum=$(payload_template_calc_checksum "$igmpv3")

	payload_template_expand_checksum "$igmpv3" $checksum
}
2139
# Print the bytes of an IGMPv2 Leave Group message for a group, with the
# checksum filled in.
# $1 - multicast group address
igmpv2_leave_get()
{
	local GRP=$1; shift

	local payload
	local checksum

	payload="17:"				# Type - Leave Group
	payload+="00:"				# Max Resp Time - not meaningful
	payload+="CHECKSUM:"			# Checksum
	payload+="$(ipv4_to_bytes $GRP)"	# Group Address

	checksum=$(payload_template_calc_checksum "$payload")

	payload_template_expand_checksum "$payload" $checksum
}
2154
# Print the bytes of a hop-by-hop extension header followed by an MLDv2
# Report that carries a single IS_IN group record, with the ICMPv6 checksum
# filled in.
# $1 - source IPv6 address, used for the checksum pseudo-header
# $2 - multicast group address; also the destination in the pseudo-header
# $3... - source addresses to list in the record
mldv2_is_in_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local sources=("$@")

	local hbh
	local icmpv6
	local nsources
	local len
	local pseudo_hdr
	local checksum
	local src

	nsources=$(u16_to_bytes ${#sources[@]})

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="8f:"				# Type - MLDv2 Report
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Reserved
	icmpv6+="00:01:"			# Number of Group Records
	icmpv6+="01:"				# Record Type - IS_IN
	icmpv6+="00:"				# Aux Data Len
	icmpv6+="${nsources}:"			# Number of Sources
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address
	for src in "${sources[@]}"; do		# Source Addresses
		icmpv6+="$(ipv6_to_bytes $src):"
	done

	# The checksum covers a pseudo-header in front of the ICMPv6 message;
	# the hop-by-hop header is not part of it.
	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
	pseudo_hdr="$(ipv6_to_bytes $SIP):"	# SIP
	pseudo_hdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	pseudo_hdr+="${len}:"			# Upper-layer length
	pseudo_hdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${pseudo_hdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2198
# Print the bytes of a hop-by-hop extension header followed by an MLDv1 Done
# message for a group, with the ICMPv6 checksum filled in.
# $1 - source IPv6 address, used for the checksum pseudo-header
# $2 - multicast group address; also the destination in the pseudo-header
mldv1_done_get()
{
	local SIP=$1; shift
	local GRP=$1; shift

	local hbh
	local icmpv6
	local len
	local pseudo_hdr
	local checksum

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="84:"				# Type - MLDv1 Done
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Max Resp Delay - not meaningful
	icmpv6+="00:00:"			# Reserved
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address

	# The checksum covers a pseudo-header in front of the ICMPv6 message;
	# the hop-by-hop header is not part of it.
	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
	pseudo_hdr="$(ipv6_to_bytes $SIP):"	# SIP
	pseudo_hdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	pseudo_hdr+="${len}:"			# Upper-layer length
	pseudo_hdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${pseudo_hdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2233
# Warn when lldpad is running and, unless the user opted in through
# ALLOW_LLDPAD, skip the calling test.
# $1 - what lldpad will likely do (fills "lldpad will likely ...")
# $2 - what the test does that conflicts (fills "this test will ...")
# Returns 0 when lldpad is not active, or when it is active and ALLOW_LLDPAD
# is non-empty. Otherwise logs a skip for "<caller source>:<caller function>"
# and exits the shell with $EXIT_STATUS.
bail_on_lldpad()
{
	local reason1="$1"; shift
	local reason2="$1"; shift
	local caller=${FUNCNAME[1]}
	local src=${BASH_SOURCE[1]}

	systemctl is-active --quiet lldpad || return 0

	cat >/dev/stderr <<-EOF
	WARNING: lldpad is running

		lldpad will likely $reason1, and this test will
		$reason2. Both are not supported at the same time,
		one of them is arbitrarily going to overwrite the
		other. That will cause spurious failures (or, unlikely,
		passes) of this test.
	EOF

	# The user accepted the risk. Return success explicitly: a bare
	# `return' here would hand back the status of the test just made.
	if [[ -n $ALLOW_LLDPAD ]]; then
		return 0
	fi

	cat >/dev/stderr <<-EOF

		If you want to run the test anyway, please set
		an environment variable ALLOW_LLDPAD to a
		non-empty string.
	EOF
	log_test_skip $src:$caller
	exit $EXIT_STATUS
}
2267
# Print the absolute value of an integer.
# $1 - the integer
absval()
{
	local v=$1; shift

	if ((v > 0)); then
		echo $((v))
	else
		echo $((-v))
	fi
}
2274
# Print "yes" or "no" depending on whether a device gets by without going
# promiscuous when a second unicast MAC address is needed on it.
# $1 - netdevice to probe
# The probe brings the device up, stacks a temporary macvlan named
# macvlan-tmp on it with the device's MAC address plus one, reads the
# device's promiscuity counter, and deletes the macvlan again. A counter of
# exactly 1 yields "no", anything else "yes".
has_unicast_flt()
{
	local dev=$1; shift
	local mac_addr
	local tmp
	local promisc

	mac_addr=$(mac_get $dev)
	tmp=$(ether_addr_to_u64 $mac_addr)

	ip link set $dev up
	ip link add link $dev name macvlan-tmp type macvlan mode private
	ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1)))
	ip link set macvlan-tmp up

	promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')

	ip link del macvlan-tmp

	if [[ $promisc == 1 ]]; then
		echo "no"
	else
		echo "yes"
	fi
}
2293