xref: /linux/tools/testing/selftests/net/netfilter/nft_flowtable.sh (revision f4b369c6fe0ceaba2da2daff8c9eb415f85926dd)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
# This tests basic flowtable functionality.
# Creates the following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than both the Originator
# and the Responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTUs, e.g.:
# nft_flowtable.sh -o8000 -l1500 -r2000
#
16
# Helpers that are not defined in this file (checktool, setup_ns,
# cleanup_all_ns, busywait, ksft_skip) are presumably provided by lib.sh.
. lib.sh

# Overall result; used as the exit status of the script.
ret=0
# Upper bound, in seconds, handed to timeout(1) for every socat instance.
SOCAT_TIMEOUT=60

# Names of the temporary files.  They are assigned by mktemp later on and
# start out empty so that cleanup() can reference them at any time.
nsin="" nsin_small="" ns1out="" ns2out=""

# Remember the current setting; cleanup() uses it to undo our change.
log_netns="$(sysctl -n net.netfilter.nf_log_all_netns)"

checktool "nft --version" "run test without nft tool"
checktool "socat -h" "run test without socat"

# Endpoints ns1/ns2 and routers nsr1/nsr2.
setup_ns ns1 ns2 nsr1 nsr2
33
# Exit handler: stop processes left in the endpoint namespaces, remove all
# namespaces and temporary files, and restore the nf_log_all_netns sysctl
# if this script changed it.
# Globals read: ns1, ns2, nsin, nsin_small, ns1out, ns2out, log_netns.
cleanup() {
	local ns

	# -r: do not invoke kill at all when a namespace has no processes.
	for ns in "$ns1" "$ns2"; do
		ip netns pids "$ns" | xargs -r kill 2>/dev/null
	done

	cleanup_all_ns

	rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"

	# The script forces the sysctl to 1, so only a saved value of 0 has to
	# be written back.  Compare as a string: the saved value is empty when
	# the sysctl could not be read, and a numeric test would then complain.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}
44
trap cleanup EXIT

# Switch nf_log_all_netns on for the test run; cleanup() writes back a
# previous value of 0.
sysctl -q net.netfilter.nf_log_all_netns=1

# Three veth pairs form the chain ns1 <-> nsr1 <-> nsr2 <-> ns2.
ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

for dev in veth0 veth1; do
	for router in "$nsr1" "$nsr2"; do
		ip -net "$router" link set "$dev" up
	done
done

# Router addresses on the links towards the endpoints.
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad

# Set different MTUs so that packets coming from ns1 (large MTU) towards ns2
# (smaller MTU) have to be pushed to the stack, either to perform
# fragmentation (ip_no_pmtu_disc=1) or to do PMTU discovery (send an ICMP
# error back to the originator).
# ns2 is reached via nsr2 with a smaller MTU, so that the TCPMSS announced by
# both peers is NOT the lowest link MTU.

# Defaults; the -o, -l and -r options override them.
omtu=9000
lmtu=1500
rmtu=2000

# Size in bytes of the large test file (2 MiB by default, see -s) and of the
# small one used when PMTU discovery is disabled.
filesize=$((2 * 1024 * 1024))
filesize_small=$((filesize / 16))
77
# Print the command line help to stdout and exit with status 1.
# Lists every option accepted by the getopts loop, including -s.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo "   -o originator"
	echo "   -l link"
	echo "   -r responder"
	echo
	echo "Other options"
	echo "   -s size of the transferred file, in bytes"
	exit 1
}
87
# Command line: -o/-l/-r override the three MTUs, -s sets the size of the
# large test file (the small file is always 1/16 of it).  Anything else
# prints the usage text and exits.
while getopts "o:l:r:s:" opt; do
	case "$opt" in
	o)
		omtu=$OPTARG
		;;
	l)
		lmtu=$OPTARG
		;;
	r)
		rmtu=$OPTARG
		;;
	s)
		filesize=$OPTARG
		filesize_small=$((filesize / 16))
		;;
	*)
		usage
		;;
	esac
done
101
# Apply the MTUs.  A failure on any router interface aborts the test; the
# endpoint interfaces are set without a check.
ip -net "$nsr1" link set veth0 mtu "$omtu" || exit 1

ip -net "$ns1" link set eth0 mtu "$omtu"

ip -net "$nsr2" link set veth1 mtu "$rmtu" || exit 1

ip -net "$nsr1" link set veth1 mtu "$lmtu" || exit 1

ip -net "$nsr2" link set veth0 mtu "$lmtu" || exit 1

ip -net "$ns2" link set eth0 mtu "$rmtu"

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad

# Both routers forward IPv6 globally and IPv4 on each veth device.
for router in "$nsr1" "$nsr2"; do
	ip netns exec "$router" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
done
for i in 0 1; do
	for router in "$nsr1" "$nsr2"; do
		ip netns exec "$router" sysctl "net.ipv4.conf.veth$i.forwarding=1" > /dev/null
	done
done
136
for ns in "$ns1" "$ns2"; do
	ip -net "$ns" link set eth0 up

	ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null || {
		echo "ERROR: Check Originator/Responder values (problem during address addition)"
		exit 1
	}

	# don't set ip DF bit for first two tests
	ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

# Originator: addresses and default routes via nsr1.
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
ip -net "$ns1" route add default via dead:1::1

# Responder: addresses and default routes via nsr2.
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
ip -net "$ns2" route add default via dead:2::1

# The routers use each other as default gateway over the transfer net.
ip -net "$nsr1" route add default via 192.168.10.2
ip -6 -net "$nsr1" route add default via fee1:2::2
ip -net "$nsr2" route add default via 192.168.10.1
ip -6 -net "$nsr2" route add default via fee1:2::1
161
# Router1: flowtable f1 on both veth devices plus a drop-by-default forward
# chain.  The test is skipped if the ruleset cannot be loaded.
if ! ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   counter routed_orig { }
   counter routed_repl { }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept

      # count packets supposedly offloaded as per direction.
      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept

      ct state established,related accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF
then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi

# Responder: count incoming TCP packets by DSCP value (cs3 versus 0).
if ! ip netns exec "$ns2" nft -f - <<EOF
table inet filter {
   counter ip4dscp0 { }
   counter ip4dscp3 { }

   chain input {
      type filter hook input priority 0; policy accept;
      meta l4proto tcp goto {
	      ip dscp cs3 counter name ip4dscp3 accept
	      ip dscp 0 counter name ip4dscp0 accept
      }
   }
}
EOF
then
	echo -n "SKIP: Could not load ruleset: "
	nft --version
	exit $ksft_skip
fi
214
# test basic connectivity
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: $ns1 cannot reach $ns2" 1>&2
	exit 1
fi

if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: $ns2 cannot reach $ns1" 1>&2
	exit 1
fi

# Temporary files: the two input files and one output file per transfer
# direction.  Stop right away if any of them cannot be created; cleanup()
# removes whatever was created.
if ! nsin=$(mktemp) || ! nsin_small=$(mktemp) ||
   ! ns1out=$(mktemp) || ! ns2out=$(mktemp); then
	echo "ERROR: could not create temporary files" 1>&2
	exit 1
fi
230
# Fill a file with random data.
#   $1 - path of the file to create or overwrite
#   $2 - number of bytes to write (handed to head -c)
make_file()
{
	local name="$1"
	local sz="$2"

	head -c "$sz" < /dev/urandom > "$name"
}
238
# Check that the byte counters of the forward chain on Router1 did not exceed
# the size of the large test file, i.e. that most of the payload bypassed the
# forward hook.  Both counters are reset while they are read.
#   $1 - test description used in the PASS/FAIL message
# Globals: reads nsr1 and nsin, sets ret to 1 on failure.
# Always returns 0; callers look at ret.
check_counters()
{
	local what=$1
	local ok=1

	local orig repl
	orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
	repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)

	# "packets N bytes M" -> M, without the surrounding whitespace.
	local orig_cnt=${orig#*bytes}
	local repl_cnt=${repl#*bytes}
	orig_cnt=${orig_cnt//[[:space:]]/}
	repl_cnt=${repl_cnt//[[:space:]]/}

	# A counter that cannot be read must fail the check; otherwise the
	# numeric comparisons below error out and the check passes silently.
	local cnt
	for cnt in "$orig_cnt" "$repl_cnt"; do
		case "$cnt" in
		""|*[!0-9]*)
			echo "FAIL: $what: cannot parse counters: original '$orig_cnt', reply '$repl_cnt'" 1>&2
			ret=1
			return 0
			;;
		esac
	done

	# Size of the transmitted file in bytes; the arithmetic expansion drops
	# any padding wc may print.
	local max_orig
	max_orig=$(wc -c < "$nsin")
	max_orig=$((max_orig))
	local max_repl=$max_orig

	# flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
	# should always be lower than the size of the transmitted file (max_orig).
	if [ "$orig_cnt" -gt "$max_orig" ]; then
		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ "$repl_cnt" -gt "$max_repl" ]; then
		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ "$ok" -eq 1 ]; then
		echo "PASS: $what"
	fi

	return 0
}
274
# Compare the responder's per-DSCP packet counters with what a scenario is
# expected to produce.  Both counters are reset while they are read.
#   $1 - scenario: dscp_none, dscp_fwd, dscp_ingress or dscp_egress
#   $2 - pmtu setting of the run, only used in the failure message
# Globals: reads ns2, sets ret to 1 on failure.
# Always returns 0; callers look at ret.
check_dscp()
{
	local what=$1
	local pmtud="$2"
	local ok=1

	local counter
	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)

	# "packets N bytes M" -> N, without the surrounding whitespace.
	local pc4=${counter%*bytes*}
	pc4=${pc4#*packets}
	pc4=${pc4//[[:space:]]/}

	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
	local pc4z=${counter%*bytes*}
	pc4z=${pc4z#*packets}
	pc4z=${pc4z//[[:space:]]/}

	local failmsg="FAIL: pmtu $pmtud: $what counters do not match, expected"

	# A counter that cannot be read must fail the check; otherwise the
	# numeric tests below error out and the check passes silently.
	local val
	for val in "$pc4" "$pc4z"; do
		case "$val" in
		""|*[!0-9]*)
			echo "FAIL: pmtu $pmtud: $what: cannot parse dscp counters, got '$pc4','$pc4z'" 1>&2
			ret=1
			return 0
			;;
		esac
	done

	case "$what" in
	"dscp_none")
		if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_fwd")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_ingress"|"dscp_egress")
		# Both netdev hooks are expected to mark every packet.
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	*)
		echo "$failmsg: Unknown DSCP check" 1>&2
		ret=1
		ok=0
		;;
	esac

	if [ "$ok" -eq 1 ]; then
		echo "PASS: $what: dscp packet counters match"
	fi

	return 0
}
332
# Compare a received file with the file that was sent.
#   $1 - reference (sent) file
#   $2 - received file
#   $3 - description used in the failure message
# Returns 0 when both files are identical.  Otherwise prints a FAIL line to
# stderr, lists both files and returns 1.
check_transfer()
{
	local sent=$1
	local received=$2
	local what=$3

	if cmp "$sent" "$received" > /dev/null 2>&1; then
		return 0
	fi

	echo "FAIL: file mismatch for $what" 1>&2
	ls -l "$sent"
	ls -l "$received"
	return 1
}
348
# Succeeds once ss reports a listening TCP socket on port 12345 in the
# namespace named by $nsb.  nsb is not a parameter: it is whatever variable
# of that name is visible when the function runs.
listener_ready()
{
	local sockets

	sockets=$(ss -N "$nsb" -lnt -o "sport = :12345")
	grep -q 12345 <<< "$sockets"
}
353
# Run one bidirectional TCP transfer between two namespaces and verify it.
#   $1 - namespace of the connecting socat (client)
#   $2 - namespace of the listening socat (it always listens on port 12345)
#   $3 - 0 selects the small input file, any other number the large one
#   $4 - IP version handed to socat: 4 or 6
#   $5 - address the client connects to
#   $6 - port the client connects to
# Both sides send the same input file; each received copy is compared with it.
# Returns 0 on success, 1 if a socat instance exited with an error or the
# received data differs.  On failure the global ret is set to 1 as well.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local pmtu=$3
	local proto=$4
	local dstip=$5
	local dstport=$6
	local lret=0
	local socatc
	local socatl
	local lpid
	local infile="$nsin"

	if [ "$pmtu" -eq 0 ]; then
		infile="$nsin_small"
	fi

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat "-${proto}" \
	    "TCP${proto}-LISTEN:12345,reuseaddr" STDIO < "$infile" > "$ns2out" &
	lpid=$!

	# listener_ready looks at $nsb, which is the local declared above.
	busywait 1000 listener_ready

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat "-${proto}" \
	    "TCP${proto}:${dstip}:${dstport}" STDIO < "$infile" > "$ns1out"
	socatc=$?

	wait "$lpid"
	socatl=$?

	# A socat instance that failed or ran into the timeout is a failed
	# transfer, even if the file comparison below happens to succeed.
	if [ "$socatl" -ne 0 ] || [ "$socatc" -ne 0 ]; then
		echo "FAIL: socat exited with an error: listener $socatl, client $socatc" 1>&2
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
		lret=1
		ret=1
	fi

	return $lret
}
400
# Plain transfer test: forwards its six arguments (client namespace, server
# namespace, pmtu flag, IP version, destination address, destination port)
# unchanged to test_tcp_forwarding_ip and returns that function's status.
test_tcp_forwarding()
{
	local client_ns="$1"
	local server_ns="$2"

	test_tcp_forwarding_ip "$client_ns" "$server_ns" "$3" "$4" "$5" "$6"
}
412
# Run the transfer three times while Router1 rewrites the DSCP field to cs3
# at a different place each time, and check the responder's DSCP counters
# after every run:
#   1. netdev ingress hook on veth0
#   2. netdev egress hook on veth1
#   3. rule inserted into the inet forward chain (left in place afterwards)
# Steps 1 and 2 are skipped with a message if their table cannot be loaded.
# Arguments are the same six as for test_tcp_forwarding_ip.
# The return status is that of the final check_dscp call; the status of the
# individual transfers is not propagated.
test_tcp_forwarding_set_dscp()
{
	local pmtu="$3"
	local proto="$4"
	local dstip="$5"
	local dstport="$6"

	if ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
   chain setdscp0 {
      type filter hook ingress device "veth0" priority 0; policy accept
	ip dscp set cs3
  }
}
EOF
	then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
		check_dscp "dscp_ingress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:ingress for veth0"
	fi

	if ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
   chain setdscp0 {
      type filter hook egress device "veth1" priority 0; policy accept
      ip dscp set cs3
  }
}
EOF
	then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
		check_dscp "dscp_egress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:egress for veth1"
	fi

	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
	# counters should have seen packets (before and after ft offload kicks in).
	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
	check_dscp "dscp_fwd" "$pmtu"
}
460
# Transfer test with NAT on Router1: first straight to the responder address
# (source is masqueraded), then to 10.6.6.6:1666, which the ruleset DNATs to
# the responder.
#   $1 - client namespace
#   $2 - server namespace
#   $3 - pmtu flag: with 1 the forward-chain counters are checked after each
#        transfer, with 0 only the transfer itself decides about PASS
#   $4 - text appended to the PASS/FAIL messages
# Returns the status of the last transfer that was run.  The DNAT transfer is
# only attempted if the masquerade transfer succeeded.
test_tcp_forwarding_nat()
{
	local nsa="$1"
	local nsb="$2"
	local pmtu="$3"
	local what="$4"
	local lret

	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
	lret=$?

	if [ "$lret" -ne 0 ]; then
		# It is the masquerade transfer that failed here, so name it.
		echo "FAIL: flow offload for ns1/ns2 with masquerade $what"
		return $lret
	fi

	if [ "$pmtu" -eq 1 ]; then
		check_counters "flow offload for ns1/ns2 with masquerade $what"
	else
		echo "PASS: flow offload for ns1/ns2 with masquerade $what"
	fi

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.6.6.6 1666
	lret=$?
	if [ "$pmtu" -eq 1 ]; then
		# Called even after a failed transfer, so the counters are reset.
		check_counters "flow offload for ns1/ns2 with dnat $what"
	elif [ "$lret" -eq 0 ]; then
		echo "PASS: flow offload for ns1/ns2 with dnat $what"
	fi

	return $lret
}
494
# Create the two input files with random content.
make_file "$nsin" "$filesize"
make_file "$nsin_small" "$filesize_small"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as
# needed.  Due to the MTU mismatch in both directions, all packets (except
# small packets like pure acks) have to be handled by the normal forwarding
# path.  Therefore, packet counters are not checked.
if ! test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2"
fi

if ! test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
	echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: IPv6 flow offloaded for ns1/ns2"
fi

# Delete the default routes: ns2 is no longer able to reach ns1 directly and
# depends on ns1 being masqueraded in nsr1, i.e. ns1 is expected to show up
# with the nsr1 address.  Only a host route to that address is kept.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
525
# Second test:
# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
# to handle most packets.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
	# A failure must not leave the script with a success status.
	exit 1
fi

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
ip netns exec "$ns2"  nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
	# A failure must not leave the script with a success status.
	exit 1
fi

ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	# Record the failure like the other test cases do.
	ret=1
fi
581
# IPIP tunnel test:
# Send the traffic between the two routers through IPIP (IPv4) and ip6tnl
# (IPv6) tunnels, first directly on the transfer link and then on top of
# VLAN 10, and run the NAT and IPv6 transfer tests for each variant.
# Without ipip support the test is skipped (ret becomes $ksft_skip unless a
# failure was recorded before).  At the end only the IPv4 default routes of
# the routers and the route setup of ns2 are put back.
test_ipip() {
	local dev

	if ! ip -net "$nsr1" link add name tun0 type ipip \
	     local 192.168.10.1 remote 192.168.10.2 >/dev/null; then
		echo "SKIP: could not add ipip tunnel"
		[ "$ret" -eq 0 ] && ret=$ksft_skip
		return
	fi
	ip -net "$nsr1" link set tun0 up
	ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
	ip -net "$nsr1" link set tun6 up
	ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad

	ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
	ip -net "$nsr2" link set tun0 up
	ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
	ip -net "$nsr2" link set tun6 up
	ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad

	# Point the IPv4 default routes at the tunnel peers.
	ip -net "$nsr1" route change default via 192.168.100.2
	ip -net "$nsr2" route change default via 192.168.100.1

	# do not use "route change" and delete old default so
	# socat fails to connect in case new default can't be added.
	ip -6 -net "$nsr1" route delete default
	ip -6 -net "$nsr1" route add default via fee1:3::2
	ip -6 -net "$nsr2" route delete default
	ip -6 -net "$nsr2" route add default via fee1:3::1
	ip -net "$ns2" route add default via 10.0.2.1
	ip -6 -net "$ns2" route add default via dead:2::1

	# Accept traffic leaving through the tunnels and offload the reply
	# direction as well.
	for dev in tun0 tun6; do
		ip netns exec "$nsr1" nft -a insert rule inet filter forward "meta oif $dev accept"
	done
	ip netns exec "$nsr1" nft -a insert rule inet filter forward \
		'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	if ! test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
		echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	else
		echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
	fi

	# Create vlan tagged devices for IPIP traffic.
	ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
	ip -net "$nsr1" link set veth1.10 up
	ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
	ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
	ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'

	ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
	ip -net "$nsr1" link set tun0.10 up
	ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
	ip -net "$nsr1" route change default via 192.168.200.2
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'

	ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
	ip -net "$nsr1" link set tun6.10 up
	ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
	ip -6 -net "$nsr1" route delete default
	ip -6 -net "$nsr1" route add default via fee1:5::2
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'

	ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
	ip -net "$nsr2" link set veth0.10 up
	ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
	ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
	ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
	ip -net "$nsr2" link set tun0.10 up
	ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
	ip -net "$nsr2" route change default via 192.168.200.1
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
	ip -net "$nsr2" link set tun6.10 up
	ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
	ip -6 -net "$nsr2" route delete default
	ip -6 -net "$nsr2" route add default via fee1:5::1

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	if ! test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
		echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	else
		echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
	fi

	# Restore the previous configuration
	ip -net "$nsr1" route change default via 192.168.10.2
	ip -net "$nsr2" route change default via 192.168.10.1
	ip -net "$ns2" route del default via 10.0.2.1
	ip -6 -net "$ns2" route del default via dead:2::1
}
699
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
# veth0 is first enslaved to br0 directly and then through VLAN 10; the NAT
# transfer test runs for both variants.  Without bridge support the test is
# skipped (ret becomes $ksft_skip unless a failure was recorded before).
# Afterwards the bridge and VLAN devices are removed and the addresses of
# ns1 and nsr1 are put back on eth0 and veth0.
test_bridge() {
	if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null; then
		echo "SKIP: could not add bridge br0"
		[ "$ret" -eq 0 ] && ret=$ksft_skip
		return
	fi

	# Move the router addresses from veth0 to the bridge.
	ip -net "$nsr1" addr flush dev veth0
	ip -net "$nsr1" link set up dev veth0
	ip -net "$nsr1" link set veth0 master br0
	ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
	ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
	ip -net "$nsr1" link set up dev br0

	ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

	# br0 with NAT enabled.
	ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
		echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# Another test:
	# Add bridge interface br0 to Router1, with NAT and VLAN.
	ip -net "$nsr1" link set veth0 nomaster
	ip -net "$nsr1" link set down dev veth0
	ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
	ip -net "$nsr1" link set up dev veth0
	ip -net "$nsr1" link set up dev veth0.10
	ip -net "$nsr1" link set veth0.10 master br0

	# The originator talks through a VLAN 10 device as well.
	ip -net "$ns1" addr flush dev eth0
	ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
	ip -net "$ns1" link set eth0 up
	ip -net "$ns1" link set eth0.10 up
	ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
	ip -net "$ns1" route add default via 10.0.1.1
	ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
		echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# restore test topology (remove bridge and VLAN)
	ip -net "$nsr1" link set veth0 nomaster
	ip -net "$nsr1" link set veth0 down
	ip -net "$nsr1" link set veth0.10 down
	ip -net "$nsr1" link delete veth0.10 type vlan
	ip -net "$nsr1" link delete br0 type bridge
	ip -net "$ns1" addr flush dev eth0.10
	ip -net "$ns1" link set eth0.10 down
	ip -net "$ns1" link set eth0 down
	ip -net "$ns1" link delete eth0.10 type vlan

	# restore address in ns1 and nsr1
	ip -net "$ns1" link set eth0 up
	ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
	ip -net "$ns1" route add default via 10.0.1.1
	ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
	ip -net "$ns1" route add default via dead:1::1
	ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
	ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
	ip -net "$nsr1" link set up dev veth0
}
784
# Tunnel and bridge scenarios.
test_ipip

test_bridge

# Throw-away key material and SPIs for the IPsec test: the keys are the
# sha1/md5 digests of the current process list, the SPIs are random numbers.
KEY_SHA="0x$(ps -af | sha1sum | cut -d ' ' -f 1)"
KEY_AES="0x$(ps -af | md5sum | cut -d ' ' -f 1)"
SPI1=$RANDOM
SPI2=$RANDOM

# Make sure the two SPIs differ.
[ "$SPI1" -eq "$SPI2" ] && SPI2=$((SPI2 + 1))
797
# Install ESP tunnel-mode states and policies for one side of the tunnel.
#   $1 - namespace to configure
#   $2 - local tunnel endpoint address
#   $3 - remote tunnel endpoint address
#   $4 - local network (selector)
#   $5 - remote network (selector)
#   $6 - SPI for outgoing traffic
#   $7 - SPI for incoming traffic
# Uses the global keys KEY_AES and KEY_SHA.
do_esp() {
	local ns=$1 me=$2 remote=$3
	local lnet=$4 rnet=$5
	local spi_out=$6 spi_in=$7
	local -a crypto=(enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel)

	# Inbound state first, then the outbound one.
	ip -net "$ns" xfrm state add src "$remote" dst "$me" \
		proto esp spi "$spi_in" "${crypto[@]}" \
		sel src "$rnet" dst "$lnet"
	ip -net "$ns" xfrm state add src "$me" dst "$remote" \
		proto esp spi "$spi_out" "${crypto[@]}" \
		sel src "$lnet" dst "$rnet"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out \
		tmpl src "$me" dst "$remote" proto esp mode tunnel \
		priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd \
		tmpl src "$remote" dst "$me" proto esp mode tunnel \
		priority 1 action allow
}
815
# IPsec tunnel between the two routers, mirrored configuration on each side.
do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

# The NAT rules are not used for the IPsec test.
ip netns exec "$nsr1" nft delete table ip nat

# restore default routes
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1

if ! test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
	echo "FAIL: ipsec tunnel mode for ns1/ns2"
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
else
	check_counters "ipsec tunnel mode for ns1/ns2"
fi

if ! test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
	echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
else
	check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
fi
842
# When started without arguments, run the whole test a second time with
# randomly chosen MTUs and file size.  The child is started with options, so
# it does not recurse any further.
if [ -z "$1" ]; then
	low=1280
	mtu=$((65536 - low))
	# NOTE(review): $RANDOM never exceeds 32767, so the modulo below has no
	# effect and the MTUs top out at low + 32767 - confirm this is intended.
	o=$(((RANDOM%mtu) + low))
	l=$(((RANDOM%mtu) + low))
	r=$(((RANDOM%mtu) + low))

	MINSIZE=$((2 *  1000 * 1000))
	MAXSIZE=$((64 * 1000 * 1000))

	filesize=$(((RANDOM * RANDOM) % MAXSIZE))
	if [ "$filesize" -lt "$MINSIZE" ]; then
		filesize=$((filesize+MINSIZE))
	fi

	echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
	# Quoted so that a script path containing whitespace still works.
	"$0" -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi

exit "$ret"
863