xref: /linux/tools/testing/selftests/net/netfilter/nft_flowtable.sh (revision d69eb204c255c35abd9e8cb621484e8074c75eaa)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# This tests basic flowtable functionality.
5# Creates following default topology:
6#
7# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8# Router1 is the one doing flow offloading, Router2 has no special
9# purpose other than having a link that is smaller than either Originator
10# and responder, i.e. TCPMSS announced values are too large and will still
11# result in fragmentation and/or PMTU discovery.
12#
# You can check with different Originator/Link/Responder MTUs, e.g.:
14# nft_flowtable.sh -o8000 -l1500 -r2000
15#
16
17source lib.sh
18
19ret=0
20SOCAT_TIMEOUT=60
21
22nsin=""
23nsin_small=""
24ns1out=""
25ns2out=""
26
27log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
28
29checktool "nft --version" "run test without nft tool"
30checktool "socat -h" "run test without socat"
31
32setup_ns ns1 ns2 nsr1 nsr2
33
# EXIT handler: kill processes still running in the two endpoint namespaces,
# remove the namespaces and the temporary files, and restore the
# nf_log_all_netns sysctl if this script changed it.
cleanup() {
	# -r: do not run kill at all when a namespace has no processes left.
	# Errors are ignored on purpose, the processes may already be gone.
	ip netns pids "$ns1" | xargs -r kill 2>/dev/null
	ip netns pids "$ns2" | xargs -r kill 2>/dev/null

	cleanup_all_ns

	rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"

	# The script forces the sysctl to 1, so it only has to be undone when
	# the saved value was 0.  Compare as a string: $log_netns is empty when
	# the sysctl could not be read and a numeric test would print an error.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}
44
# From here on every exit path runs cleanup().
trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

# Links: ns1:eth0 <-> nsr1:veth0, nsr1:veth1 <-> nsr2:veth0, nsr2:veth1 <-> ns2:eth0
ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"

ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

# Bring up both interfaces in each router namespace.
for dev in veth0 veth1; do
    ip -net "$nsr1" link set "$dev" up
    ip -net "$nsr2" link set "$dev" up
done

# Router addresses on the links facing ns1 (nsr1) and ns2 (nsr2).
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad

ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

# Defaults for originator, link and responder MTU; -o/-l/-r override them.
omtu=9000
lmtu=1500
rmtu=2000

# Size of the large input file and of the small one (1/16th); -s overrides both.
filesize=$((2 * 1024 * 1024))
filesize_small=$((filesize / 16))
77
# Print the command line help to stdout and exit with status 1.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo "   -o originator"
	echo "   -l link"
	echo "   -r responder"
	echo
	echo "Other options"
	echo "   -s size of the transferred file in bytes"
	exit 1
}
87
# Command line overrides: -o/-l/-r replace the default MTUs, -s replaces the
# file size (the small file is always 1/16th of it).  Anything else prints
# the help text.  Positional parameters are left untouched.
while getopts "o:l:r:s:" o; do
	case "$o" in
	s)
		filesize=$OPTARG
		filesize_small=$((OPTARG / 16))
		;;
	r)
		rmtu=$OPTARG
		;;
	l)
		lmtu=$OPTARG
		;;
	o)
		omtu=$OPTARG
		;;
	*)
		usage
		;;
	esac
done
101
# Apply the originator MTU to the nsr1 <-> ns1 link.  A failure on the router
# side aborts the script.
if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
	exit 1
fi

ip -net "$ns1" link set eth0 mtu "$omtu"

# Responder MTU on the nsr2 side of the nsr2 <-> ns2 link.
if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
	exit 1
fi

# Link MTU on both ends of the nsr1 <-> nsr2 link.
if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
	exit 1
fi

if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
	exit 1
fi

ip -net "$ns2" link set eth0 mtu "$rmtu"

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad

# Enable IPv4 forwarding on veth0 and veth1 of both routers.
for i in 0 1; do
  ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
  ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
done

# Endpoint namespaces: bring eth0 up and adjust TCP/IP sysctls.
for ns in "$ns1" "$ns2";do
  ip -net "$ns" link set eth0 up

  # NOTE(review): the message below talks about address addition, but the
  # command being checked sets tcp_no_metrics_save -- confirm intended wording.
  if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
	echo "ERROR: Check Originator/Responder values (problem during address addition)"
	exit 1
  fi
  # don't set ip DF bit for first two tests
  ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

# Endpoint addresses and default routes via the adjacent router.
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1

# The routers reach the far side through each other (IPv4 only).
ip -net "$nsr1" route add default via 192.168.10.2
ip -net "$nsr2" route add default via 192.168.10.1
157
158ip netns exec "$nsr1" nft -f - <<EOF
159table inet filter {
160  flowtable f1 {
161     hook ingress priority 0
162     devices = { veth0, veth1 }
163   }
164
165   counter routed_orig { }
166   counter routed_repl { }
167
168   chain forward {
169      type filter hook forward priority 0; policy drop;
170
171      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
172      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
173
174      # count packets supposedly offloaded as per direction.
175      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
176
177      ct state established,related accept
178
179      meta nfproto ipv4 meta l4proto icmp accept
180      meta nfproto ipv6 meta l4proto icmpv6 accept
181   }
182}
183EOF
184
185if [ $? -ne 0 ]; then
186	echo "SKIP: Could not load nft ruleset"
187	exit $ksft_skip
188fi
189
190ip netns exec "$ns2" nft -f - <<EOF
191table inet filter {
192   counter ip4dscp0 { }
193   counter ip4dscp3 { }
194
195   chain input {
196      type filter hook input priority 0; policy accept;
197      meta l4proto tcp goto {
198	      ip dscp cs3 counter name ip4dscp3 accept
199	      ip dscp 0 counter name ip4dscp0 accept
200      }
201   }
202}
203EOF
204
205if [ $? -ne 0 ]; then
206	echo -n "SKIP: Could not load ruleset: "
207	nft --version
208	exit $ksft_skip
209fi
210
211# test basic connectivity
212if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
213  echo "ERROR: $ns1 cannot reach ns2" 1>&2
214  exit 1
215fi
216
217if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
218  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
219  exit 1
220fi
221
222nsin=$(mktemp)
223nsin_small=$(mktemp)
224ns1out=$(mktemp)
225ns2out=$(mktemp)
226
# make_file NAME SIZE
# Fill file NAME with SIZE bytes read from /dev/urandom, replacing any
# previous content.
make_file()
{
	# local: keep the helper from leaking "name" and "sz" into the script.
	local name="$1"
	local sz="$2"

	head -c "$sz" < /dev/urandom > "$name"
}
234
# check_counters WHAT
# Read and reset the routed_orig/routed_repl counters in $nsr1 and compare
# their byte counts with the size of the large input file ($nsin).
# Prints "PASS: WHAT" on success; on failure prints a FAIL line to stderr and
# sets the global ret=1.  Always returns 0.
check_counters()
{
	local what=$1
	local ok=1

	local orig repl
	orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
	repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)

	# The matched line has the form "packets N bytes M"; keep the digits of M.
	local orig_cnt=${orig#*bytes}
	local repl_cnt=${repl#*bytes}
	orig_cnt=${orig_cnt//[!0-9]/}
	repl_cnt=${repl_cnt//[!0-9]/}

	local max_orig
	max_orig=$(wc -c < "$nsin")
	max_orig=${max_orig//[!0-9]/}
	local max_repl=$max_orig

	# Without usable numbers the comparisons below would only produce shell
	# errors and the check would pass by accident.
	if [ -z "$orig_cnt" ] || [ -z "$repl_cnt" ] || [ -z "$max_orig" ]; then
		echo "FAIL: $what: could not read counters or input file size (original '$orig', reply '$repl')" 1>&2
		ret=1
		return 0
	fi

	# flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
	# should always be lower than the size of the transmitted file (max_orig).
	if [ "$orig_cnt" -gt "$max_orig" ]; then
		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ "$repl_cnt" -gt "$max_repl" ]; then
		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ "$ok" -eq 1 ]; then
		echo "PASS: $what"
	fi

	return 0
}
270
# check_dscp WHAT PMTUD
# Read and reset the ip4dscp3/ip4dscp0 packet counters in $ns2 and verify the
# combination expected for WHAT:
#   dscp_none                  no cs3 packets, some dscp 0 packets
#   dscp_fwd                   both counters non-zero
#   dscp_ingress, dscp_egress  some cs3 packets, no dscp 0 packets
# PMTUD is only used to label the failure message.
# Prints a PASS line on success; on failure prints a FAIL line to stderr and
# sets the global ret=1.  Always returns 0.
check_dscp()
{
	local what=$1
	local pmtud="$2"
	local ok=1

	local counter
	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)

	# The matched line has the form "packets N bytes M"; keep the digits of N.
	local pc4=${counter%*bytes*}
	pc4=${pc4#*packets}
	pc4=${pc4//[!0-9]/}

	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
	local pc4z=${counter%*bytes*}
	pc4z=${pc4z#*packets}
	pc4z=${pc4z//[!0-9]/}

	# Use this function's own argument; a variable named pmtu only exists
	# when a caller happens to define one.
	local failmsg="FAIL: pmtu $pmtud: $what counters do not match, expected"

	# Without usable numbers the comparisons below would only produce shell
	# errors and the check would pass by accident.
	if [ -z "$pc4" ] || [ -z "$pc4z" ]; then
		echo "FAIL: pmtu $pmtud: $what: could not read dscp counters" 1>&2
		ret=1
		return 0
	fi

	case "$what" in
	"dscp_none")
		if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_fwd")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_ingress"|"dscp_egress")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	*)
		echo "$failmsg: Unknown DSCP check" 1>&2
		ret=1
		ok=0
		;;
	esac

	if [ "$ok" -eq 1 ]; then
		echo "PASS: $what: dscp packet counters match"
	fi

	return 0
}
328
# check_transfer IN OUT WHAT
# Byte-compare the file that was sent (IN) with the file that was received
# (OUT).  Returns 0 when they are identical.  Otherwise prints a FAIL line
# mentioning WHAT to stderr, lists both files and returns 1.
check_transfer()
{
	local sent=$1
	local received=$2
	local label=$3

	if cmp "$sent" "$received" > /dev/null 2>&1; then
		return 0
	fi

	echo "FAIL: file mismatch for $label" 1>&2
	ls -l "$sent"
	ls -l "$received"
	return 1
}
344
# Succeeds once ss reports a listening TCP socket on port 12345 in the
# namespace named by $nsb.  $nsb is not a parameter, it is taken from the
# caller's scope.
listener_ready()
{
	local listeners

	listeners=$(ss -N "$nsb" -lnt -o "sport = :12345")
	grep -q 12345 <<< "$listeners"
}
349
# test_tcp_forwarding_ip NSA NSB PMTU DSTIP DSTPORT
# Run one bidirectional TCP transfer: a socat listener on port 12345 in NSB
# and a socat client in NSA connecting to DSTIP:DSTPORT.  Both sides send the
# same input file (the small one when PMTU is 0, the large one otherwise)
# and store what they receive in $ns2out (listener) and $ns1out (client).
# Returns 0 when both socat processes succeeded and both received files
# match the input; otherwise returns 1 and sets the global ret=1.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local pmtu=$3
	local dstip=$4
	local dstport=$5
	local lret=0
	local socatc
	local socatl
	local lpid
	local infile="$nsin"

	if [ "$pmtu" -eq 0 ]; then
		infile="$nsin_small"
	fi

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
	lpid=$!

	# listener_ready reads $nsb from this function's scope.
	busywait 1000 listener_ready

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
	socatc=$?

	wait "$lpid"
	socatl=$?

	# A failed or timed-out socat is a test failure in its own right.
	if [ "$socatl" -ne 0 ] || [ "$socatc" -ne 0 ]; then
		echo "FAIL: socat exited with an error (listener $socatl, client $socatc)" 1>&2
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
		lret=1
		ret=1
	fi

	return $lret
}
393
# test_tcp_forwarding NSA NSB PMTU
# Run the transfer test against the responder's real address and port
# (10.0.2.99:12345) and hand back the status of test_tcp_forwarding_ip.
test_tcp_forwarding()
{
	local src_ns="$1"
	local dst_ns="$2"
	local pmtu="$3"

	test_tcp_forwarding_ip "$src_ns" "$dst_ns" "$pmtu" 10.0.2.99 12345
}
402
# test_tcp_forwarding_set_dscp NSA NSB PMTU
# Run the transfer test three times while nsr1 rewrites DSCP to cs3:
#   1. from a netdev ingress hook on veth0
#   2. from a netdev egress hook on veth1
#   3. from a rule inserted into the inet filter forward chain
# and check the DSCP counters after each run.  A netdev table that cannot be
# loaded only prints a SKIP line for that run.  The forward-chain rule stays
# in place when the function returns.
test_tcp_forwarding_set_dscp()
{
	# The name "pmtu" is also visible to the functions called from here.
	local pmtu="$3"

	if ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
   chain setdscp0 {
      type filter hook ingress device "veth0" priority 0; policy accept
	ip dscp set cs3
  }
}
EOF
	then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
		check_dscp "dscp_ingress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:ingress for veth0"
	fi

	if ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
   chain setdscp0 {
      type filter hook egress device "veth1" priority 0; policy accept
      ip dscp set cs3
  }
}
EOF
	then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
		check_dscp "dscp_egress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:egress for veth1"
	fi

	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
	# counters should have seen packets (before and after ft offload kicks in).
	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
	test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
	check_dscp "dscp_fwd" "$pmtu"
}
447
# test_tcp_forwarding_nat NSA NSB PMTU WHAT
# Run the transfer test twice: first against the responder's real address
# (10.0.2.99:12345, source masqueraded by nsr1), then against 10.6.6.6:1666,
# which nsr1 is expected to DNAT to the responder.  The second transfer only
# runs when the first one succeeded.  With PMTU == 1 the forward-chain
# counters are verified after each transfer; otherwise a PASS line is printed
# for each successful transfer.  WHAT is appended to the messages.
# Returns the status of the last transfer that was run.
test_tcp_forwarding_nat()
{
	local nsa="$1"
	local nsb="$2"
	local pmtu="$3"
	local what="$4"
	local lret

	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
	lret=$?

	if [ "$lret" -eq 0 ] ; then
		if [ "$pmtu" -eq 1 ] ;then
			check_counters "flow offload for ns1/ns2 with masquerade $what"
		else
			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
		fi

		test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.6.6.6 1666
		lret=$?
		# check_counters also resets the counters, so it runs even when
		# the transfer itself failed.
		if [ "$pmtu" -eq 1 ] ;then
			check_counters "flow offload for ns1/ns2 with dnat $what"
		elif [ "$lret" -eq 0 ] ; then
			echo "PASS: flow offload for ns1/ns2 with dnat $what"
		fi
	else
		# The transfer that failed here is the masquerade one; the dnat
		# transfer has not been attempted.
		echo "FAIL: flow offload for ns1/ns2 with masquerade $what"
	fi

	return $lret
}
481
# Create the random input files used by all transfers below.
make_file "$nsin" "$filesize"
make_file "$nsin_small" "$filesize_small"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path.  Therefore, packet counters
# are not checked.
if test_tcp_forwarding "$ns1" "$ns2" 0; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	# Dump the nsr1 ruleset to help diagnose the failure.
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi
497
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
# to handle most packets.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

# Nothing has rewritten DSCP so far: the first test must have left the cs3
# counter at zero and the dscp 0 counter non-zero.
check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
	# A reported failure must not end the script with a success status.
	exit 1
fi

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi
533
# Third test:
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
ip netns exec "$ns2"  nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
	# A reported failure must not end the script with a success status.
	exit 1
fi

# The DSCP runs above went through the forward chain as well; start the NAT
# run with fresh counters.
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	# Record the failure, as the other NAT test branches do.
	ret=1
fi
560
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
# test_bridge takes no arguments.  It runs the NAT test with veth0 enslaved
# to a bridge, then with a VLAN device on top of veth0 enslaved to the
# bridge, and finally puts the original addresses back on veth0/eth0.
# If the bridge cannot be created it prints a SKIP line and returns early,
# turning ret into $ksft_skip when no failure was recorded before.
test_bridge() {
if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
	echo "SKIP: could not add bridge br0"
	[ "$ret" -eq 0 ] && ret=$ksft_skip
	return
fi
# Move the router addresses from veth0 to br0 and enslave veth0.
ip -net "$nsr1" addr flush dev veth0
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set veth0 master br0
ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
ip -net "$nsr1" link set up dev br0

ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

# br0 with NAT enabled.
ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi


# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
# The bridge port becomes veth0.10 (VLAN id 10) instead of veth0 itself.
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set down dev veth0
ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set up dev veth0.10
ip -net "$nsr1" link set veth0.10 master br0

# Matching VLAN device in ns1; its addresses move from eth0 to eth0.10.
ip -net "$ns1" addr flush dev eth0
ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
ip -net "$ns1" link set eth0 up
ip -net "$ns1" link set eth0.10 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# restore test topology (remove bridge and VLAN)
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set veth0 down
ip -net "$nsr1" link set veth0.10 down
ip -net "$nsr1" link delete veth0.10 type vlan
ip -net "$nsr1" link delete br0 type bridge
ip -net "$ns1" addr flush dev eth0.10
ip -net "$ns1" link set eth0.10 down
ip -net "$ns1" link set eth0 down
ip -net "$ns1" link delete eth0.10 type vlan

# restore address in ns1 and nsr1
ip -net "$ns1" link set eth0 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
ip -net "$ns1" route add default via dead:1::1
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
ip -net "$nsr1" link set up dev veth0
}
645
test_bridge

# Throw-away keys for the ESP tunnel: hex digests of the current "ps -af"
# output, prefixed with 0x.
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
# One SPI per tunnel direction, taken from $RANDOM.
SPI1=$RANDOM
SPI2=$RANDOM

# Make sure the two SPIs differ.
if [ $SPI1 -eq $SPI2 ]; then
	SPI2=$((SPI2+1))
fi
656
# do_esp NS ME REMOTE LNET RNET SPI_OUT SPI_IN
# Configure one end of an ESP tunnel-mode tunnel inside namespace NS.
#   ME / REMOTE       tunnel endpoint addresses (local / peer)
#   LNET / RNET       protected networks (local / peer side)
#   SPI_OUT / SPI_IN  SPI for traffic sent to / received from the peer
# Uses the global keys KEY_AES and KEY_SHA.  Adds two xfrm states (inbound,
# outbound) and two xfrm policies (dir out, dir fwd).
do_esp() {
	local netns=$1
	local local_ep=$2
	local remote_ep=$3
	local local_net=$4
	local remote_net=$5
	local out_spi=$6
	local in_spi=$7
	# Arguments shared by both states and by both policies, respectively.
	local -a crypto=(enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel)
	local -a tmpl_tail=(proto esp mode tunnel priority 1 action allow)

	# inbound state: peer -> us
	ip -net "$netns" xfrm state add src "$remote_ep" dst "$local_ep" \
		proto esp spi "$in_spi" "${crypto[@]}" \
		sel src "$remote_net" dst "$local_net"
	# outbound state: us -> peer
	ip -net "$netns" xfrm state add src "$local_ep" dst "$remote_ep" \
		proto esp spi "$out_spi" "${crypto[@]}" \
		sel src "$local_net" dst "$remote_net"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$netns" xfrm policy add src "$local_net" dst "$remote_net" dir out \
		tmpl src "$local_ep" dst "$remote_ep" "${tmpl_tail[@]}"
	# to fwd decrypted packets after esp processing:
	ip -net "$netns" xfrm policy add src "$remote_net" dst "$local_net" dir fwd \
		tmpl src "$remote_ep" dst "$local_ep" "${tmpl_tail[@]}"
}
674
# Both routers get mirrored configurations: endpoints, networks and SPIs
# are swapped for nsr2.
do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

# The ipsec test runs without NAT.
ip netns exec "$nsr1" nft delete table ip nat

# restore default routes
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1

if test_tcp_forwarding "$ns1" "$ns2" 1; then
	check_counters "ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: ipsec tunnel mode for ns1/ns2"
	# Dump ruleset and xfrm statistics to help diagnose the failure.
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
fi
693
# When the script was started without arguments, run it once more with
# randomly chosen MTUs (at least 1280) and a random file size (at least
# MINSIZE); a failing re-run marks this run as failed too.  The re-run gets
# arguments, so it does not recurse any further.
if [ -z "$1" ]; then
	low=1280
	mtu=$((65536 - low))
	o=$((RANDOM % mtu + low))
	l=$((RANDOM % mtu + low))
	r=$((RANDOM % mtu + low))

	MINSIZE=$((2 * 1000 * 1000))
	MAXSIZE=$((64 * 1000 * 1000))

	filesize=$((RANDOM * RANDOM % MAXSIZE))
	[ "$filesize" -ge "$MINSIZE" ] || filesize=$((filesize + MINSIZE))

	echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
	if ! $0 -o "$o" -l "$l" -r "$r" -s "$filesize"; then
		ret=1
	fi
fi

exit $ret
713exit $ret
714