xref: /linux/tools/testing/selftests/net/netfilter/nft_queue.sh (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
#!/bin/bash
#
# nf_queue selftest.  It checks that:
#  1. packets can be queued from all hooks
#  2. nfqueue can be run from more than one base chain
#
# shellcheck disable=SC2162,SC2317

. lib.sh

# Overall script result; individual tests set this to 1 on failure.
ret=0
# Value passed as -t to the nf_queue helper by the counting tests.
timeout=5

# Limit, in seconds, handed to timeout(1) for the SCTP receivers.
SCTP_TEST_TIMEOUT=60
14
# EXIT trap handler: kills processes still running inside the test
# namespaces, removes the namespaces and deletes all temporary files.
cleanup()
{
	local ns

	# ns3 is included as well: the udp test starts a socat receiver there.
	for ns in "$ns1" "$ns2" "$ns3" "$nsrouter"; do
		# The trap is installed before setup_ns runs, so the names
		# may still be empty when we get here.
		[ -n "$ns" ] || continue
		ip netns pids "$ns" | xargs kill 2>/dev/null
	done

	cleanup_all_ns

	rm -f "$TMPINPUT" "$TMPFILE0" "$TMPFILE1" "$TMPFILE2" "$TMPFILE3"
}
28
# Tools used throughout the tests below.
checktool "nft --version" "test without nft tool"
checktool "socat -h" "run test without socat"

modprobe -q sctp

trap cleanup EXIT

setup_ns ns1 ns2 ns3 nsrouter

# Scratch files for helper output, plus the payload sent by the stream tests.
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)

TMPINPUT=$(mktemp)

# Payload size in MiB; reduced to 1/8th when KSFT_MACHINE_SLOW=yes.
COUNT=200
if [ "$KSFT_MACHINE_SLOW" = "yes" ]; then
	COUNT=$((COUNT / 8))
fi
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"

# The first veth pair doubles as probe for veth support.
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: No virtual ethernet pair device support in kernel"
	exit $ksft_skip
fi
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"

# Router side: veth<N-1> serves subnet N and owns its .1 / ::1 address.
for _net in 1 2 3; do
	_dev="veth$((_net - 1))"
	ip -net "$nsrouter" link set "$_dev" up
	ip -net "$nsrouter" addr add "10.0.${_net}.1/24" dev "$_dev"
	ip -net "$nsrouter" addr add "dead:${_net}::1/64" dev "$_dev" nodad
done

for _ns in "$ns1" "$ns2" "$ns3"; do
	ip -net "$_ns" link set eth0 up
done

# Client side: each namespace takes the .99 / ::99 address of its subnet
# and uses the router as default gateway.
_net=0
for _ns in "$ns1" "$ns2" "$ns3"; do
	_net=$((_net + 1))
	ip -net "$_ns" addr add "10.0.${_net}.99/24" dev eth0
	ip -net "$_ns" addr add "dead:${_net}::99/64" dev eth0 nodad
	ip -net "$_ns" route add default via "10.0.${_net}.1"
	ip -net "$_ns" route add default via "dead:${_net}::1"
done
unset _net _dev _ns
85
# Load the queueing ruleset into the router namespace.
#
# $1: name of the inet table to create
# $2: priority used for all five base chains
#
# Every base chain jumps to "nfq", which queues icmp (no explicit queue
# number) and icmpv6 echo traffic (queue 1), both with the bypass flag.
# Forward and output additionally queue the tcp test ports to queues 2 and 3.
load_ruleset() {
	local table=$1
	local priority=$2

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table inet $table {
	chain nfq {
		ip protocol icmp queue bypass
		icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
	}
	chain pre {
		type filter hook prerouting priority $priority; policy accept;
		jump nfq
	}
	chain input {
		type filter hook input priority $priority; policy accept;
		jump nfq
	}
	chain forward {
		type filter hook forward priority $priority; policy accept;
		tcp dport 12345 queue num 2
		jump nfq
	}
	chain output {
		type filter hook output priority $priority; policy accept;
		tcp dport 12345 queue num 3
		tcp sport 23456 queue num 3
		jump nfq
	}
	chain post {
		type filter hook postrouting priority $priority; policy accept;
		jump nfq
	}
}
EOF
}
122
# Load table "countrules" into the router namespace: one base chain per
# hook, each holding a single counter rule.
#
# $1: priority used for all five base chains
load_counter_ruleset() {
	local priority=$1

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table inet countrules {
	chain pre {
		type filter hook prerouting priority $priority; policy accept;
		counter
	}
	chain input {
		type filter hook input priority $priority; policy accept;
		counter
	}
	chain forward {
		type filter hook forward priority $priority; policy accept;
		counter
	}
	chain output {
		type filter hook output priority $priority; policy accept;
		counter
	}
	chain post {
		type filter hook postrouting priority $priority; policy accept;
		counter
	}
}
EOF
}
151
# Ping ns2 from ns1, i.e. across the router.
# Returns 0 on success, 1 if the IPv4 ping failed, 2 if the IPv6 ping failed.
test_ping() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null || return 1
	ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null || return 2

	return 0
}
163
# Ping the router's ns2-facing addresses from ns1.
# Returns 0 on success, 3 if the IPv4 ping failed, 4 if the IPv6 ping failed.
test_ping_router() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null || return 3
	ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null || return 4

	return 0
}
175
# Check that a queue rule without "bypass" drops traffic when nothing
# listens on the queue.
#
# $1: table family, "ip" or "ip6"
#
# Exits the script with status 1 if the ping unexpectedly succeeds or the
# temporary table cannot be removed again.
test_queue_blackhole() {
	local family=$1

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table $family blackh {
	chain forward {
	type filter hook forward priority 0; policy accept;
		queue num 600
	}
}
EOF
	case "$family" in
	ip)
		ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
		lret=$?
		;;
	ip6)
		ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
		lret=$?
		;;
	*)
		lret=111
		;;
	esac

	# queue without bypass keyword should drop traffic if no listener exists.
	if [ "$lret" -eq 0 ]; then
		echo "FAIL: $family expected failure, got $lret" 1>&2
		exit 1
	fi

	if ! ip netns exec "$nsrouter" nft delete table "$family" blackh; then
		echo "FAIL: $family: Could not delete blackh table"
		exit 1
	fi

	echo "PASS: $family: statement with no listener results in packet drop"
}
210
# Succeeds once queue number $2 shows up in the nfnetlink_queue proc file
# as seen from network namespace $1.  Meant to be polled via busywait.
nf_queue_wait()
{
	local netns="$1"
	local id="$2"
	local procfile="/proc/self/net/netfilter/nfnetlink_queue"

	# A missing file means the nfnetlink_queue module isn't loaded yet.
	# We don't load it ourselves: the nf_queue helper running in the
	# background requested the functionality, so module autoload is
	# expected to complete on its own.
	[ -f "$procfile" ] || return 1

	ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
}
226
# Run the ping tests with listeners on queues 0 and 1 and verify the
# number of packets each listener reports.
#
# $1: packet count expected in the last output line of both listeners
#
# Exits the script on failure: with the status of the failing ping helper
# (1-4) when connectivity is broken, with 1 on a packet count mismatch.
test_queue()
{
	local expected="$1"
	local last=""
	local lret file

	# spawn nf_queue listeners
	ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t "$timeout" > "$TMPFILE0" &
	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE1" &

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1

	# Capture the helper's status: the global $ret is still 0 here and
	# must not be used as exit code for a failure.
	test_ping
	lret=$?
	if [ "$lret" -ne 0 ]; then
		echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $lret" 1>&2
		exit "$lret"
	fi

	test_ping_router
	lret=$?
	if [ "$lret" -ne 0 ]; then
		echo "FAIL: netns router unreachable listener on queue 0 and 1: $lret" 1>&2
		exit "$lret"
	fi

	# Let both listeners finish so that their output files are complete.
	# A bare "wait" always returns 0, so its status carries no information
	# and is deliberately not stored in $ret.
	wait

	for file in "$TMPFILE0" "$TMPFILE1"; do
		last=$(tail -n1 "$file")
		if [ "$last" != "$expected packets total" ]; then
			echo "FAIL: Expected $expected packets total, but got $last" 1>&2
			ip netns exec "$nsrouter" nft list ruleset
			exit 1
		fi
	done

	echo "PASS: Expected and received $last"
}
263
# Succeeds if ss reports a listening TCP socket on port 12345 in netns $1.
listener_ready()
{
	local sockets

	sockets=$(ss -N "$1" -lnt -o "sport = :12345")
	[[ "$sockets" == *12345* ]]
}
268
# Transfer $TMPINPUT from ns1 to a socat receiver in ns2 over TCP while a
# listener is attached to queue 2 in the router namespace.
# The verdict is printed by wait_and_check_retval.
test_tcp_forward()
{
	local nfqpid rpid tthen

	ip netns exec "$nsrouter" ./nf_queue -q 2 &
	nfqpid=$!

	timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	rpid=$!

	# both the receiver and the queue listener must be up before sending
	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2

	tthen=$(date +%s)

	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
	kill "$nfqpid"
}
287
# Transfer $TMPINPUT over loopback inside the router namespace while a
# listener is attached to queue 3.
# The verdict is printed by wait_and_check_retval.
test_tcp_localhost()
{
	local rpid nfqpid tthen

	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	rpid=$!

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	nfqpid=$!
	tthen=$(date +%s)

	# both the receiver and the queue listener must be up before sending
	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null

	wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
	kill "$nfqpid"
}
305
# Run the connect_close helper inside the router namespace while a
# listener is attached to queue 3.
#
# The result is taken from the exit status of the (timeout-guarded)
# connect_close run; on failure the global $ret is set to 1.
# NOTE(review): assumes connect_close exits 0 on success - confirm.
test_tcp_localhost_connectclose()
{
	local nfqpid lret

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3
	lret=$?

	kill "$nfqpid"
	# Reap the listener.  A bare "wait" always returns 0, so it cannot be
	# used to decide between PASS and FAIL.
	wait

	if [ "$lret" -eq 0 ]; then
		echo "PASS: tcp via loopback with connect/close"
	else
		echo "FAIL: tcp via loopback with connect/close: connect_close returned $lret" 1>&2
		ret=1
	fi
}
318
# Loopback TCP transfer inside the router namespace where queued packets
# are handed from one nf_queue program to a second one.
#
# Both programs write their output to a file; the test passes when the
# two files are identical.  On mismatch the global $ret is set to 1.
test_tcp_localhost_requeue()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
}
EOF
	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &

	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &

	# The rules above send packets to queue 0.  The program listening
	# there is started with -Q 1 so that it re-queues the packets to the
	# nfqueue program on queue 1 started just before.
	ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &

	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null

	# output files are only complete once both programs have exited
	wait

	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
		echo "FAIL: lost packets during requeue?!" 1>&2
		# make the failure visible in the script's exit status
		ret=1
		return
	fi

	echo "PASS: tcp via loopback and re-queueing"
}
355
# Ping ns2 from ns1 through a vrf device while icmp echo requests are
# queued in the output and postrouting hooks of ns1.
#
# Passes when each of the four rules (two hooks, oifname tvrf and eth0)
# shows a packet counter of 1.  Sets the global $ret to 1 on mismatch.
# Prints SKIP and returns when the vrf device cannot be created.
test_icmp_vrf() {
	local nfqpid pattern

	if ! ip -net "$ns1" link add tvrf type vrf table 9876; then
		echo "SKIP: Could not add vrf device"
		return
	fi

	ip -net "$ns1" li set eth0 master tvrf
	ip -net "$ns1" li set tvrf up

	ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
ip netns exec "$ns1" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 1 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1

	ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null

	for n in output post; do
		for d in tvrf eth0; do
			pattern="oifname \"$d\" icmp type echo-request counter packets 1"
			ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "$pattern" && continue

			# counter mismatch: stop the listener and dump the ruleset
			kill "$nfqpid"
			echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
			ip netns exec "$ns1" nft list ruleset
			ret=1
			return
		done
	done

	kill "$nfqpid"
	echo "PASS: icmp+nfqueue via vrf"
}
403
# Succeeds if ss reports a listening SCTP socket on port 12345 in netns $1.
sctp_listener_ready()
{
	local sockets

	sockets=$(ss -S -N "$1" -ln -o "sport = :12345")
	[[ "$sockets" == *12345* ]]
}
408
# Compare a transferred file with its source.
#
# $1: input (sent) file
# $2: output (received) file
# $3: test description used in the failure message
#
# On mismatch prints details to stderr and sets the global $ret to 1.
check_output_files()
{
	local sent="$1"
	local received="$2"
	local what="$3"

	cmp "$sent" "$received" && return

	{
		echo "FAIL: $what: input and output file differ"
		echo -n " Input file"
		ls -l "$sent"
		echo -n "Output file"
		ls -l "$received"
	} 1>&2
	ret=1
}
424
# Wait for a background process and report PASS/FAIL based on its status.
#
# $1: pid to wait for
# $2: test description
# $3: start time in seconds since the epoch
#
# The reported duration is measured up to the call of this function, i.e.
# the timestamp is taken before waiting.  Sets the global $ret to 1 when
# the process failed.
wait_and_check_retval()
{
	local rpid="$1"
	local msg="$2"
	local tthen="$3"
	local tnow verdict

	tnow=$(date +%s)

	if wait "$rpid"; then
		verdict="PASS: "
	else
		verdict="FAIL: "
		ret=1
	fi

	printf "%s%s (duration: %ds)\n" "$verdict" "$msg" $((tnow-tthen))
}
441
# Transfer $TMPINPUT from ns1 to a socat receiver in ns2 over SCTP while
# the router queues the forwarded packets to queue 10.
#
# Received data is stored in $TMPFILE1 and compared with the input.
# Exits the script if the temporary table cannot be deleted; other
# failures are recorded by the helpers called at the end.
test_sctp_forward()
{
	local rpid nfqpid tthen

	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet sctpq {
        chain forward {
        type filter hook forward priority 0; policy accept;
                sctp dport 12345 queue num 10
        }
}
EOF
	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	rpid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
	nfqpid=$!

	# The rule has no bypass flag, so packets are dropped for as long as
	# nothing listens on the queue.  Wait for the listener like the tcp
	# and udp tests do instead of relying on retransmissions.
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 10

	tthen=$(date +%s)

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	fi

	# must wait before checking completeness of output file.
	wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
	kill "$nfqpid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
}
474
# Transfer a file from ns1 to a socat receiver in ns2 over SCTP while ns1
# queues its own outgoing packets to queue 11.
#
# $TMPINPUT is regenerated with half the usual size.  Received data is
# stored in $TMPFILE1 and compared with the input.  Exits the script if
# the temporary table cannot be deleted; other failures are recorded by
# the helpers called at the end.
test_sctp_output()
{
	local rpid nfqpid tthen

        ip netns exec "$ns1" nft -f /dev/stdin <<EOF
table inet sctpq {
        chain output {
        type filter hook output priority 0; policy accept;
                sctp dport 12345 queue num 11
        }
}
EOF
	# reduce test file size, software segmentation causes sk wmem increase.
	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"

	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	rpid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$ns1" ./nf_queue -q 11 &
	nfqpid=$!

	# The rule has no bypass flag, so packets are dropped for as long as
	# nothing listens on the queue.  Wait for the listener like the tcp
	# and udp tests do instead of relying on retransmissions.
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 11

	tthen=$(date +%s)

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	if ! ip netns exec "$ns1" nft delete table inet sctpq; then
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	fi

	# must wait before checking completeness of output file.
	wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
	kill "$nfqpid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
}
510
# Succeeds if ss (invoked with -S -uln) reports a listening socket on
# port 12345 in netns $1.
udp_listener_ready()
{
	local sockets

	sockets=$(ss -S -N "$1" -uln -o "sport = :12345")
	[[ "$sockets" == *12345* ]]
}
515
# Succeeds once both files $1 and $2 exist and are non-empty.
output_files_written()
{
	[ -s "$1" ] || return 1
	[ -s "$2" ]
}
520
# Send two UDP packets from ns1 to 10.6.6.6, which the router DNATs
# alternately to ns2 and ns3, while the packets are delayed in queue 12.
#
# Passes when exactly one udp conntrack entry exists afterwards and each
# receiver got exactly one line.  Sets the global $ret to 1 on failure.
test_udp_ct_race()
{
	local nfqpid

        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet udpq {
	chain prerouting {
		type nat hook prerouting priority dstnat - 5; policy accept;
		ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
	}
        chain postrouting {
		type filter hook postrouting priority srcnat - 5; policy accept;
		udp dport 12345 counter queue num 12
        }
}
EOF
	:> "$TMPFILE1"
	:> "$TMPFILE2"

	# The receivers are not waited for; timeout(1) ends them after 10s.
	timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
	timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc &

	ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12

	# Send two packets, one should end up in ns2, other in ns3.
	# This is because nfqueue will delay packet for long enough so that
	# second packet will not find existing conntrack entry.
	echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
	echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221

	busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"

	kill "$nfqpid"

	# Match the line count exactly: an unanchored "^1" would also accept
	# 10..19 entries.
	if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^ *1$"'; then
		echo "FAIL: Expected One udp conntrack entry"
		ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345
		ret=1
	fi

	if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
		echo "FAIL: Could not delete udpq table"
		ret=1
		return
	fi

	NUMLINES1=$(wc -l < "$TMPFILE1")
	NUMLINES2=$(wc -l < "$TMPFILE2")

	if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then
		ret=1
		echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2"
		echo -n "$TMPFILE1: ";cat "$TMPFILE1"
		echo -n "$TMPFILE2: ";cat "$TMPFILE2"
		return
	fi

	echo "PASS: both udp receivers got one packet each"
}
587
# Terminate an nf_queue program started with -d 30000 right after a flood
# ping, then flush the ruleset.
#
# The check is based on /proc/sys/kernel/tainted: if the kernel was
# untainted before and is tainted afterwards, dmesg is dumped and the
# global $ret is set to 1.  No verdict is printed when the kernel was
# already tainted at the start.
test_queue_removal()
{
	local nfqpid

	tainted_then=$(cat /proc/sys/kernel/tainted)

	ip netns exec "$ns1" nft -f - <<EOF
flush ruleset
table ip filter {
	chain output {
		type filter hook output priority 0; policy accept;
		ip protocol icmp queue num 0
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0

	ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
	kill "$nfqpid"

	ip netns exec "$ns1" nft flush ruleset

	# already tainted beforehand: nothing can be concluded
	[ "$tainted_then" -ne 0 ] && return

	tainted_now=$(cat /proc/sys/kernel/tainted)
	if [ "$tainted_now" -eq 0 ]; then
		echo "PASS: queue program exiting while packets queued"
	else
		echo "TAINT: queue program exiting while packets queued"
		dmesg
		ret=1
	fi
}
624
# Enable forwarding on the router: IPv6 globally, IPv4 per veth device.
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null

load_ruleset "filter" 0

# Use test_ping's own status for reporting and as exit code: the global
# $ret is still 0 at this point and would turn a failure into success.
test_ping
ping_ret=$?
if [ "$ping_ret" -ne 0 ]; then
	echo "FAIL: ${ns1} cannot reach ${ns2}: $ping_ret" 1>&2
	exit "$ping_ret"
fi
# queue bypass works (rules were skipped, no listener)
echo "PASS: ${ns1} can reach ${ns2}"

test_queue_blackhole ip
test_queue_blackhole ip6

# dummy ruleset to add base chains between the
# queueing rules.  We don't want the second reinject
# to re-execute the old hooks.
load_counter_ruleset 10

# we are hooking all: prerouting/input/forward/output/postrouting.
# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
# so we expect that userspace program receives 10 packets.
test_queue 10

# same.  We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
ip netns exec "$ns1" nft flush ruleset

test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
test_udp_ct_race

# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal

exit $ret
672exit $ret
673