#!/bin/bash

# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
# pointing at the VRF device.
#
# For ingress, this means first iteration has iifname of lower/real
# device.  In this script, that's veth0.
# Second iteration is iifname set to vrf device, tvrf in this script.
#
# For egress, this is reversed: first iteration has the vrf device,
# second iteration is done with the lower/real/veth0 device.
#
# test_ct_zone_in demonstrates unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
#
# It was possible to assign conntrack zone to a packet (or mark it for
# `notracking`) in the prerouting chain before conntrack, based on real iif.
#
# After the change, the zone assignment is lost and the zone is instead
# assigned based on the VRF master interface (in case such a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.

source lib.sh

IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30
ret=0

# EXIT handler: kill every process still running inside the two test
# namespaces, then call cleanup_all_ns (not defined in this file;
# presumably provided by lib.sh).
cleanup()
{
	ip netns pids $ns0 | xargs kill 2>/dev/null
	ip netns pids $ns1 | xargs kill 2>/dev/null

	cleanup_all_ns
}

checktool "nft --version" "run test without nft"
checktool "conntrack --version" "run test without conntrack"
checktool "socat -h" "run test without socat"

trap cleanup EXIT

setup_ns ns0 ns1

# One veth pair, both ends named veth0, one end in each namespace.
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit $ksft_skip
fi

if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit $ksft_skip
fi

# Only the ns0 end of the veth pair is enslaved to the vrf device.
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add $IP0/$PFXL dev veth0
ip -net "$ns1" addr add $IP1/$PFXL dev veth0

# Succeeds once ss reports a listening TCP socket on port 55555
# in the given namespace.
# $1: namespace name
listener_ready()
{
	local ns="$1"

	ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
}

# TCP sink in ns1 used by the masquerade tests; wait until it is listening.
ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"

# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
#
# Sets the global 'ret' to 1 when no entry shows up in zone 1, so that
# the script exit status reflects the failure like the other tests do.
test_ct_zone_in()
{
	local count

ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
	else
		echo "FAIL: entry not found in conntrack zone 1"
		ret=1
		count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
		if [ "$count" -eq 1 ]; then
			echo "FAIL: entry found in zone 2 instead"
		else
			echo "FAIL: entry not in zone 1 or 2, dumping table"
			ip netns exec "$ns0" conntrack -L
			ip netns exec "$ns0" nft list ruleset
		fi
	fi
}

# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
#
# $1: qdisc to attach as root qdisc of tvrf for the duration of the
#     test; the literal "default" leaves the device untouched.
# Sets the global 'ret' to 1 on failure.
test_masquerade_vrf()
{
	local qdisc=$1

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
		echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
		ret=1
		# NOTE: early return leaves a non-default qdisc attached to tvrf.
		return
	fi

	# must also check that nat table was evaluated on second (lower device) iteration.
	if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
	   ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
		echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
	else
		echo "FAIL: vrf rules have unexpected counter value"
		ret=1
	fi

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}

# add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
	# Start from an empty conntrack table so stale entries cannot
	# influence the nat decision; errors from the flush are ignored.
	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	# Open and immediately close a TCP connection from inside the vrf.
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
		echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	fi

	# must also check that nat table was evaluated on second (lower device) iteration.
	if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
		echo "PASS: connect with masquerade + sport rewrite on veth device"
	else
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
	fi
}

test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth

exit $ret