#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
# values are assigned correctly
#
# Tests currently implemented:
#
# - pmtu_ipv4_exception
#	Set up two namespaces, A and B, with two paths between them over routers
#	R1 and R2 (also implemented with namespaces), with different MTUs:
#
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
#	  segment a_r2    segment b_r2
#
#	Check that PMTU exceptions with the correct PMTU are created. Then
#	decrease and increase the MTU of the local link for one of the paths,
#	A to R1, checking that route exception PMTU changes accordingly over
#	this path. Also check that locked exceptions are created when an ICMP
#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
#	received
#
# - pmtu_ipv6_exception
#	Same as pmtu_ipv4_exception, except for locked PMTU tests, using IPv6
#
# - pmtu_ipv4_vxlan4_exception
#	Set up the same network topology as pmtu_ipv4_exception, create a VXLAN
#	tunnel over IPv4 between A and B, routed via R1. On the link between R1
#	and B, set a MTU lower than the VXLAN MTU and the MTU on the link
#	between A and R1. Send IPv4 packets, exceeding the MTU between R1 and
#	B, over VXLAN from A to B and check that the PMTU exception is created
#	with the right value on A
#
# - pmtu_ipv6_vxlan4_exception
#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
#
# - pmtu_ipv4_vxlan6_exception
#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
#
# - pmtu_ipv6_vxlan6_exception
#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
#
# - pmtu_ipv4_geneve4_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv6_geneve4_exception
#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv4_geneve6_exception
#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv6_geneve6_exception
#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv{4,6}_fou{4,6}_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a direct IPv4/IPv6
#	encapsulation (FoU) over IPv4/IPv6, instead of VXLAN
#
# - pmtu_ipv{4,6}_gue{4,6}_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a generic UDP IPv4/IPv6
#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
#
# - pmtu_vti4_exception
#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is not
#	created if link layer MTU is not exceeded, then exceed it and check that
#	exception is created with the expected PMTU. The approach described
#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
#	changes alone won't affect PMTU
#
# - pmtu_vti6_exception
#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is
#	created by exceeding link layer MTU with ping to other endpoint. Then
#	decrease and increase MTU of tunnel, checking that route exception PMTU
#	changes accordingly
#
# - pmtu_vti4_default_mtu
#	Set up vti4 tunnel on top of veth, in two namespaces with matching
#	endpoints. Check that MTU assigned to vti interface is the MTU of the
#	lower layer (veth) minus additional lower layer headers (zero, for veth)
#	minus IPv4 header length
#
# - pmtu_vti6_default_mtu
#	Same as above, for IPv6
#
# - pmtu_vti4_link_add_mtu
#	Set up vti4 interface passing MTU value at link creation, check MTU is
#	configured, and that link is not created with invalid MTU values
#
# - pmtu_vti6_link_add_mtu
#	Same as above, for IPv6
#
# - pmtu_vti6_link_change_mtu
#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
#	and check that configured MTU is used on link creation and changes, and
#	that MTU is properly calculated instead when MTU is not configured from
#	userspace
#
# - cleanup_ipv4_exception
#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
#	exceptions on multiple CPUs and check that the veth device tear-down
#	happens in a timely manner
#
# - cleanup_ipv6_exception
#	Same as above, but use IPv6 transport from A to B
#
# - list_flush_ipv6_exception
#	Using the same topology as in pmtu_ipv6_exception, create exceptions,
#	and check they are shown when listing exception caches, gone after
#	flushing them


# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Run-time switches read by the helpers below: VERBOSE=1 makes run_cmd echo
# commands and their output, TRACING != 0 makes trace() start tcpdump.
# NOTE(review): PAUSE_ON_FAIL is not read anywhere in the visible part of the
# file; its consumer is presumably in the test runner further down.
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping.
# 'command -v' is used instead of 'which', which POSIX doesn't specify and
# which minimal systems may lack.
ping6=$(command -v ping6) || ping6=$(command -v ping)

# Table of tests, one per line. Columns are separated by tabs, as descriptions
# contain spaces.
# NOTE(review): the code splitting this table is outside the visible part of
# the file -- confirm it uses tab and newline as field separators.
#
#	Name	Description	re-run with nh
tests="
	pmtu_ipv4_exception	ipv4: PMTU exceptions	1
	pmtu_ipv6_exception	ipv6: PMTU exceptions	1
	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions	1
	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions	1
	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions	1
	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions	1
	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions	1
	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions	1
	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions	1
	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions	1
	pmtu_vti6_exception	vti6: PMTU exceptions	0
	pmtu_vti4_exception	vti4: PMTU exceptions	0
	pmtu_vti4_default_mtu	vti4: default MTU assignment	0
	pmtu_vti6_default_mtu	vti6: default MTU assignment	0
	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation	0
	pmtu_vti6_link_add_mtu	vti6: MTU setting on link creation	0
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
	cleanup_ipv4_exception	ipv4: cleanup of cached exceptions	1
	cleanup_ipv6_exception	ipv6: cleanup of cached exceptions	1
	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"

# Namespace names, and the command prefixes used to run something inside each
# of them
NS_A="ns-A"
NS_B="ns-B"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"

# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4="10.0"
prefix6="fc00"
a_r1=1
a_r2=2
b_r1=3
b_r2=4
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"
# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1		${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1		${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"

# Set to "yes" to have setup_routing configure routes via nexthop objects
# (nexthops + routes_nh below) instead of the plain routes above
USE_NH="no"
# Column order is the one setup_routing_new reads: the fourth field is the
# gateway, the fifth the outgoing device
#	ns	family	nh id	gateway			device
nexthops="
	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1

	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
"

# nexthop id correlates to id in nexthops config above
#	ns	family	prefix			nh id
routes_nh="
	A	4	default			41
	A	4	${prefix4}.${b_r2}.1		42
	B	4	default			41

	A	6	default			61
	A	6	${prefix6}:${b_r2}::1		62
	B	6	default			61
"

# Addresses on the direct veth link between A and B (vti tests)
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to tunnel endpoints
tunnel4_a_addr="192.168.2.1"
235tunnel4_b_addr="192.168.2.2" 236tunnel4_mask="24" 237tunnel6_a_addr="fd00:2::a" 238tunnel6_b_addr="fd00:2::b" 239tunnel6_mask="64" 240 241dummy6_0_prefix="fc00:1000::" 242dummy6_1_prefix="fc00:1001::" 243dummy6_mask="64" 244 245err_buf= 246tcpdump_pids= 247 248err() { 249 err_buf="${err_buf}${1} 250" 251} 252 253err_flush() { 254 echo -n "${err_buf}" 255 err_buf= 256} 257 258run_cmd() { 259 cmd="$*" 260 261 if [ "$VERBOSE" = "1" ]; then 262 printf " COMMAND: $cmd\n" 263 fi 264 265 out="$($cmd 2>&1)" 266 rc=$? 267 if [ "$VERBOSE" = "1" -a -n "$out" ]; then 268 echo " $out" 269 echo 270 fi 271 272 return $rc 273} 274 275# Find the auto-generated name for this namespace 276nsname() { 277 eval echo \$NS_$1 278} 279 280setup_fou_or_gue() { 281 outer="${1}" 282 inner="${2}" 283 encap="${3}" 284 285 if [ "${outer}" = "4" ]; then 286 modprobe fou || return 2 287 a_addr="${prefix4}.${a_r1}.1" 288 b_addr="${prefix4}.${b_r1}.1" 289 if [ "${inner}" = "4" ]; then 290 type="ipip" 291 ipproto="4" 292 else 293 type="sit" 294 ipproto="41" 295 fi 296 else 297 modprobe fou6 || return 2 298 a_addr="${prefix6}:${a_r1}::1" 299 b_addr="${prefix6}:${b_r1}::1" 300 if [ "${inner}" = "4" ]; then 301 type="ip6tnl" 302 mode="mode ipip6" 303 ipproto="4 -6" 304 else 305 type="ip6tnl" 306 mode="mode ip6ip6" 307 ipproto="41 -6" 308 fi 309 fi 310 311 run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 312 run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 313 314 run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} 315 run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 316 317 if [ "${inner}" = "4" ]; then 318 run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a 319 run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b 320 else 321 run_cmd 
${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a 322 run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b 323 fi 324 325 run_cmd ${ns_a} ip link set ${encap}_a up 326 run_cmd ${ns_b} ip link set ${encap}_b up 327} 328 329setup_fou44() { 330 setup_fou_or_gue 4 4 fou 331} 332 333setup_fou46() { 334 setup_fou_or_gue 4 6 fou 335} 336 337setup_fou64() { 338 setup_fou_or_gue 6 4 fou 339} 340 341setup_fou66() { 342 setup_fou_or_gue 6 6 fou 343} 344 345setup_gue44() { 346 setup_fou_or_gue 4 4 gue 347} 348 349setup_gue46() { 350 setup_fou_or_gue 4 6 gue 351} 352 353setup_gue64() { 354 setup_fou_or_gue 6 4 gue 355} 356 357setup_gue66() { 358 setup_fou_or_gue 6 6 gue 359} 360 361setup_namespaces() { 362 for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do 363 ip netns add ${n} || return 1 364 365 # Disable DAD, so that we don't have to wait to use the 366 # configured IPv6 addresses 367 ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 368 done 369} 370 371setup_veth() { 372 run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1 373 run_cmd ${ns_a} ip link set veth_b netns ${NS_B} 374 375 run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a 376 run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b 377 378 run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a 379 run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b 380 381 run_cmd ${ns_a} ip link set veth_a up 382 run_cmd ${ns_b} ip link set veth_b up 383} 384 385setup_vti() { 386 proto=${1} 387 veth_a_addr="${2}" 388 veth_b_addr="${3}" 389 vti_a_addr="${4}" 390 vti_b_addr="${5}" 391 vti_mask=${6} 392 393 [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti" 394 395 run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1 396 run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 
10 397 398 run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a 399 run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b 400 401 run_cmd ${ns_a} ip link set vti${proto}_a up 402 run_cmd ${ns_b} ip link set vti${proto}_b up 403} 404 405setup_vti4() { 406 setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} 407} 408 409setup_vti6() { 410 setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} 411} 412 413setup_vxlan_or_geneve() { 414 type="${1}" 415 a_addr="${2}" 416 b_addr="${3}" 417 opts="${4}" 418 419 if [ "${type}" = "vxlan" ]; then 420 opts="${opts} ttl 64 dstport 4789" 421 opts_a="local ${a_addr}" 422 opts_b="local ${b_addr}" 423 else 424 opts_a="" 425 opts_b="" 426 fi 427 428 run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 429 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} 430 431 run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a 432 run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b 433 434 run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a 435 run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b 436 437 run_cmd ${ns_a} ip link set ${type}_a up 438 run_cmd ${ns_b} ip link set ${type}_b up 439} 440 441setup_geneve4() { 442 setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" 443} 444 445setup_vxlan4() { 446 setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" 447} 448 449setup_geneve6() { 450 setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 451} 452 453setup_vxlan6() { 454 setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 455} 456 457setup_xfrm() { 458 proto=${1} 459 veth_a_addr="${2}" 460 veth_b_addr="${3}" 461 462 run_cmd ${ns_a} ip -${proto} xfrm 
state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 463 run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 464 run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 465 run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 466 467 run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 468 run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel 469 run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel 470 run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel 471} 472 473setup_xfrm4() { 474 setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} 475} 476 477setup_xfrm6() { 478 setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} 479} 480 481setup_routing_old() { 482 for i in ${routes}; do 483 [ "${ns}" = "" ] && ns="${i}" && continue 484 [ "${addr}" = "" ] && addr="${i}" && continue 485 [ "${gw}" = "" ] && gw="${i}" 486 487 ns_name="$(nsname ${ns})" 488 489 ip -n ${ns_name} route add ${addr} via ${gw} 490 491 ns=""; addr=""; gw="" 492 done 493} 494 495setup_routing_new() { 496 for i in ${nexthops}; do 497 [ "${ns}" = "" ] && ns="${i}" && continue 498 [ "${fam}" = "" ] && fam="${i}" && continue 499 [ "${nhid}" = "" ] && nhid="${i}" && continue 500 [ "${gw}" = "" ] && gw="${i}" && continue 501 [ "${dev}" 
= "" ] && dev="${i}" 502 503 ns_name="$(nsname ${ns})" 504 505 ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev} 506 507 ns=""; fam=""; nhid=""; gw=""; dev="" 508 509 done 510 511 for i in ${routes_nh}; do 512 [ "${ns}" = "" ] && ns="${i}" && continue 513 [ "${fam}" = "" ] && fam="${i}" && continue 514 [ "${addr}" = "" ] && addr="${i}" && continue 515 [ "${nhid}" = "" ] && nhid="${i}" 516 517 ns_name="$(nsname ${ns})" 518 519 ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid} 520 521 ns=""; fam=""; addr=""; nhid="" 522 done 523} 524 525setup_routing() { 526 for i in ${NS_R1} ${NS_R2}; do 527 ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1 528 ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1 529 done 530 531 for i in ${routing_addrs}; do 532 [ "${ns}" = "" ] && ns="${i}" && continue 533 [ "${peer}" = "" ] && peer="${i}" && continue 534 [ "${segment}" = "" ] && segment="${i}" 535 536 ns_name="$(nsname ${ns})" 537 peer_name="$(nsname ${peer})" 538 if="veth_${ns}-${peer}" 539 ifpeer="veth_${peer}-${ns}" 540 541 # Create veth links 542 ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1 543 ip -n ${peer_name} link set dev ${ifpeer} up 544 545 # Add addresses 546 ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if} 547 ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if} 548 549 ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer} 550 ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer} 551 552 ns=""; peer=""; segment="" 553 done 554 555 if [ "$USE_NH" = "yes" ]; then 556 setup_routing_new 557 else 558 setup_routing_old 559 fi 560 561 return 0 562} 563 564setup() { 565 [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip 566 567 cleanup 568 for arg do 569 eval setup_${arg} || { echo " ${arg} not supported"; return 1; } 570 done 571} 572 573trace() { 574 [ $TRACING -eq 0 ] && return 575 576 for arg do 577 [ 
"${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue 578 ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & 579 tcpdump_pids="${tcpdump_pids} $!" 580 ns_cmd= 581 done 582 sleep 1 583} 584 585cleanup() { 586 for pid in ${tcpdump_pids}; do 587 kill ${pid} 588 done 589 tcpdump_pids= 590 591 for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do 592 ip netns del ${n} 2> /dev/null 593 done 594} 595 596mtu() { 597 ns_cmd="${1}" 598 dev="${2}" 599 mtu="${3}" 600 601 ${ns_cmd} ip link set dev ${dev} mtu ${mtu} 602} 603 604mtu_parse() { 605 input="${1}" 606 607 next=0 608 for i in ${input}; do 609 [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue 610 [ ${next} -eq 1 ] && echo "${i}" && return 611 [ ${next} -eq 2 ] && echo "lock ${i}" && return 612 [ "${i}" = "mtu" ] && next=1 613 done 614} 615 616link_get() { 617 ns_cmd="${1}" 618 name="${2}" 619 620 ${ns_cmd} ip link show dev "${name}" 621} 622 623link_get_mtu() { 624 ns_cmd="${1}" 625 name="${2}" 626 627 mtu_parse "$(link_get "${ns_cmd}" ${name})" 628} 629 630route_get_dst_exception() { 631 ns_cmd="${1}" 632 dst="${2}" 633 634 ${ns_cmd} ip route get "${dst}" 635} 636 637route_get_dst_pmtu_from_exception() { 638 ns_cmd="${1}" 639 dst="${2}" 640 641 mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" 642} 643 644check_pmtu_value() { 645 expected="${1}" 646 value="${2}" 647 event="${3}" 648 649 [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0 650 [ "${value}" = "${expected}" ] && return 0 651 [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1 652 [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1 653 err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}" 654 return 1 655} 656 657test_pmtu_ipvX() { 658 family=${1} 659 660 setup namespaces routing || return 2 661 trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 662 "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ 663 "${ns_a}" veth_A-R2 
"${ns_r2}" veth_R2-A \ 664 "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 665 666 if [ ${family} -eq 4 ]; then 667 ping=ping 668 dst1="${prefix4}.${b_r1}.1" 669 dst2="${prefix4}.${b_r2}.1" 670 else 671 ping=${ping6} 672 dst1="${prefix6}:${b_r1}::1" 673 dst2="${prefix6}:${b_r2}::1" 674 fi 675 676 # Set up initial MTU values 677 mtu "${ns_a}" veth_A-R1 2000 678 mtu "${ns_r1}" veth_R1-A 2000 679 mtu "${ns_r1}" veth_R1-B 1400 680 mtu "${ns_b}" veth_B-R1 1400 681 682 mtu "${ns_a}" veth_A-R2 2000 683 mtu "${ns_r2}" veth_R2-A 2000 684 mtu "${ns_r2}" veth_R2-B 1500 685 mtu "${ns_b}" veth_B-R2 1500 686 687 # Create route exceptions 688 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} 689 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} 690 691 # Check that exceptions have been created with the correct PMTU 692 pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" 693 check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 694 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 695 check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 696 697 # Decrease local MTU below PMTU, check for PMTU decrease in route exception 698 mtu "${ns_a}" veth_A-R1 1300 699 mtu "${ns_r1}" veth_R1-A 1300 700 pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" 701 check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1 702 # Second exception shouldn't be modified 703 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 704 check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 705 706 # Increase MTU, check for PMTU increase in route exception 707 mtu "${ns_a}" veth_A-R1 1700 708 mtu "${ns_r1}" veth_R1-A 1700 709 pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" 710 check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1 711 # Second exception shouldn't be modified 712 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 
713 check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 714 715 # Skip PMTU locking tests for IPv6 716 [ $family -eq 6 ] && return 0 717 718 # Decrease remote MTU on path via R2, get new exception 719 mtu "${ns_r2}" veth_R2-B 400 720 mtu "${ns_b}" veth_B-R2 400 721 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} 722 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 723 check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 724 725 # Decrease local MTU below PMTU 726 mtu "${ns_a}" veth_A-R2 500 727 mtu "${ns_r2}" veth_R2-A 500 728 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 729 check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1 730 731 # Increase local MTU 732 mtu "${ns_a}" veth_A-R2 1500 733 mtu "${ns_r2}" veth_R2-A 1500 734 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 735 check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 736 737 # Get new exception 738 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} 739 pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 740 check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 741} 742 743test_pmtu_ipv4_exception() { 744 test_pmtu_ipvX 4 745} 746 747test_pmtu_ipv6_exception() { 748 test_pmtu_ipvX 6 749} 750 751test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { 752 type=${1} 753 family=${2} 754 outer_family=${3} 755 ll_mtu=4000 756 757 if [ ${outer_family} -eq 4 ]; then 758 setup namespaces routing ${type}4 || return 2 759 # IPv4 header UDP header VXLAN/GENEVE header Ethernet header 760 exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) 761 else 762 setup namespaces routing ${type}6 || return 2 763 # IPv6 header UDP header VXLAN/GENEVE header Ethernet header 764 exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) 765 fi 766 767 trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \ 768 "${ns_a}" veth_A-R1 
"${ns_r1}" veth_R1-A \ 769 "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B 770 771 if [ ${family} -eq 4 ]; then 772 ping=ping 773 dst=${tunnel4_b_addr} 774 else 775 ping=${ping6} 776 dst=${tunnel6_b_addr} 777 fi 778 779 # Create route exception by exceeding link layer MTU 780 mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) 781 mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) 782 mtu "${ns_b}" veth_B-R1 ${ll_mtu} 783 mtu "${ns_r1}" veth_R1-B ${ll_mtu} 784 785 mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) 786 mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) 787 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} 788 789 # Check that exception was created 790 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" 791 check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface" 792} 793 794test_pmtu_ipv4_vxlan4_exception() { 795 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4 796} 797 798test_pmtu_ipv6_vxlan4_exception() { 799 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4 800} 801 802test_pmtu_ipv4_geneve4_exception() { 803 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4 804} 805 806test_pmtu_ipv6_geneve4_exception() { 807 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4 808} 809 810test_pmtu_ipv4_vxlan6_exception() { 811 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6 812} 813 814test_pmtu_ipv6_vxlan6_exception() { 815 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6 816} 817 818test_pmtu_ipv4_geneve6_exception() { 819 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6 820} 821 822test_pmtu_ipv6_geneve6_exception() { 823 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 824} 825 826test_pmtu_ipvX_over_fouY_or_gueY() { 827 inner_family=${1} 828 outer_family=${2} 829 encap=${3} 830 ll_mtu=4000 831 832 setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 833 trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ 834 "${ns_a}" 
veth_A-R1 "${ns_r1}" veth_R1-A \ 835 "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B 836 837 if [ ${inner_family} -eq 4 ]; then 838 ping=ping 839 dst=${tunnel4_b_addr} 840 else 841 ping=${ping6} 842 dst=${tunnel6_b_addr} 843 fi 844 845 if [ "${encap}" = "gue" ]; then 846 encap_overhead=4 847 else 848 encap_overhead=0 849 fi 850 851 if [ ${outer_family} -eq 4 ]; then 852 # IPv4 header UDP header 853 exp_mtu=$((${ll_mtu} - 20 - 8 - ${encap_overhead})) 854 else 855 # IPv6 header Option 4 UDP header 856 exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - ${encap_overhead})) 857 fi 858 859 # Create route exception by exceeding link layer MTU 860 mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) 861 mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) 862 mtu "${ns_b}" veth_B-R1 ${ll_mtu} 863 mtu "${ns_r1}" veth_R1-B ${ll_mtu} 864 865 mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) 866 mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) 867 run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} 868 869 # Check that exception was created 870 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" 871 check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface" 872} 873 874test_pmtu_ipv4_fou4_exception() { 875 test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou 876} 877 878test_pmtu_ipv6_fou4_exception() { 879 test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou 880} 881 882test_pmtu_ipv4_fou6_exception() { 883 test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou 884} 885 886test_pmtu_ipv6_fou6_exception() { 887 test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou 888} 889 890test_pmtu_ipv4_gue4_exception() { 891 test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue 892} 893 894test_pmtu_ipv6_gue4_exception() { 895 test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue 896} 897 898test_pmtu_ipv4_gue6_exception() { 899 test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue 900} 901 902test_pmtu_ipv6_gue6_exception() { 903 test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue 904} 905 906test_pmtu_vti4_exception() { 907 setup namespaces veth vti4 xfrm4 || 
return 2 908 trace "${ns_a}" veth_a "${ns_b}" veth_b \ 909 "${ns_a}" vti4_a "${ns_b}" vti4_b 910 911 veth_mtu=1500 912 vti_mtu=$((veth_mtu - 20)) 913 914 # SPI SN IV ICV pad length next header 915 esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1)) 916 ping_payload=$((esp_payload_rfc4106 - 28)) 917 918 mtu "${ns_a}" veth_a ${veth_mtu} 919 mtu "${ns_b}" veth_b ${veth_mtu} 920 mtu "${ns_a}" vti4_a ${vti_mtu} 921 mtu "${ns_b}" vti4_b ${vti_mtu} 922 923 # Send DF packet without exceeding link layer MTU, check that no 924 # exception is created 925 run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} 926 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 927 check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 928 929 # Now exceed link layer MTU by one byte, check that exception is created 930 # with the right PMTU value 931 run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} 932 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" 933 check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" 934} 935 936test_pmtu_vti6_exception() { 937 setup namespaces veth vti6 xfrm6 || return 2 938 trace "${ns_a}" veth_a "${ns_b}" veth_b \ 939 "${ns_a}" vti6_a "${ns_b}" vti6_b 940 fail=0 941 942 # Create route exception by exceeding link layer MTU 943 mtu "${ns_a}" veth_a 4000 944 mtu "${ns_b}" veth_b 4000 945 mtu "${ns_a}" vti6_a 5000 946 mtu "${ns_b}" vti6_b 5000 947 run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} 948 949 # Check that exception was created 950 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 951 check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 952 953 # Decrease tunnel MTU, check for PMTU decrease in route exception 954 mtu "${ns_a}" vti6_a 3000 955 
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 956 check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 957 958 # Increase tunnel MTU, check for PMTU increase in route exception 959 mtu "${ns_a}" vti6_a 9000 960 pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" 961 check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 962 963 return ${fail} 964} 965 966test_pmtu_vti4_default_mtu() { 967 setup namespaces veth vti4 || return 2 968 969 # Check that MTU of vti device is MTU of veth minus IPv4 header length 970 veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" 971 vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)" 972 if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then 973 err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length" 974 return 1 975 fi 976} 977 978test_pmtu_vti6_default_mtu() { 979 setup namespaces veth vti6 || return 2 980 981 # Check that MTU of vti device is MTU of veth minus IPv6 header length 982 veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" 983 vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)" 984 if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then 985 err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length" 986 return 1 987 fi 988} 989 990test_pmtu_vti4_link_add_mtu() { 991 setup namespaces || return 2 992 993 run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 994 [ $? -ne 0 ] && err " vti not supported" && return 2 995 run_cmd ${ns_a} ip link del vti4_a 996 997 fail=0 998 999 min=68 1000 max=$((65535 - 20)) 1001 # Check invalid values first 1002 for v in $((min - 1)) $((max + 1)); do 1003 run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 1004 # This can fail, or MTU can be adjusted to a proper value 1005 [ $? 
-ne 0 ] && continue 1006 mtu="$(link_get_mtu "${ns_a}" vti4_a)" 1007 if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then 1008 err " vti tunnel created with invalid MTU ${mtu}" 1009 fail=1 1010 fi 1011 run_cmd ${ns_a} ip link del vti4_a 1012 done 1013 1014 # Now check valid values 1015 for v in ${min} 1300 ${max}; do 1016 run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 1017 mtu="$(link_get_mtu "${ns_a}" vti4_a)" 1018 run_cmd ${ns_a} ip link del vti4_a 1019 if [ "${mtu}" != "${v}" ]; then 1020 err " vti MTU ${mtu} doesn't match configured value ${v}" 1021 fail=1 1022 fi 1023 done 1024 1025 return ${fail} 1026} 1027 1028test_pmtu_vti6_link_add_mtu() { 1029 setup namespaces || return 2 1030 1031 run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 1032 [ $? -ne 0 ] && err " vti6 not supported" && return 2 1033 run_cmd ${ns_a} ip link del vti6_a 1034 1035 fail=0 1036 1037 min=68 # vti6 can carry IPv4 packets too 1038 max=$((65535 - 40)) 1039 # Check invalid values first 1040 for v in $((min - 1)) $((max + 1)); do 1041 run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 1042 # This can fail, or MTU can be adjusted to a proper value 1043 [ $? 
-ne 0 ] && continue 1044 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 1045 if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then 1046 err " vti6 tunnel created with invalid MTU ${v}" 1047 fail=1 1048 fi 1049 run_cmd ${ns_a} ip link del vti6_a 1050 done 1051 1052 # Now check valid values 1053 for v in 68 1280 1300 $((65535 - 40)); do 1054 run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 1055 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 1056 run_cmd ${ns_a} ip link del vti6_a 1057 if [ "${mtu}" != "${v}" ]; then 1058 err " vti6 MTU ${mtu} doesn't match configured value ${v}" 1059 fail=1 1060 fi 1061 done 1062 1063 return ${fail} 1064} 1065 1066test_pmtu_vti6_link_change_mtu() { 1067 setup namespaces || return 2 1068 1069 run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy 1070 [ $? -ne 0 ] && err " dummy not supported" && return 2 1071 run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy 1072 run_cmd ${ns_a} ip link set dummy0 up 1073 run_cmd ${ns_a} ip link set dummy1 up 1074 1075 run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0 1076 run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1 1077 1078 fail=0 1079 1080 # Create vti6 interface bound to device, passing MTU, check it 1081 run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1 1082 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 1083 if [ ${mtu} -ne 1300 ]; then 1084 err " vti6 MTU ${mtu} doesn't match configured value 1300" 1085 fail=1 1086 fi 1087 1088 # Move to another device with different MTU, without passing MTU, check 1089 # MTU is adjusted 1090 run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1 1091 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 1092 if [ ${mtu} -ne $((3000 - 40)) ]; then 1093 err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length" 1094 fail=1 1095 fi 1096 1097 # Move it back, passing MTU, check MTU 
is not overridden 1098 run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1 1099 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 1100 if [ ${mtu} -ne 1280 ]; then 1101 err " vti6 MTU ${mtu} doesn't match configured value 1280" 1102 fail=1 1103 fi 1104 1105 return ${fail} 1106} 1107 1108check_command() { 1109 cmd=${1} 1110 1111 if ! which ${cmd} > /dev/null 2>&1; then 1112 err " missing required command: '${cmd}'" 1113 return 1 1114 fi 1115 return 0 1116} 1117 1118test_cleanup_vxlanX_exception() { 1119 outer="${1}" 1120 encap="vxlan" 1121 ll_mtu=4000 1122 1123 check_command taskset || return 2 1124 cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) 1125 1126 setup namespaces routing ${encap}${outer} || return 2 1127 trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ 1128 "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 1129 "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B 1130 1131 # Create route exception by exceeding link layer MTU 1132 mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) 1133 mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) 1134 mtu "${ns_b}" veth_B-R1 ${ll_mtu} 1135 mtu "${ns_r1}" veth_R1-B ${ll_mtu} 1136 1137 mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) 1138 mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) 1139 1140 # Fill exception cache for multiple CPUs (2) 1141 # we can always use inner IPv4 for that 1142 for cpu in ${cpu_list}; do 1143 run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} 1144 done 1145 1146 ${ns_a} ip link del dev veth_A-R1 & 1147 iplink_pid=$! 
1148 sleep 1 1149 if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then 1150 err " can't delete veth device in a timely manner, PMTU dst likely leaked" 1151 return 1 1152 fi 1153} 1154 1155test_cleanup_ipv6_exception() { 1156 test_cleanup_vxlanX_exception 6 1157} 1158 1159test_cleanup_ipv4_exception() { 1160 test_cleanup_vxlanX_exception 4 1161} 1162 1163run_test() { 1164 ( 1165 tname="$1" 1166 tdesc="$2" 1167 1168 unset IFS 1169 1170 if [ "$VERBOSE" = "1" ]; then 1171 printf "\n##########################################################################\n\n" 1172 fi 1173 1174 eval test_${tname} 1175 ret=$? 1176 1177 if [ $ret -eq 0 ]; then 1178 printf "TEST: %-60s [ OK ]\n" "${tdesc}" 1179 elif [ $ret -eq 1 ]; then 1180 printf "TEST: %-60s [FAIL]\n" "${tdesc}" 1181 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then 1182 echo 1183 echo "Pausing. Hit enter to continue" 1184 read a 1185 fi 1186 err_flush 1187 exit 1 1188 elif [ $ret -eq 2 ]; then 1189 printf "TEST: %-60s [SKIP]\n" "${tdesc}" 1190 err_flush 1191 fi 1192 1193 return $ret 1194 ) 1195 ret=$? 
1196 [ $ret -ne 0 ] && exitcode=1 1197 1198 return $ret 1199} 1200 1201run_test_nh() { 1202 tname="$1" 1203 tdesc="$2" 1204 1205 USE_NH=yes 1206 run_test "${tname}" "${tdesc} - nexthop objects" 1207 USE_NH=no 1208} 1209 1210test_list_flush_ipv6_exception() { 1211 setup namespaces routing || return 2 1212 trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ 1213 "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ 1214 "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ 1215 "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 1216 1217 dst1="${prefix6}:${b_r1}::1" 1218 dst2="${prefix6}:${b_r2}::1" 1219 1220 # Set up initial MTU values 1221 mtu "${ns_a}" veth_A-R1 2000 1222 mtu "${ns_r1}" veth_R1-A 2000 1223 mtu "${ns_r1}" veth_R1-B 1500 1224 mtu "${ns_b}" veth_B-R1 1500 1225 1226 mtu "${ns_a}" veth_A-R2 2000 1227 mtu "${ns_r2}" veth_R2-A 2000 1228 mtu "${ns_r2}" veth_R2-B 1500 1229 mtu "${ns_b}" veth_B-R2 1500 1230 1231 fail=0 1232 1233 # Create route exceptions 1234 run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} 1235 run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} 1236 1237 if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 2 ]; then 1238 err " can't list cached exceptions" 1239 fail=1 1240 fi 1241 1242 run_cmd ${ns_a} ip -6 route flush cache 1243 sleep 1 1244 pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" 1245 pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" 1246 if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ]; then 1247 err " can't flush cached exceptions" 1248 fail=1 1249 fi 1250 1251 return ${fail} 1252} 1253 1254usage() { 1255 echo 1256 echo "$0 [OPTIONS] [TEST]..." 1257 echo "If no TEST argument is given, all tests will be run." 
1258 echo 1259 echo "Options" 1260 echo " --trace: capture traffic to TEST_INTERFACE.pcap" 1261 echo 1262 echo "Available tests${tests}" 1263 exit 1 1264} 1265 1266################################################################################ 1267# 1268exitcode=0 1269desc=0 1270 1271while getopts :ptv o 1272do 1273 case $o in 1274 p) PAUSE_ON_FAIL=yes;; 1275 v) VERBOSE=1;; 1276 t) if which tcpdump > /dev/null 2>&1; then 1277 TRACING=1 1278 else 1279 echo "=== tcpdump not available, tracing disabled" 1280 fi 1281 ;; 1282 *) usage;; 1283 esac 1284done 1285shift $(($OPTIND-1)) 1286 1287IFS=" 1288" 1289 1290for arg do 1291 # Check first that all requested tests are available before running any 1292 command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } 1293done 1294 1295trap cleanup EXIT 1296 1297# start clean 1298cleanup 1299 1300HAVE_NH=no 1301ip nexthop ls >/dev/null 2>&1 1302[ $? -eq 0 ] && HAVE_NH=yes 1303 1304name="" 1305desc="" 1306rerun_nh=0 1307for t in ${tests}; do 1308 [ "${name}" = "" ] && name="${t}" && continue 1309 [ "${desc}" = "" ] && desc="${t}" && continue 1310 1311 if [ "${HAVE_NH}" = "yes" ]; then 1312 rerun_nh="${t}" 1313 fi 1314 1315 run_this=1 1316 for arg do 1317 [ "${arg}" != "${arg#--*}" ] && continue 1318 [ "${arg}" = "${name}" ] && run_this=1 && break 1319 run_this=0 1320 done 1321 if [ $run_this -eq 1 ]; then 1322 run_test "${name}" "${desc}" 1323 # if test was skipped no need to retry with nexthop objects 1324 [ $? -eq 2 ] && rerun_nh=0 1325 1326 if [ "${rerun_nh}" = "1" ]; then 1327 run_test_nh "${name}" "${desc}" 1328 fi 1329 fi 1330 name="" 1331 desc="" 1332 rerun_nh=0 1333done 1334 1335exit ${exitcode} 1336