xref: /linux/tools/testing/selftests/net/pmtu.sh (revision 90eea4086d5ed31936889a44d536bf77afa4ca8a)
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
6#
7# Tests currently implemented:
8#
9# - pmtu_ipv4
10#	Set up two namespaces, A and B, with two paths between them over routers
11#	R1 and R2 (also implemented with namespaces), with different MTUs:
12#
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
17#	  segment a_r2    segment b_r2
18#
19#	Check that PMTU exceptions with the correct PMTU are created. Then
20#	decrease and increase the MTU of the local link for one of the paths,
21#	A to R1, checking that route exception PMTU changes accordingly over
22#	this path. Also check that locked exceptions are created when an ICMP
23#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
24#	received
25#
26# - pmtu_ipv6
27#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
28#
29# - pmtu_ipv4_vxlan4_exception
30#	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
31#	over IPv4 between A and B, routed via R1. On the link between R1 and B,
32#	set a MTU lower than the VXLAN MTU and the MTU on the link between A and
33#	R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
34#	from A to B and check that the PMTU exception is created with the right
35#	value on A
36#
37# - pmtu_ipv6_vxlan4_exception
38#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
39#
40# - pmtu_ipv4_vxlan6_exception
41#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
42#
43# - pmtu_ipv6_vxlan6_exception
44#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
45#
46# - pmtu_ipv4_geneve4_exception
47#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
48#	VXLAN
49#
50# - pmtu_ipv6_geneve4_exception
51#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
52#	VXLAN
53#
54# - pmtu_ipv4_geneve6_exception
55#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
56#	VXLAN
57#
58# - pmtu_ipv6_geneve6_exception
59#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
60#	VXLAN
61#
62# - pmtu_ipv{4,6}_fou{4,6}_exception
63#	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
64#	(FoU) over IPv4/IPv6, instead of VXLAN
65#
# - pmtu_ipv{4,6}_gue{4,6}_exception
67#	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
68#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
69#
70# - pmtu_vti4_exception
71#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
72#	namespaces with matching endpoints. Check that route exception is not
73#	created if link layer MTU is not exceeded, then exceed it and check that
74#	exception is created with the expected PMTU. The approach described
75#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
76#	changes alone won't affect PMTU
77#
78# - pmtu_vti6_exception
79#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
80#	namespaces with matching endpoints. Check that route exception is
81#	created by exceeding link layer MTU with ping to other endpoint. Then
82#	decrease and increase MTU of tunnel, checking that route exception PMTU
83#	changes accordingly
84#
85# - pmtu_vti4_default_mtu
86#	Set up vti4 tunnel on top of veth, in two namespaces with matching
87#	endpoints. Check that MTU assigned to vti interface is the MTU of the
88#	lower layer (veth) minus additional lower layer headers (zero, for veth)
89#	minus IPv4 header length
90#
91# - pmtu_vti6_default_mtu
92#	Same as above, for IPv6
93#
94# - pmtu_vti4_link_add_mtu
95#	Set up vti4 interface passing MTU value at link creation, check MTU is
96#	configured, and that link is not created with invalid MTU values
97#
98# - pmtu_vti6_link_add_mtu
99#	Same as above, for IPv6
100#
101# - pmtu_vti6_link_change_mtu
102#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
103#	and check that configured MTU is used on link creation and changes, and
104#	that MTU is properly calculated instead when MTU is not configured from
105#	userspace
106#
107# - cleanup_ipv4_exception
108#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
109#	exceptions on multiple CPUs and check that the veth device tear-down
110# 	happens in a timely manner
111#
112# - cleanup_ipv6_exception
113#	Same as above, but use IPv6 transport from A to B
114
115
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Default settings
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping.
# The lookup uses 'command -v', which POSIX specifies, rather than 'which',
# an external tool that is not guaranteed to be installed. ping6 is left
# empty if neither binary is found.
if ! ping6="$(command -v ping6 2> /dev/null)"; then
	ping6="$(command -v ping 2> /dev/null)"
fi
125
126tests="
127	pmtu_ipv4_exception		ipv4: PMTU exceptions
128	pmtu_ipv6_exception		ipv6: PMTU exceptions
129	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions
130	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions
131	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions
132	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions
133	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions
134	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions
135	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions
136	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions
137	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions
138	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions
139	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions
140	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions
141	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions
142	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions
143	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions
144	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions
145	pmtu_vti6_exception		vti6: PMTU exceptions
146	pmtu_vti4_exception		vti4: PMTU exceptions
147	pmtu_vti4_default_mtu		vti4: default MTU assignment
148	pmtu_vti6_default_mtu		vti6: default MTU assignment
149	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
150	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
151	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes
152	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
153	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
154
# Network namespaces used by the tests: two hosts (A, B) and two routers
# (R1, R2), each with the command prefix that runs something inside it
NS_A=ns-A
NS_B=ns-B
NS_R1=ns-R1
NS_R2=ns-R2
ns_a="ip netns exec $NS_A"
ns_b="ip netns exec $NS_B"
ns_r1="ip netns exec $NS_R1"
ns_r2="ip netns exec $NS_R2"

# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4=10.0
prefix6=fc00
a_r1=1
a_r2=2
b_r1=3
b_r2=4

# One link per line, as consumed by setup_routing():
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"

# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"

# Addresses for the tests using a direct veth pair between A and B
veth4_a_addr=192.168.1.1
veth4_b_addr=192.168.1.2
veth4_mask=24
veth6_a_addr=fd00:1::a
veth6_b_addr=fd00:1::b
veth6_mask=64

# Addresses assigned to the tunnel interfaces
tunnel4_a_addr=192.168.2.1
tunnel4_b_addr=192.168.2.2
tunnel4_mask=24
tunnel6_a_addr=fd00:2::a
tunnel6_b_addr=fd00:2::b
tunnel6_mask=64

# Addresses for the dummy interfaces
dummy6_0_addr=fc00:1000::0
dummy6_1_addr=fc00:1001::0
dummy6_mask=64

# Queued error messages, see err() and err_flush(), and PIDs of the running
# packet captures, see trace() and cleanup()
err_buf=
tcpdump_pids=
217
# Queue an error message: append ${1} and a trailing newline to err_buf, to be
# printed later by err_flush()
err() {
	err_buf="${err_buf}${1}"
	err_buf="${err_buf}
"
}
222
# Print all the error messages queued by err(), then empty the queue.
#
# printf with a fixed '%s' format is used rather than 'echo -n': under /bin/sh,
# the treatment of -n and of backslashes by echo is implementation-defined, so
# a message could otherwise be altered while being printed.
err_flush() {
	printf '%s' "${err_buf}"
	err_buf=
}
227
# Run a command, capturing its output (stdout and stderr) in ${out} and its
# exit status in ${rc}.
#
# Arguments: the command and its arguments. They are joined into one string
#            and split again on whitespace before execution.
# Outputs:   nothing, unless VERBOSE is 1: then, the command line and, if not
#            empty, the captured output are printed.
# Returns:   the exit status of the command.
run_cmd() {
	cmd="$*"

	if [ "$VERBOSE" = "1" ]; then
		# The command is passed as data, never as format string: a '%'
		# or a backslash in it must be printed verbatim
		printf '    COMMAND: %s\n' "$cmd"
	fi

	out="$($cmd 2>&1)"
	rc=$?
	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
		echo "    $out"
		echo
	fi

	return $rc
}
244
# Print the name of the namespace identified by ${1} (A, B, R1 or R2), that is,
# the value of the NS_${1} variable
nsname() {
	eval "echo \$NS_${1}"
}
249
# Set up a FoU or GUE tunnel between A and B, over the path via R1.
#
# Arguments: ${1} - IP version of the outer (transport) protocol, 4 or 6
#            ${2} - IP version of the inner (tunneled) protocol, 4 or 6
#            ${3} - encapsulation type, "fou" or "gue"
# Returns:   2 if the fou/fou6 module can't be loaded, or if the receive port
#            or the tunnel interface can't be created in namespace A.
#            Failures of the remaining commands are not checked: the status
#            of the last one is returned.
setup_fou_or_gue() {
	outer="${1}"
	inner="${2}"
	encap="${3}"

	# Only ip6tnl links need a mode: start from an empty one, so that a
	# value left over from a previous call with IPv6 transport doesn't end
	# up on the command line of ipip or sit links
	mode=""

	if [ "${outer}" = "4" ]; then
		modprobe fou || return 2
		a_addr="${prefix4}.${a_r1}.1"
		b_addr="${prefix4}.${b_r1}.1"
		if [ "${inner}" = "4" ]; then
			type="ipip"
			ipproto="4"
		else
			type="sit"
			ipproto="41"
		fi
	else
		modprobe fou6 || return 2
		a_addr="${prefix6}:${a_r1}::1"
		b_addr="${prefix6}:${b_r1}::1"
		if [ "${inner}" = "4" ]; then
			type="ip6tnl"
			mode="mode ipip6"
			ipproto="4 -6"
		else
			type="ip6tnl"
			mode="mode ip6ip6"
			ipproto="41 -6"
		fi
	fi

	# Receive on port 5555 in A and on port 5556 in B, each tunnel end sends
	# to the port of the other one
	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2

	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

	if [ "${inner}" = "4" ]; then
		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
	else
		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
	fi

	run_cmd ${ns_a} ip link set ${encap}_a up
	run_cmd ${ns_b} ip link set ${encap}_b up
}
298
# Shorthands for setup_fou_or_gue(): the two digits in the name are the IP
# versions of the outer and of the inner protocol, in this order

# FoU, IPv4 in IPv4
setup_fou44() {
	setup_fou_or_gue "4" "4" "fou"
}

# FoU, IPv6 in IPv4
setup_fou46() {
	setup_fou_or_gue "4" "6" "fou"
}

# FoU, IPv4 in IPv6
setup_fou64() {
	setup_fou_or_gue "6" "4" "fou"
}

# FoU, IPv6 in IPv6
setup_fou66() {
	setup_fou_or_gue "6" "6" "fou"
}

# GUE, IPv4 in IPv4
setup_gue44() {
	setup_fou_or_gue "4" "4" "gue"
}

# GUE, IPv6 in IPv4
setup_gue46() {
	setup_fou_or_gue "4" "6" "gue"
}

# GUE, IPv4 in IPv6
setup_gue64() {
	setup_fou_or_gue "6" "4" "gue"
}

# GUE, IPv6 in IPv6
setup_gue66() {
	setup_fou_or_gue "6" "6" "gue"
}
330
# Create the A, B, R1 and R2 namespaces. Returns 1 as soon as one of them
# can't be created.
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		if ! ip netns add ${n}; then
			return 1
		fi

		# With DAD disabled, configured IPv6 addresses can be used right
		# away, without waiting
		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
	done
}
340
# Connect A and B directly with a veth pair (veth_a, veth_b), assign IPv4 and
# IPv6 addresses to both ends and bring them up. Returns 1 if the pair can't
# be created.
setup_veth() {
	if ! run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

	# Pairs of addresses for A and B: IPv4 first, then IPv6
	set -- "${veth4_a_addr}/${veth4_mask}" "${veth4_b_addr}/${veth4_mask}" \
	       "${veth6_a_addr}/${veth6_mask}" "${veth6_b_addr}/${veth6_mask}"
	while [ $# -gt 0 ]; do
		run_cmd ${ns_a} ip addr add ${1} dev veth_a
		run_cmd ${ns_b} ip addr add ${2} dev veth_b
		shift 2
	done

	run_cmd ${ns_a} ip link set veth_a up
	run_cmd ${ns_b} ip link set veth_b up
}
354
# Create a pair of vti (IPv4) or vti6 (IPv6) interfaces, one in A and one in B,
# assign addresses to them and bring them up.
#
# Arguments: ${1} - IP version, 4 or 6: selects the link type, and is part of
#                   the interface names (vti${1}_a, vti${1}_b)
#            ${2}, ${3} - tunnel endpoint addresses of A and B
#            ${4}, ${5} - addresses for the vti interfaces of A and B
#            ${6} - prefix length for the vti interface addresses
# Returns:   1 if the interface can't be created in A.
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	# From here on, ${1} and ${2} are the interface names for A and B
	set -- "vti${proto}_a" "vti${proto}_b"

	if ! run_cmd ${ns_a} ip link add ${1} type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
		return 1
	fi
	run_cmd ${ns_b} ip link add ${2} type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev ${1}
	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev ${2}

	run_cmd ${ns_a} ip link set ${1} up
	run_cmd ${ns_b} ip link set ${2} up
}
374
# vti over the IPv4 addresses of the veth pair between A and B
setup_vti4() {
	setup_vti 4 \
		  ${veth4_a_addr} ${veth4_b_addr} \
		  ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
}

# vti6 over the IPv6 addresses of the veth pair between A and B
setup_vti6() {
	setup_vti 6 \
		  ${veth6_a_addr} ${veth6_b_addr} \
		  ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
382
# Set up a VXLAN or GENEVE tunnel between A and B, with both IPv4 and IPv6
# addresses on the tunnel interfaces.
#
# Arguments: ${1} - link type, "vxlan" or "geneve"
#            ${2}, ${3} - tunnel endpoint addresses of A and B
#            ${4} - optional, additional options for the link creation
# Returns:   1 if the tunnel interface can't be created in A.
setup_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"
	opts="${4}"

	# Only VXLAN links get an explicit local address, TTL and port
	opts_a=""
	opts_b=""
	case "${type}" in
	vxlan)
		opts="${opts} ttl 64 dstport 4789"
		opts_a="local ${a_addr}"
		opts_b="local ${b_addr}"
		;;
	esac

	if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
		return 1
	fi
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

	# IPv4 addresses first, then IPv6 ones
	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b

	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

	run_cmd ${ns_a} ip link set ${type}_a up
	run_cmd ${ns_b} ip link set ${type}_b up
}
410
# Shorthands for setup_vxlan_or_geneve(): tunnels between the addresses of A
# and B on the segments towards R1. The IPv4 variants also pass "df set" as
# additional link option.

# GENEVE over IPv4
setup_geneve4() {
	setup_vxlan_or_geneve "geneve" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}

# VXLAN over IPv4
setup_vxlan4() {
	setup_vxlan_or_geneve "vxlan" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}

# GENEVE over IPv6
setup_geneve6() {
	setup_vxlan_or_geneve "geneve" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1"
}

# VXLAN over IPv6
setup_vxlan6() {
	setup_vxlan_or_geneve "vxlan" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1"
}
426
# Install matching xfrm states and policies in A and B.
#
# Arguments: ${1} - IP version, 4 or 6, passed as option to ip
#            ${2}, ${3} - tunnel endpoint addresses of A and B
# Returns:   1 if the first state can't be added in A.
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	# Protocol, algorithm, key and mode are the same for all the states:
	# keep them as positional parameters from here on
	set -- proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel

	# Namespace A: states for both directions, then policies
	if ! run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 "$@"; then
		return 1
	fi
	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 "$@"
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	# Namespace B: same states, policies with swapped directions
	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 "$@"
	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 "$@"
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
442
# xfrm states and policies between the IPv4 addresses of the veth pair
setup_xfrm4() {
	setup_xfrm "4" ${veth4_a_addr} ${veth4_b_addr}
}

# xfrm states and policies between the IPv6 addresses of the veth pair
setup_xfrm6() {
	setup_xfrm "6" ${veth6_a_addr} ${veth6_b_addr}
}
450
# Set up forwarding, links, addresses and routes for the tests with routers.
#
# Enables IPv4 and IPv6 forwarding in the R1 and R2 namespaces, then reads
# ${routing_addrs} as (ns, peer, segment) triples: for each of them, a veth
# pair is created between the two namespaces, and the addresses with ID 1 and
# ID 2 on the segment are assigned to the ns and to the peer end, respectively.
# Finally, ${routes} is read as (ns, destination, gateway) triples, and each
# route is added.
#
# Returns 1 as soon as a veth pair can't be created.
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# The loops below fill the first empty variable of each triple with the
	# current word. Start from a clean state: values left over from an
	# earlier call that returned early, or assigned elsewhere, would shift
	# the parsing
	ns=""; peer=""; segment=""
	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	ns=""; addr=""; gw=""
	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}

		ns=""; addr=""; gw=""
	done
}
493
# Clean up, then run setup_<arg>() for each argument, in order.
#
# Returns ${ksft_skip}, with a message, if not running as root, and 1, with a
# message, as soon as one of the setup functions fails.
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return $ksft_skip
	fi

	cleanup
	for arg in "$@"; do
		if ! eval setup_${arg}; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
502
# If TRACING is enabled, start a packet capture on each given interface.
#
# Arguments: pairs of namespace command prefix and interface name. For each
#            pair, tcpdump is started in background, writing to
#            ${name}_<interface>.pcap, and its PID is appended to
#            tcpdump_pids.
trace() {
	[ $TRACING -eq 0 ] && return

	# ns_cmd tells whether the next argument is a prefix (ns_cmd empty) or
	# an interface name. Other helpers, such as mtu(), assign it as well:
	# clear it, or a leftover value would be taken as the prefix of the
	# first pair, and all the following arguments would be paired wrongly
	ns_cmd=
	for arg do
		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	sleep 1
}
514
# Stop the packet captures started by trace(), if any, and delete all the
# namespaces. Errors on namespace deletion are not reported.
cleanup() {
	if [ -n "${tcpdump_pids}" ]; then
		for pid in ${tcpdump_pids}; do
			kill ${pid}
		done
		tcpdump_pids=
	fi

	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns del ${n} 2> /dev/null
	done
}
525
# Set the MTU of a device: ${1} is the command prefix to enter the namespace,
# ${2} the device name, ${3} the MTU. The three values are also stored in
# ns_cmd, dev and mtu.
mtu() {
	ns_cmd="${1}"; dev="${2}"; mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}
533
# Print the MTU found in ${1}, the output of an ip command: the word following
# "mtu", or "lock" and the word after it if "mtu" is followed by "lock".
# Nothing is printed if no MTU is found.
mtu_parse() {
	input="${1}"

	# 0: looking for "mtu", 1: "mtu" was the previous word, 2: "mtu lock"
	# were the previous two words
	next=0
	for i in ${input}; do
		case "${next}" in
		1)
			if [ "${i}" = "lock" ]; then
				next=2
				continue
			fi
			echo "${i}"
			return
			;;
		2)
			echo "lock ${i}"
			return
			;;
		esac
		[ "${i}" = "mtu" ] && next=1
	done
}
545
# Show the link named ${2}, using ${1} as command prefix to enter the
# namespace. Prefix and name are also stored in ns_cmd and name.
link_get() {
	ns_cmd="${1}"; name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
552
# Print the MTU of the link named ${2}, using ${1} as command prefix to enter
# the namespace: the output of link_get() is handed over to mtu_parse()
link_get_mtu() {
	ns_cmd="${1}"
	name="${2}"

	set -- "$(link_get "${ns_cmd}" ${name})"
	mtu_parse "${1}"
}
559
# Show the route to destination ${2}, using ${1} as command prefix to enter
# the namespace. Prefix and destination are also stored in ns_cmd and dst.
route_get_dst_exception() {
	ns_cmd="${1}"; dst="${2}"

	${ns_cmd} ip route get "${dst}"
}
566
# Print the PMTU of the route to destination ${2}, using ${1} as command prefix
# to enter the namespace: the output of route_get_dst_exception() is handed
# over to mtu_parse()
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"
	dst="${2}"

	set -- "$(route_get_dst_exception "${ns_cmd}" ${dst})"
	mtu_parse "${1}"
}
573
# Compare a PMTU value against the expected one, queueing an error message on
# mismatch.
#
# Arguments: ${1} - expected value: "any" accepts any non-empty value, an
#                   empty string means that no exception is expected
#            ${2} - value found, empty if there's no exception
#            ${3} - description of the triggering event, for error messages
# Returns:   0 on match, 1 otherwise.
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	if [ "${expected}" = "any" ] && [ -n "${value}" ]; then
		return 0
	fi
	if [ "${value}" = "${expected}" ]; then
		return 0
	fi

	if [ -z "${value}" ]; then
		err "  PMTU exception wasn't created after ${event}"
	elif [ -z "${expected}" ]; then
		err "  PMTU exception shouldn't exist after ${event}"
	else
		err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	fi
	return 1
}
586
# Common part of the pmtu_ipv4_exception and pmtu_ipv6_exception tests: ${1} is
# the IP version to test, 4 or 6.
#
# Returns 2 if the setup fails, 1 on the first failed check, 0 otherwise.
test_pmtu_ipvX() {
	family=${1}

	if ! setup namespaces routing; then
		return 2
	fi
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	# dst1 is B's address reached via R1, dst2 the one reached via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Initial MTU values, as (namespace prefix, device, MTU) triples: 2000 on
	# both links of A, 1400 between R1 and B, 1500 between R2 and B
	set -- "${ns_a}"  veth_A-R1 2000    "${ns_r1}" veth_R1-A 2000 \
	       "${ns_r1}" veth_R1-B 1400    "${ns_b}"  veth_B-R1 1400 \
	       "${ns_a}"  veth_A-R2 2000    "${ns_r2}" veth_R2-A 2000 \
	       "${ns_r2}" veth_R2-B 1500    "${ns_b}"  veth_B-R2 1500
	while [ $# -gt 0 ]; do
		mtu "${1}" ${2} ${3}
		shift 3
	done

	# Exceed the MTU of both paths, so that route exceptions are created
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Both exceptions need to be there, with the PMTU of their own path
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# A local MTU below the PMTU has to lower the PMTU of the exception on
	# that path...
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# ...and leave the other exception alone
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# The same goes for an increase of the local MTU
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# PMTU locking tests are for IPv4 only
	if [ $family -eq 6 ]; then
		return 0
	fi

	# With a remote MTU of 400 on the path via R2, a locked exception with
	# PMTU 552 is expected
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Exceeding the MTU again has to bring the locked exception back
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
	return 0
}
672
# PMTU exceptions on IPv4 routes
test_pmtu_ipv4_exception() {
	test_pmtu_ipvX "4"
}

# PMTU exceptions on IPv6 routes
test_pmtu_ipv6_exception() {
	test_pmtu_ipvX "6"
}
680
# Common part of the PMTU exception tests over VXLAN and GENEVE tunnels.
#
# Arguments: ${1} - tunnel type, "vxlan" or "geneve"
#            ${2} - IP version of the tunneled traffic, 4 or 6
#            ${3} - IP version of the tunnel transport, 4 or 6
# Returns:   2 if the setup fails, otherwise the result of the check on the
#            PMTU value.
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	# The expected PMTU is the link layer MTU minus the headers of the
	# encapsulation:
	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ${type}4 || return 2
		#         IPv4 (20), UDP (8), VXLAN/GENEVE (8), Ethernet (14)
		exp_mtu=$((ll_mtu - 20 - 8 - 8 - 14))
	else
		setup namespaces routing ${type}6 || return 2
		#         IPv6 (40), UDP (8), VXLAN/GENEVE (8), Ethernet (14)
		exp_mtu=$((ll_mtu - 40 - 8 - 8 - 14))
	fi

	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# The link between R1 and B gets the lowest MTU on the path, tunnel
	# interfaces and the link between A and R1 a higher one
	mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
	mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((ll_mtu + 1000))
	mtu "${ns_b}" ${type}_b $((ll_mtu + 1000))

	# Exceed the link layer MTU to have a route exception created...
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${dst}

	# ...and check that it's there, with the expected value
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}
723
# PMTU exception tests over VXLAN and GENEVE: in the names, the first IP
# version is the one of the tunneled traffic, the second one is the version of
# the tunnel transport

# IPv4 over VXLAN, IPv4 transport
test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "4" "4"
}

# IPv6 over VXLAN, IPv4 transport
test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "6" "4"
}

# IPv4 over GENEVE, IPv4 transport
test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "4" "4"
}

# IPv6 over GENEVE, IPv4 transport
test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "6" "4"
}

# IPv4 over VXLAN, IPv6 transport
test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "4" "6"
}

# IPv6 over VXLAN, IPv6 transport
test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "6" "6"
}

# IPv4 over GENEVE, IPv6 transport
test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "4" "6"
}

# IPv6 over GENEVE, IPv6 transport
test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "6" "6"
}
755
# Common part of the PMTU exception tests over FoU and GUE tunnels.
#
# Arguments: ${1} - IP version of the tunneled traffic, 4 or 6
#            ${2} - IP version of the tunnel transport, 4 or 6
#            ${3} - encapsulation type, "fou" or "gue"
# Returns:   2 if the setup fails, otherwise the result of the check on the
#            PMTU value.
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	if ! setup namespaces routing ${encap}${outer_family}${inner_family}; then
		return 2
	fi
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# GUE takes four bytes more than FoU
	case "${encap}" in
	gue)	encap_overhead=4 ;;
	*)	encap_overhead=0 ;;
	esac

	# The expected PMTU is the link layer MTU minus the headers of the
	# encapsulation:
	if [ ${outer_family} -eq 4 ]; then
		#         IPv4 (20), UDP (8)
		exp_mtu=$((ll_mtu - 20 - 8 - encap_overhead))
	else
		#         IPv6 (40), Option 4 (8), UDP (8)
		exp_mtu=$((ll_mtu - 40 - 8 - 8 - encap_overhead))
	fi

	# The link between R1 and B gets the lowest MTU on the path, tunnel
	# interfaces and the link between A and R1 a higher one
	mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
	mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((ll_mtu + 1000))
	mtu "${ns_b}" ${encap}_b $((ll_mtu + 1000))

	# Exceed the link layer MTU to have a route exception created...
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${dst}

	# ...and check that it's there, with the expected value
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}
803
# PMTU exception tests over FoU and GUE: in the names, the first IP version is
# the one of the tunneled traffic, the second one is the version of the tunnel
# transport

# IPv4 over FoU, IPv4 transport
test_pmtu_ipv4_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "fou"
}

# IPv6 over FoU, IPv4 transport
test_pmtu_ipv6_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "fou"
}

# IPv4 over FoU, IPv6 transport
test_pmtu_ipv4_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "fou"
}

# IPv6 over FoU, IPv6 transport
test_pmtu_ipv6_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "fou"
}

# IPv4 over GUE, IPv4 transport
test_pmtu_ipv4_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "gue"
}

# IPv6 over GUE, IPv4 transport
test_pmtu_ipv6_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "gue"
}

# IPv4 over GUE, IPv6 transport
test_pmtu_ipv4_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "gue"
}

# IPv6 over GUE, IPv6 transport
test_pmtu_ipv6_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "gue"
}
835
# Check that, on a vti tunnel, no route exception is created as long as the
# link layer MTU is not exceeded, and that it's created, with the expected
# PMTU, once the MTU is exceeded by one byte.
#
# Returns 2 if the setup fails, otherwise 1 if a check fails, 0 if not.
test_pmtu_vti4_exception() {
	if ! setup namespaces veth vti4 xfrm4; then
		return 2
	fi
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# What's left for the ESP payload once these are taken from the MTU of
	# the vti interface: SPI (4), SN (4), IV (8), ICV (16), pad length (1),
	# next header (1)
	esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# A DF packet fitting the link layer MTU shouldn't cause an exception
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
		return 1
	fi

	# One byte more exceeds the link layer MTU: now the exception is
	# expected, with the right PMTU value
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
865
# Check that, on a vti6 tunnel, a route exception is created by exceeding the
# link layer MTU, and that its PMTU follows changes of the tunnel MTU.
#
# Returns 2 if the setup fails, 1 if the exception is not created or if its
# PMTU doesn't follow the MTU changes, 0 otherwise.
test_pmtu_vti6_exception() {
	if ! setup namespaces veth vti6 xfrm6; then
		return 2
	fi
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
	fail=0

	# With tunnel MTUs bigger than the link layer ones, a big packet has to
	# cause a route exception
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

	# Without exception, there's nothing left to check
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	if ! check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU"; then
		return 1
	fi

	# The PMTU of the exception has to decrease with the tunnel MTU...
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	if ! check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU"; then
		fail=1
	fi

	# ...and to increase with it, too
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	if ! check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU"; then
		fail=1
	fi

	return ${fail}
}
895
# Check that the default MTU of a vti interface is the MTU of the veth device
# minus the length of the IPv4 header, 20 bytes.
#
# Returns 2 if the setup fails, 1 if the MTU is not the expected one.
test_pmtu_vti4_default_mtu() {
	if ! setup namespaces veth vti4; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	[ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
907
# Check that the default MTU of a vti6 interface is the MTU of the veth device
# minus the length of the IPv6 header, 40 bytes.
#
# Returns 2 if the setup fails, 1 if the MTU is not the expected one.
test_pmtu_vti6_default_mtu() {
	if ! setup namespaces veth vti6; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ $((veth_mtu - vti6_mtu)) -eq 40 ] && return 0

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
919
# Check MTU handling on creation of vti links: invalid values have to be
# either refused or adjusted to valid ones, valid values have to be applied
# as they are.
#
# Returns 2 if the setup fails or if vti links can't be created at all, 1 if a
# check fails, 0 otherwise.
test_pmtu_vti4_link_add_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	# Try without MTU first, to tell a missing vti support from a failure
	if ! run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err "  vti not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))

	# Values right outside the valid range: the creation can fail, or the
	# MTU can be adjusted to a proper value
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 || continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti4_a
	done

	# Valid values, including the boundaries of the range
	for v in ${min} 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		run_cmd ${ns_a} ip link del vti4_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
957
# Check the MTU of a vti6 tunnel created with an explicit "mtu" option:
# out-of-range requests must either be refused or end up inside [min, max],
# in-range requests must be applied as given.
#
# Returns 0 on success, 1 on failure, 2 if the test is skipped (setup failed
# or vti6 tunnels can't be created).
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support first, without passing any MTU
	if ! run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10; then
		err "  vti6 not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link del vti6_a

	fail=0

	min=68			# vti6 can carry IPv4 packets too
	# 40 is the IPv6 header length taken by the tunnel
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		# An MTU we can't read back is as bad as one out of range: with
		# an empty value the numeric tests below would just error out
		# and the problem would go unnoticed
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the device ended up with, not the
			# requested one: the two differ if it was adjusted
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti6_a
	done

	# Now check valid values, including both ends of the valid range
	for v in ${min} 1280 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		run_cmd ${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
995
# Check how the MTU of a vti6 tunnel behaves when the tunnel is created on,
# and then moved between, two dummy devices with different MTUs (1500, 3000):
# an MTU passed explicitly must be kept, otherwise the MTU has to follow the
# underlying device.
#
# Returns 0 on success, 1 on failure, 2 if the test is skipped (setup failed
# or dummy devices can't be created).
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	if ! run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err "  dummy not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
	run_cmd ${ns_a} ip link set dummy0 up
	run_cmd ${ns_a} ip link set dummy1 up

	run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
	run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1

	fail=0

	# MTU values are compared as strings below: a numeric test on an empty
	# value (tunnel missing, MTU unreadable) is an error for '[', which
	# 'if' takes as false, and the check would pass without checking
	# anything

	# Create vti6 interface bound to device, passing MTU, check it
	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1300" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
1037
# Check that a command needed by a test is available.
#
# $1: name of the command
#
# Returns 0 if the command is found, 1 (after logging an error with err)
# otherwise.
check_command() {
	cmd=${1}

	# 'command -v' is specified by POSIX and built into the shell, unlike
	# 'which', which might be missing altogether on minimal systems
	if ! command -v "${cmd}" > /dev/null 2>&1; then
		err "  missing required command: '${cmd}'"
		return 1
	fi
	return 0
}
1047
# Check that a veth device can still be deleted in a timely manner after PMTU
# exceptions over a VXLAN tunnel were created from (up to) two CPUs.
#
# $1: appended to "vxlan" to select the tunnel setup step (callers in this
#     file pass 4 or 6)
#
# Returns 0 on success, 1 if the deletion is still running after one second,
# 2 if the test is skipped (taskset missing or setup failed).
test_cleanup_vxlanX_exception() {
	outer="${1}"
	encap="vxlan"
	ll_mtu=4000

	if ! check_command taskset; then
		return 2
	fi
	# Second field of the first two "processor" lines from /proc/cpuinfo
	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

	if ! setup namespaces routing ${encap}${outer}; then
		return 2
	fi
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	# The A-R1 link and the tunnel get a bigger MTU than the R1-B link, so
	# that traffic from A exceeds the link layer MTU further down the path
	# and a route exception is created
	mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
	mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((ll_mtu + 1000))
	mtu "${ns_b}" ${encap}_b $((ll_mtu + 1000))

	# Fill exception cache for multiple CPUs (2)
	# we can always use inner IPv4 for that
	for cpu in ${cpu_list}; do
		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${tunnel4_b_addr}
	done

	# Delete the device in the background and give it one second: if the
	# "ip link del" process is still around by then, it's stuck
	${ns_a} ip link del dev veth_A-R1 &
	iplink_pid=$!
	sleep 1
	pending_cmd="$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')"
	[ "${pending_cmd}" = "iplinkdeldevveth_A-R1" ] || return 0

	err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
	return 1
}
1084
# Device cleanup test, run with "6" as tunnel setup selector (vxlan6); the
# return value is the one from test_cleanup_vxlanX_exception
test_cleanup_ipv6_exception() {
	test_cleanup_vxlanX_exception "6"
}
1088
# Device cleanup test, run with "4" as tunnel setup selector (vxlan4); the
# return value is the one from test_cleanup_vxlanX_exception
test_cleanup_ipv4_exception() {
	test_cleanup_vxlanX_exception "4"
}
1092
# Print a usage message, including the list of available tests from ${tests},
# and exit with status 1. The options listed here are the ones handled by the
# getopts loop of this script.
usage() {
	echo
	echo "$0 [OPTIONS] [TEST]..."
	echo "If no TEST argument is given, all tests will be run."
	echo
	echo "Options"
	echo "  -p: pause on failure, until enter is hit"
	echo "  -t: capture traffic to TEST_INTERFACE.pcap (needs tcpdump)"
	echo "  -v: verbose output"
	echo
	echo "Available tests${tests}"
	exit 1
}
1104
################################################################################
#
# exitcode: exit status of the whole script, set to 1 once any test fails
# desc: tells, while walking ${tests}, whether the next entry is a test name
#       (0) or the description of the test named just before (1)
exitcode=0
desc=0

# -p: pause on failure, -v: verbose, -t: enable tracing, as long as tcpdump is
# there. Anything else prints the usage message and exits.
while getopts :ptv o
do
	case $o in
	p) PAUSE_ON_FAIL=yes;;
	v) VERBOSE=1;;
	# 'command -v' rather than 'which': it's a POSIX builtin, 'which' isn't
	# guaranteed to exist, and we'd disable tracing for no reason
	t) if command -v tcpdump > /dev/null 2>&1; then
		TRACING=1
	   else
		echo "=== tcpdump not available, tracing disabled"
	   fi
	   ;;
	*) usage;;
	esac
done
# Drop parsed options, what's left are names of tests to run
shift $((OPTIND - 1))
1125
# From here on, split words on newlines only
IFS='
'

# Check first that all requested tests are available before running any
for arg in "$@"; do
	if ! command -v "test_${arg}" > /dev/null; then
		echo "=== Test ${arg} not found"
		usage
	fi
done

# Clean up whenever the script exits, and once right now, to start clean
trap cleanup EXIT
cleanup
1138
# Entries of ${tests} come in pairs: the name of a test, then its description
for t in ${tests}; do
	# Name entry: keep it, the description that follows triggers the run
	if [ $desc -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	# With no test given on the command line, run everything. Otherwise,
	# run this test only if its name was given.
	run_this=1
	for arg in "$@"; do
		case "${arg}" in
		--*) continue ;;
		esac
		if [ "${arg}" = "${name}" ]; then
			run_this=1
			break
		fi
		run_this=0
	done
	if [ $run_this -eq 0 ]; then
		continue
	fi

	# Each test gets its own subshell, with default word splitting. A
	# non-zero exit status from it marks the whole run as failed.
	(
		unset IFS

		[ "$VERBOSE" = "1" ] && printf "\n##########################################################################\n\n"

		eval test_${name}
		ret=$?

		# 0: passed, 1: failed, 2: skipped
		case ${ret} in
		0)
			printf "TEST: %-60s  [ OK ]\n" "${t}"
			;;
		1)
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
				echo
				echo "Pausing. Hit enter to continue"
				read a
			fi
			err_flush
			exit 1
			;;
		2)
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
			;;
		esac
	) || exitcode=1
done

exit ${exitcode}
1180