xref: /linux/tools/testing/selftests/net/forwarding/ipmr.c (revision 91a4855d6c03e770e42f17c798a36a3c46e63de2)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright 2026 Google LLC */
3 
4 #include <linux/if.h>
5 #include <linux/mroute.h>
6 #include <linux/netlink.h>
7 #include <linux/rtnetlink.h>
8 #include <linux/socket.h>
9 #include <sched.h>
10 #include <sys/ioctl.h>
11 #include <sys/socket.h>
12 
13 #include "kselftest_harness.h"
14 
/* Per-test state filled in by FIXTURE_SETUP() and released by FIXTURE_TEARDOWN(). */
FIXTURE(ipmr)
{
	/* NETLINK_ROUTE socket carrying the RTM_{NEW,DEL}ROUTE requests. */
	int netlink_sk;
	/* Raw socket on which all MRT_* setsockopt() calls are issued. */
	int raw_sk;
	/* ifindex of the veth0 device created during setup. */
	int veth_ifindex;
};
21 
/* Parameters that differ per address family; filled in by FIXTURE_VARIANT_ADD(). */
FIXTURE_VARIANT(ipmr)
{
	/* socket() domain of the raw socket. */
	int family;
	/* socket() protocol of the raw socket. */
	int protocol;
	/* setsockopt() level used for every MRT_* option. */
	int level;
	/* setsockopt() option names, looked up as opts[MRT_xxx - MRT_BASE]. */
	int opts[MRT_MAX - MRT_BASE + 1];
};
29 
30 FIXTURE_VARIANT_ADD(ipmr, ipv4)
31 {
32 	.family = AF_INET,
33 	.protocol = IPPROTO_IGMP,
34 	.level = IPPROTO_IP,
35 	.opts = {
36 		MRT_INIT,
37 		MRT_DONE,
38 		MRT_ADD_VIF,
39 		MRT_DEL_VIF,
40 		MRT_ADD_MFC,
41 		MRT_DEL_MFC,
42 		MRT_VERSION,
43 		MRT_ASSERT,
44 		MRT_PIM,
45 		MRT_TABLE,
46 		MRT_ADD_MFC_PROXY,
47 		MRT_DEL_MFC_PROXY,
48 		MRT_FLUSH,
49 	},
50 };
51 
/* Description of one MFC entry as sent over rtnetlink by nl_sendmsg_mfc(). */
struct mfc_attr {
	/* Multicast routing table id, sent as RTA_TABLE. */
	int table;
	/* Source address, sent as RTA_SRC. */
	__u32 origin;
	/* Group address, sent as RTA_DST. */
	__u32 group;
	/* Input interface, sent as RTA_IIF; the attribute is omitted when 0. */
	int ifindex;
	/* When true, an empty RTA_PREFSRC attribute is appended. */
	bool proxy;
};
59 
/*
 * Write one routing attribute at @rta and account for it in @nlmsg.
 *
 * @nlmsg: header of the message being built; nlmsg_len grows by the aligned
 *         size of the new attribute.
 * @rta:   location of the new attribute; the caller provides the room.
 * @type:  attribute type (RTA_*).
 * @data:  payload to copy; may be NULL when @len is 0.
 * @len:   payload length in bytes.
 *
 * Returns the aligned position right behind the new attribute, i.e. where the
 * next attribute has to be written.
 */
static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta,
				    int type, const void *data, int len)
{
	int unused = 0;

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);

	/* Payload-less attributes pass NULL; memcpy(NULL, ..., 0) is undefined. */
	if (len > 0)
		memcpy(RTA_DATA(rta), data, len);

	nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len);

	return RTA_NEXT(rta, unused);
}
73 
74 static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self,
75 			  __u16 nlmsg_type, struct mfc_attr *mfc_attr)
76 {
77 	struct {
78 		struct nlmsghdr nlmsg;
79 		struct rtmsg rtm;
80 		char buf[4096];
81 	} req = {
82 		.nlmsg = {
83 			.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)),
84 			/* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */
85 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
86 			.nlmsg_type = nlmsg_type,
87 		},
88 		.rtm = {
89 			/* hard requirements in rtm_to_ipmr_mfcc() */
90 			.rtm_family = RTNL_FAMILY_IPMR,
91 			.rtm_dst_len = 32,
92 			.rtm_type = RTN_MULTICAST,
93 			.rtm_scope = RT_SCOPE_UNIVERSE,
94 			.rtm_protocol = RTPROT_MROUTED,
95 		},
96 	};
97 	struct nlmsghdr *nlmsg = &req.nlmsg;
98 	struct nlmsgerr *errmsg;
99 	struct rtattr *rta;
100 	int err;
101 
102 	rta = (struct rtattr *)&req.buf;
103 	rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table));
104 	rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin));
105 	rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group));
106 	if (mfc_attr->ifindex)
107 		rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex));
108 	if (mfc_attr->proxy)
109 		rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0);
110 
111 	err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0);
112 	ASSERT_EQ(err, req.nlmsg.nlmsg_len);
113 
114 	memset(&req, 0, sizeof(req));
115 
116 	err = recv(self->netlink_sk, &req, sizeof(req), 0);
117 	ASSERT_TRUE(NLMSG_OK(nlmsg, err));
118 	ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type);
119 
120 	errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg);
121 	return errmsg->error;
122 }
123 
124 FIXTURE_SETUP(ipmr)
125 {
126 	struct ifreq ifr = {
127 		.ifr_name = "veth0",
128 	};
129 	int err;
130 
131 	err = unshare(CLONE_NEWNET);
132 	ASSERT_EQ(0, err);
133 
134 	self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
135 	ASSERT_LE(0, self->netlink_sk);
136 
137 	self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol);
138 	ASSERT_LT(0, self->raw_sk);
139 
140 	err = system("ip link add veth0 type veth peer veth1");
141 	ASSERT_EQ(0, err);
142 
143 	err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr);
144 	ASSERT_EQ(0, err);
145 
146 	self->veth_ifindex = ifr.ifr_ifindex;
147 }
148 
149 FIXTURE_TEARDOWN(ipmr)
150 {
151 	close(self->raw_sk);
152 	close(self->netlink_sk);
153 }
154 
155 TEST_F(ipmr, mrt_init)
156 {
157 	int err, val = 0;  /* any value is ok, but size must be int for MRT_INIT. */
158 
159 	err = setsockopt(self->raw_sk,
160 			 variant->level, variant->opts[MRT_INIT - MRT_BASE],
161 			 &val,  sizeof(val));
162 	ASSERT_EQ(0, err);
163 
164 	err = setsockopt(self->raw_sk,
165 			 variant->level, variant->opts[MRT_DONE - MRT_BASE],
166 			 &val,  sizeof(val));
167 	ASSERT_EQ(0, err);
168 }
169 
170 TEST_F(ipmr, mrt_add_vif_register)
171 {
172 	struct vifctl vif = {
173 		.vifc_vifi = 0,
174 		.vifc_flags = VIFF_REGISTER,
175 	};
176 	int err;
177 
178 	err = setsockopt(self->raw_sk,
179 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
180 			 &vif,  sizeof(vif));
181 	ASSERT_EQ(0, err);
182 
183 	err = system("cat /proc/net/ip_mr_vif | grep -q pimreg");
184 	ASSERT_EQ(0, err);
185 
186 	err = setsockopt(self->raw_sk,
187 			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
188 			 &vif,  sizeof(vif));
189 	ASSERT_EQ(0, err);
190 }
191 
192 TEST_F(ipmr, mrt_del_vif_unreg)
193 {
194 	struct vifctl vif = {
195 		.vifc_vifi = 0,
196 		.vifc_flags = VIFF_USE_IFINDEX,
197 		.vifc_lcl_ifindex = self->veth_ifindex,
198 	};
199 	int err;
200 
201 	err = setsockopt(self->raw_sk,
202 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
203 			 &vif,  sizeof(vif));
204 	ASSERT_EQ(0, err);
205 
206 	err = system("cat /proc/net/ip_mr_vif | grep -q veth0");
207 	ASSERT_EQ(0, err);
208 
209 	/* VIF is removed along with its device. */
210 	err = system("ip link del veth0");
211 	ASSERT_EQ(0, err);
212 
213 	/* mrt->vif_table[veth_ifindex]->dev is NULL. */
214 	err = setsockopt(self->raw_sk,
215 			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
216 			 &vif,  sizeof(vif));
217 	ASSERT_EQ(-1, err);
218 	ASSERT_EQ(EADDRNOTAVAIL, errno);
219 }
220 
221 TEST_F(ipmr, mrt_del_vif_netns_dismantle)
222 {
223 	struct vifctl vif = {
224 		.vifc_vifi = 0,
225 		.vifc_flags = VIFF_USE_IFINDEX,
226 		.vifc_lcl_ifindex = self->veth_ifindex,
227 	};
228 	int err;
229 
230 	err = setsockopt(self->raw_sk,
231 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
232 			 &vif,  sizeof(vif));
233 	ASSERT_EQ(0, err);
234 
235 	/* Let cleanup_net() remove veth0 and VIF. */
236 }
237 
238 TEST_F(ipmr, mrt_add_mfc)
239 {
240 	struct mfcctl mfc = {};
241 	int err;
242 
243 	/* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */
244 	err = setsockopt(self->raw_sk,
245 			 variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE],
246 			 &mfc,  sizeof(mfc));
247 	ASSERT_EQ(0, err);
248 
249 	/* (0.0.0.0 -> 0.0.0.0) */
250 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
251 	ASSERT_EQ(0, err);
252 
253 	err = setsockopt(self->raw_sk,
254 			 variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE],
255 			 &mfc,  sizeof(mfc));
256 }
257 
258 TEST_F(ipmr, mrt_add_mfc_proxy)
259 {
260 	struct mfcctl mfc = {};
261 	int err;
262 
263 	err = setsockopt(self->raw_sk,
264 			 variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE],
265 			 &mfc,  sizeof(mfc));
266 	ASSERT_EQ(0, err);
267 
268 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
269 	ASSERT_EQ(0, err);
270 
271 	err = setsockopt(self->raw_sk,
272 			 variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE],
273 			 &mfc,  sizeof(mfc));
274 }
275 
276 TEST_F(ipmr, mrt_add_mfc_netlink)
277 {
278 	struct vifctl vif = {
279 		.vifc_vifi = 0,
280 		.vifc_flags = VIFF_USE_IFINDEX,
281 		.vifc_lcl_ifindex = self->veth_ifindex,
282 	};
283 	struct mfc_attr mfc_attr = {
284 		.table = RT_TABLE_DEFAULT,
285 		.origin = 0,
286 		.group = 0,
287 		.ifindex = self->veth_ifindex,
288 		.proxy = false,
289 	};
290 	int err;
291 
292 	err = setsockopt(self->raw_sk,
293 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
294 			 &vif,  sizeof(vif));
295 	ASSERT_EQ(0, err);
296 
297 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
298 	ASSERT_EQ(0, err);
299 
300 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
301 	ASSERT_EQ(0, err);
302 
303 	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
304 	ASSERT_EQ(0, err);
305 }
306 
307 TEST_F(ipmr, mrt_add_mfc_netlink_proxy)
308 {
309 	struct vifctl vif = {
310 		.vifc_vifi = 0,
311 		.vifc_flags = VIFF_USE_IFINDEX,
312 		.vifc_lcl_ifindex = self->veth_ifindex,
313 	};
314 	struct mfc_attr mfc_attr = {
315 		.table = RT_TABLE_DEFAULT,
316 		.origin = 0,
317 		.group = 0,
318 		.ifindex = self->veth_ifindex,
319 		.proxy = true,
320 	};
321 	int err;
322 
323 	err = setsockopt(self->raw_sk,
324 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
325 			 &vif,  sizeof(vif));
326 	ASSERT_EQ(0, err);
327 
328 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
329 	ASSERT_EQ(0, err);
330 
331 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
332 	ASSERT_EQ(0, err);
333 
334 	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
335 	ASSERT_EQ(0, err);
336 }
337 
338 TEST_F(ipmr, mrt_add_mfc_netlink_no_vif)
339 {
340 	struct mfc_attr mfc_attr = {
341 		.table = RT_TABLE_DEFAULT,
342 		.origin = 0,
343 		.group = 0,
344 		.proxy = false,
345 	};
346 	int err;
347 
348 	/* netlink always requires RTA_IIF of an existing vif. */
349 	mfc_attr.ifindex = 0;
350 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
351 	ASSERT_EQ(-ENFILE, err);
352 
353 	/* netlink always requires RTA_IIF of an existing vif. */
354 	mfc_attr.ifindex = self->veth_ifindex;
355 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
356 	ASSERT_EQ(-ENFILE, err);
357 }
358 
359 TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle)
360 {
361 	struct vifctl vifs[2] = {
362 		{
363 			.vifc_vifi = 0,
364 			.vifc_flags = VIFF_USE_IFINDEX,
365 			.vifc_lcl_ifindex = self->veth_ifindex,
366 		},
367 		{
368 			.vifc_vifi = 1,
369 			.vifc_flags = VIFF_REGISTER,
370 		}
371 	};
372 	struct mfc_attr mfc_attr = {
373 		.table = RT_TABLE_DEFAULT,
374 		.origin = 0,
375 		.group = 0,
376 		.ifindex = self->veth_ifindex,
377 		.proxy = false,
378 	};
379 	int i, err;
380 
381 	for (i = 0; i < 2; i++) {
382 		/* Create 2 VIFs just to avoid -ENFILE later. */
383 		err = setsockopt(self->raw_sk,
384 				 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
385 				 &vifs[i],  sizeof(vifs[i]));
386 		ASSERT_EQ(0, err);
387 	}
388 
389 	/* Create a MFC for mrt->vif_table[0]. */
390 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
391 	ASSERT_EQ(0, err);
392 
393 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
394 	ASSERT_EQ(0, err);
395 
396 	/* Remove mrt->vif_table[0]. */
397 	err = system("ip link del veth0");
398 	ASSERT_EQ(0, err);
399 
400 	/* MFC entry is NOT removed even if the tied VIF is removed... */
401 	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
402 	ASSERT_EQ(0, err);
403 
404 	/* ... and netlink is not capable of removing such an entry
405 	 * because netlink always requires a valid RTA_IIF ... :/
406 	 */
407 	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
408 	ASSERT_EQ(-ENODEV, err);
409 
410 	/* It can be removed by setsockopt(), but let cleanup_net() remove this time. */
411 }
412 
413 TEST_F(ipmr, mrt_table_flush)
414 {
415 	struct vifctl vif = {
416 		.vifc_vifi = 0,
417 		.vifc_flags = VIFF_USE_IFINDEX,
418 		.vifc_lcl_ifindex = self->veth_ifindex,
419 	};
420 	struct mfc_attr mfc_attr = {
421 		.origin = 0,
422 		.group = 0,
423 		.ifindex = self->veth_ifindex,
424 		.proxy = false,
425 	};
426 	int table_id = 92;
427 	int err, flags;
428 
429 	/* Set a random table id rather than RT_TABLE_DEFAULT.
430 	 * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT.
431 	 */
432 	err = setsockopt(self->raw_sk,
433 			 variant->level, variant->opts[MRT_TABLE - MRT_BASE],
434 			 &table_id,  sizeof(table_id));
435 	ASSERT_EQ(0, err);
436 
437 	err = setsockopt(self->raw_sk,
438 			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
439 			 &vif,  sizeof(vif));
440 	ASSERT_EQ(0, err);
441 
442 	mfc_attr.table = table_id;
443 	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
444 	ASSERT_EQ(0, err);
445 
446 	/* Flush mrt->vif_table[] and all caches. */
447 	flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
448 		MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC;
449 	err = setsockopt(self->raw_sk,
450 			 variant->level, variant->opts[MRT_FLUSH - MRT_BASE],
451 			 &flags,  sizeof(flags));
452 	ASSERT_EQ(0, err);
453 }
454 
/* Macro from kselftest_harness.h; presumably supplies main() and runs the tests above - confirm in the header. */
455 TEST_HARNESS_MAIN
456