1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright 2026 Google LLC */ 3 4 #include <linux/if.h> 5 #include <linux/mroute.h> 6 #include <linux/netlink.h> 7 #include <linux/rtnetlink.h> 8 #include <linux/socket.h> 9 #include <sched.h> 10 #include <sys/ioctl.h> 11 #include <sys/socket.h> 12 13 #include "kselftest_harness.h" 14 15 FIXTURE(ipmr) 16 { 17 int netlink_sk; 18 int raw_sk; 19 int veth_ifindex; 20 }; 21 22 FIXTURE_VARIANT(ipmr) 23 { 24 int family; 25 int protocol; 26 int level; 27 int opts[MRT_MAX - MRT_BASE + 1]; 28 }; 29 30 FIXTURE_VARIANT_ADD(ipmr, ipv4) 31 { 32 .family = AF_INET, 33 .protocol = IPPROTO_IGMP, 34 .level = IPPROTO_IP, 35 .opts = { 36 MRT_INIT, 37 MRT_DONE, 38 MRT_ADD_VIF, 39 MRT_DEL_VIF, 40 MRT_ADD_MFC, 41 MRT_DEL_MFC, 42 MRT_VERSION, 43 MRT_ASSERT, 44 MRT_PIM, 45 MRT_TABLE, 46 MRT_ADD_MFC_PROXY, 47 MRT_DEL_MFC_PROXY, 48 MRT_FLUSH, 49 }, 50 }; 51 52 struct mfc_attr { 53 int table; 54 __u32 origin; 55 __u32 group; 56 int ifindex; 57 bool proxy; 58 }; 59 60 static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta, 61 int type, const void *data, int len) 62 { 63 int unused = 0; 64 65 rta->rta_type = type; 66 rta->rta_len = RTA_LENGTH(len); 67 memcpy(RTA_DATA(rta), data, len); 68 69 nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len); 70 71 return RTA_NEXT(rta, unused); 72 } 73 74 static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self, 75 __u16 nlmsg_type, struct mfc_attr *mfc_attr) 76 { 77 struct { 78 struct nlmsghdr nlmsg; 79 struct rtmsg rtm; 80 char buf[4096]; 81 } req = { 82 .nlmsg = { 83 .nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)), 84 /* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */ 85 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 86 .nlmsg_type = nlmsg_type, 87 }, 88 .rtm = { 89 /* hard requirements in rtm_to_ipmr_mfcc() */ 90 .rtm_family = RTNL_FAMILY_IPMR, 91 .rtm_dst_len = 32, 92 .rtm_type = RTN_MULTICAST, 93 .rtm_scope = RT_SCOPE_UNIVERSE, 94 .rtm_protocol = RTPROT_MROUTED, 95 }, 96 }; 97 struct nlmsghdr *nlmsg = &req.nlmsg; 98 struct nlmsgerr *errmsg; 99 struct rtattr *rta; 100 int err; 101 102 rta = (struct rtattr *)&req.buf; 103 rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table)); 104 rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin)); 105 rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group)); 106 if (mfc_attr->ifindex) 107 rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex)); 108 if (mfc_attr->proxy) 109 rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0); 110 111 err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0); 112 ASSERT_EQ(err, req.nlmsg.nlmsg_len); 113 114 memset(&req, 0, sizeof(req)); 115 116 err = recv(self->netlink_sk, &req, sizeof(req), 0); 117 ASSERT_TRUE(NLMSG_OK(nlmsg, err)); 118 ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type); 119 120 errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg); 121 return errmsg->error; 122 } 123 124 FIXTURE_SETUP(ipmr) 125 { 126 struct ifreq ifr = { 127 .ifr_name = "veth0", 128 }; 129 int err; 130 131 err = unshare(CLONE_NEWNET); 132 ASSERT_EQ(0, err); 133 134 self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 135 ASSERT_LE(0, self->netlink_sk); 136 137 self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol); 138 ASSERT_LT(0, self->raw_sk); 139 140 err = system("ip link add veth0 type veth peer veth1"); 141 ASSERT_EQ(0, err); 142 143 err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr); 144 ASSERT_EQ(0, err); 145 146 self->veth_ifindex = ifr.ifr_ifindex; 147 } 148 149 FIXTURE_TEARDOWN(ipmr) 150 { 151 close(self->raw_sk); 152 close(self->netlink_sk); 153 } 154 155 TEST_F(ipmr, mrt_init) 156 { 157 int err, val = 0; /* any value is ok, but size must be int for MRT_INIT. */ 158 159 err = setsockopt(self->raw_sk, 160 variant->level, variant->opts[MRT_INIT - MRT_BASE], 161 &val, sizeof(val)); 162 ASSERT_EQ(0, err); 163 164 err = setsockopt(self->raw_sk, 165 variant->level, variant->opts[MRT_DONE - MRT_BASE], 166 &val, sizeof(val)); 167 ASSERT_EQ(0, err); 168 } 169 170 TEST_F(ipmr, mrt_add_vif_register) 171 { 172 struct vifctl vif = { 173 .vifc_vifi = 0, 174 .vifc_flags = VIFF_REGISTER, 175 }; 176 int err; 177 178 err = setsockopt(self->raw_sk, 179 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 180 &vif, sizeof(vif)); 181 ASSERT_EQ(0, err); 182 183 err = system("cat /proc/net/ip_mr_vif | grep -q pimreg"); 184 ASSERT_EQ(0, err); 185 186 err = setsockopt(self->raw_sk, 187 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], 188 &vif, sizeof(vif)); 189 ASSERT_EQ(0, err); 190 } 191 192 TEST_F(ipmr, mrt_del_vif_unreg) 193 { 194 struct vifctl vif = { 195 .vifc_vifi = 0, 196 .vifc_flags = VIFF_USE_IFINDEX, 197 .vifc_lcl_ifindex = self->veth_ifindex, 198 }; 199 int err; 200 201 err = setsockopt(self->raw_sk, 202 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 203 &vif, sizeof(vif)); 204 ASSERT_EQ(0, err); 205 206 err = system("cat /proc/net/ip_mr_vif | grep -q veth0"); 207 ASSERT_EQ(0, err); 208 209 /* VIF is removed along with its device. */ 210 err = system("ip link del veth0"); 211 ASSERT_EQ(0, err); 212 213 /* mrt->vif_table[veth_ifindex]->dev is NULL. */ 214 err = setsockopt(self->raw_sk, 215 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], 216 &vif, sizeof(vif)); 217 ASSERT_EQ(-1, err); 218 ASSERT_EQ(EADDRNOTAVAIL, errno); 219 } 220 221 TEST_F(ipmr, mrt_del_vif_netns_dismantle) 222 { 223 struct vifctl vif = { 224 .vifc_vifi = 0, 225 .vifc_flags = VIFF_USE_IFINDEX, 226 .vifc_lcl_ifindex = self->veth_ifindex, 227 }; 228 int err; 229 230 err = setsockopt(self->raw_sk, 231 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 232 &vif, sizeof(vif)); 233 ASSERT_EQ(0, err); 234 235 /* Let cleanup_net() remove veth0 and VIF. */ 236 } 237 238 TEST_F(ipmr, mrt_add_mfc) 239 { 240 struct mfcctl mfc = {}; 241 int err; 242 243 /* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */ 244 err = setsockopt(self->raw_sk, 245 variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE], 246 &mfc, sizeof(mfc)); 247 ASSERT_EQ(0, err); 248 249 /* (0.0.0.0 -> 0.0.0.0) */ 250 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 251 ASSERT_EQ(0, err); 252 253 err = setsockopt(self->raw_sk, 254 variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE], 255 &mfc, sizeof(mfc)); 256 } 257 258 TEST_F(ipmr, mrt_add_mfc_proxy) 259 { 260 struct mfcctl mfc = {}; 261 int err; 262 263 err = setsockopt(self->raw_sk, 264 variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE], 265 &mfc, sizeof(mfc)); 266 ASSERT_EQ(0, err); 267 268 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 269 ASSERT_EQ(0, err); 270 271 err = setsockopt(self->raw_sk, 272 variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE], 273 &mfc, sizeof(mfc)); 274 } 275 276 TEST_F(ipmr, mrt_add_mfc_netlink) 277 { 278 struct vifctl vif = { 279 .vifc_vifi = 0, 280 .vifc_flags = VIFF_USE_IFINDEX, 281 .vifc_lcl_ifindex = self->veth_ifindex, 282 }; 283 struct mfc_attr mfc_attr = { 284 .table = RT_TABLE_DEFAULT, 285 .origin = 0, 286 .group = 0, 287 .ifindex = self->veth_ifindex, 288 .proxy = false, 289 }; 290 int err; 291 292 err = setsockopt(self->raw_sk, 293 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 294 &vif, sizeof(vif)); 295 ASSERT_EQ(0, err); 296 297 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 298 ASSERT_EQ(0, err); 299 300 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 301 ASSERT_EQ(0, err); 302 303 err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); 304 ASSERT_EQ(0, err); 305 } 306 307 TEST_F(ipmr, mrt_add_mfc_netlink_proxy) 308 { 309 struct vifctl vif = { 310 .vifc_vifi = 0, 311 .vifc_flags = VIFF_USE_IFINDEX, 312 .vifc_lcl_ifindex = self->veth_ifindex, 313 }; 314 struct mfc_attr mfc_attr = { 315 .table = RT_TABLE_DEFAULT, 316 .origin = 0, 317 .group = 0, 318 .ifindex = self->veth_ifindex, 319 .proxy = true, 320 }; 321 int err; 322 323 err = setsockopt(self->raw_sk, 324 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 325 &vif, sizeof(vif)); 326 ASSERT_EQ(0, err); 327 328 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 329 ASSERT_EQ(0, err); 330 331 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 332 ASSERT_EQ(0, err); 333 334 err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); 335 ASSERT_EQ(0, err); 336 } 337 338 TEST_F(ipmr, mrt_add_mfc_netlink_no_vif) 339 { 340 struct mfc_attr mfc_attr = { 341 .table = RT_TABLE_DEFAULT, 342 .origin = 0, 343 .group = 0, 344 .proxy = false, 345 }; 346 int err; 347 348 /* netlink always requires RTA_IIF of an existing vif. */ 349 mfc_attr.ifindex = 0; 350 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 351 ASSERT_EQ(-ENFILE, err); 352 353 /* netlink always requires RTA_IIF of an existing vif. */ 354 mfc_attr.ifindex = self->veth_ifindex; 355 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 356 ASSERT_EQ(-ENFILE, err); 357 } 358 359 TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle) 360 { 361 struct vifctl vifs[2] = { 362 { 363 .vifc_vifi = 0, 364 .vifc_flags = VIFF_USE_IFINDEX, 365 .vifc_lcl_ifindex = self->veth_ifindex, 366 }, 367 { 368 .vifc_vifi = 1, 369 .vifc_flags = VIFF_REGISTER, 370 } 371 }; 372 struct mfc_attr mfc_attr = { 373 .table = RT_TABLE_DEFAULT, 374 .origin = 0, 375 .group = 0, 376 .ifindex = self->veth_ifindex, 377 .proxy = false, 378 }; 379 int i, err; 380 381 for (i = 0; i < 2; i++) { 382 /* Create 2 VIFs just to avoid -ENFILE later. */ 383 err = setsockopt(self->raw_sk, 384 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 385 &vifs[i], sizeof(vifs[i])); 386 ASSERT_EQ(0, err); 387 } 388 389 /* Create a MFC for mrt->vif_table[0]. */ 390 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 391 ASSERT_EQ(0, err); 392 393 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 394 ASSERT_EQ(0, err); 395 396 /* Remove mrt->vif_table[0]. */ 397 err = system("ip link del veth0"); 398 ASSERT_EQ(0, err); 399 400 /* MFC entry is NOT removed even if the tied VIF is removed... */ 401 err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); 402 ASSERT_EQ(0, err); 403 404 /* ... and netlink is not capable of removing such an entry 405 * because netlink always requires a valid RTA_IIF ... :/ 406 */ 407 err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); 408 ASSERT_EQ(-ENODEV, err); 409 410 /* It can be removed by setsockopt(), but let cleanup_net() remove this time. */ 411 } 412 413 TEST_F(ipmr, mrt_table_flush) 414 { 415 struct vifctl vif = { 416 .vifc_vifi = 0, 417 .vifc_flags = VIFF_USE_IFINDEX, 418 .vifc_lcl_ifindex = self->veth_ifindex, 419 }; 420 struct mfc_attr mfc_attr = { 421 .origin = 0, 422 .group = 0, 423 .ifindex = self->veth_ifindex, 424 .proxy = false, 425 }; 426 int table_id = 92; 427 int err, flags; 428 429 /* Set a random table id rather than RT_TABLE_DEFAULT. 430 * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT. 431 */ 432 err = setsockopt(self->raw_sk, 433 variant->level, variant->opts[MRT_TABLE - MRT_BASE], 434 &table_id, sizeof(table_id)); 435 ASSERT_EQ(0, err); 436 437 err = setsockopt(self->raw_sk, 438 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], 439 &vif, sizeof(vif)); 440 ASSERT_EQ(0, err); 441 442 mfc_attr.table = table_id; 443 err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); 444 ASSERT_EQ(0, err); 445 446 /* Flush mrt->vif_table[] and all caches. */ 447 flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | 448 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC; 449 err = setsockopt(self->raw_sk, 450 variant->level, variant->opts[MRT_FLUSH - MRT_BASE], 451 &flags, sizeof(flags)); 452 ASSERT_EQ(0, err); 453 } 454 455 TEST_HARNESS_MAIN 456