1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 
63 */ 64 65 static DEFINE_SPINLOCK(mrt_lock); 66 67 static struct net_device *vif_dev_read(const struct vif_device *vif) 68 { 69 return rcu_dereference(vif->dev); 70 } 71 72 /* Multicast router control variables */ 73 74 /* Special spinlock for queue of unresolved entries */ 75 static DEFINE_SPINLOCK(mfc_unres_lock); 76 77 /* We return to original Alan's scheme. Hash table of resolved 78 entries is changed only in process context and protected 79 with weak lock mrt_lock. Queue of unresolved entries is protected 80 with strong spinlock mfc_unres_lock. 81 82 In this case data path is free of exclusive locks at all. 83 */ 84 85 static struct kmem_cache *mrt_cachep __read_mostly; 86 87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 88 static void ip6mr_free_table(struct mr_table *mrt); 89 90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 91 struct net_device *dev, struct sk_buff *skb, 92 struct mfc6_cache *cache); 93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 94 mifi_t mifi, int assert); 95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 96 int cmd); 97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 99 struct netlink_ext_ack *extack); 100 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 101 struct netlink_callback *cb); 102 static void mroute_clean_tables(struct mr_table *mrt, int flags); 103 static void ipmr_expire_process(struct timer_list *t); 104 105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 106 #define ip6mr_for_each_table(mrt, net) \ 107 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 108 lockdep_rtnl_is_held() || \ 109 list_empty(&net->ipv6.mr6_tables)) 110 111 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 112 struct mr_table *mrt) 113 { 114 struct mr_table *ret; 115 116 if (!mrt) 117 ret = 
list_entry_rcu(net->ipv6.mr6_tables.next, 118 struct mr_table, list); 119 else 120 ret = list_entry_rcu(mrt->list.next, 121 struct mr_table, list); 122 123 if (&ret->list == &net->ipv6.mr6_tables) 124 return NULL; 125 return ret; 126 } 127 128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) 129 { 130 struct mr_table *mrt; 131 132 ip6mr_for_each_table(mrt, net) { 133 if (mrt->id == id) 134 return mrt; 135 } 136 return NULL; 137 } 138 139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 140 { 141 struct mr_table *mrt; 142 143 rcu_read_lock(); 144 mrt = __ip6mr_get_table(net, id); 145 rcu_read_unlock(); 146 return mrt; 147 } 148 149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 150 struct mr_table **mrt) 151 { 152 int err; 153 struct ip6mr_result res; 154 struct fib_lookup_arg arg = { 155 .result = &res, 156 .flags = FIB_LOOKUP_NOREF, 157 }; 158 159 /* update flow if oif or iif point to device enslaved to l3mdev */ 160 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 161 162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 163 flowi6_to_flowi(flp6), 0, &arg); 164 if (err < 0) 165 return err; 166 *mrt = res.mrt; 167 return 0; 168 } 169 170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 171 int flags, struct fib_lookup_arg *arg) 172 { 173 struct ip6mr_result *res = arg->result; 174 struct mr_table *mrt; 175 176 switch (rule->action) { 177 case FR_ACT_TO_TBL: 178 break; 179 case FR_ACT_UNREACHABLE: 180 return -ENETUNREACH; 181 case FR_ACT_PROHIBIT: 182 return -EACCES; 183 case FR_ACT_BLACKHOLE: 184 default: 185 return -EINVAL; 186 } 187 188 arg->table = fib_rule_get_table(rule, arg); 189 190 mrt = __ip6mr_get_table(rule->fr_net, arg->table); 191 if (!mrt) 192 return -EAGAIN; 193 res->mrt = mrt; 194 return 0; 195 } 196 197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 198 { 199 return 1; 200 } 201 202 static int ip6mr_rule_configure(struct fib_rule *rule, struct 
sk_buff *skb, 203 struct fib_rule_hdr *frh, struct nlattr **tb, 204 struct netlink_ext_ack *extack) 205 { 206 return 0; 207 } 208 209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 210 struct nlattr **tb) 211 { 212 return 1; 213 } 214 215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 216 struct fib_rule_hdr *frh) 217 { 218 frh->dst_len = 0; 219 frh->src_len = 0; 220 frh->tos = 0; 221 return 0; 222 } 223 224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 225 .family = RTNL_FAMILY_IP6MR, 226 .rule_size = sizeof(struct ip6mr_rule), 227 .addr_size = sizeof(struct in6_addr), 228 .action = ip6mr_rule_action, 229 .match = ip6mr_rule_match, 230 .configure = ip6mr_rule_configure, 231 .compare = ip6mr_rule_compare, 232 .fill = ip6mr_rule_fill, 233 .nlgroup = RTNLGRP_IPV6_RULE, 234 .owner = THIS_MODULE, 235 }; 236 237 static int __net_init ip6mr_rules_init(struct net *net) 238 { 239 struct fib_rules_ops *ops; 240 struct mr_table *mrt; 241 int err; 242 243 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 244 if (IS_ERR(ops)) 245 return PTR_ERR(ops); 246 247 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 248 249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 250 if (IS_ERR(mrt)) { 251 err = PTR_ERR(mrt); 252 goto err1; 253 } 254 255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); 256 if (err < 0) 257 goto err2; 258 259 net->ipv6.mr6_rules_ops = ops; 260 return 0; 261 262 err2: 263 rtnl_lock(); 264 ip6mr_free_table(mrt); 265 rtnl_unlock(); 266 err1: 267 fib_rules_unregister(ops); 268 return err; 269 } 270 271 static void __net_exit ip6mr_rules_exit(struct net *net) 272 { 273 struct mr_table *mrt, *next; 274 275 ASSERT_RTNL(); 276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 277 list_del(&mrt->list); 278 ip6mr_free_table(mrt); 279 } 280 fib_rules_unregister(net->ipv6.mr6_rules_ops); 281 } 282 283 static int ip6mr_rules_dump(struct net *net, struct notifier_block 
*nb, 284 struct netlink_ext_ack *extack) 285 { 286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 287 } 288 289 static unsigned int ip6mr_rules_seq_read(const struct net *net) 290 { 291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 292 } 293 294 bool ip6mr_rule_default(const struct fib_rule *rule) 295 { 296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 298 } 299 EXPORT_SYMBOL(ip6mr_rule_default); 300 #else 301 #define ip6mr_for_each_table(mrt, net) \ 302 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 303 304 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 305 struct mr_table *mrt) 306 { 307 if (!mrt) 308 return net->ipv6.mrt6; 309 return NULL; 310 } 311 312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 313 { 314 return net->ipv6.mrt6; 315 } 316 317 #define __ip6mr_get_table ip6mr_get_table 318 319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 320 struct mr_table **mrt) 321 { 322 *mrt = net->ipv6.mrt6; 323 return 0; 324 } 325 326 static int __net_init ip6mr_rules_init(struct net *net) 327 { 328 struct mr_table *mrt; 329 330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 331 if (IS_ERR(mrt)) 332 return PTR_ERR(mrt); 333 net->ipv6.mrt6 = mrt; 334 return 0; 335 } 336 337 static void __net_exit ip6mr_rules_exit(struct net *net) 338 { 339 ASSERT_RTNL(); 340 ip6mr_free_table(net->ipv6.mrt6); 341 net->ipv6.mrt6 = NULL; 342 } 343 344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 345 struct netlink_ext_ack *extack) 346 { 347 return 0; 348 } 349 350 static unsigned int ip6mr_rules_seq_read(const struct net *net) 351 { 352 return 0; 353 } 354 #endif 355 356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 357 const void *ptr) 358 { 359 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 360 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 361 362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, 
&cmparg->mf6c_mcastgrp) || 363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 364 } 365 366 static const struct rhashtable_params ip6mr_rht_params = { 367 .head_offset = offsetof(struct mr_mfc, mnode), 368 .key_offset = offsetof(struct mfc6_cache, cmparg), 369 .key_len = sizeof(struct mfc6_cache_cmp_arg), 370 .nelem_hint = 3, 371 .obj_cmpfn = ip6mr_hash_cmp, 372 .automatic_shrinking = true, 373 }; 374 375 static void ip6mr_new_table_set(struct mr_table *mrt, 376 struct net *net) 377 { 378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 379 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 380 #endif 381 } 382 383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 384 .mf6c_origin = IN6ADDR_ANY_INIT, 385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 386 }; 387 388 static struct mr_table_ops ip6mr_mr_table_ops = { 389 .rht_params = &ip6mr_rht_params, 390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 391 }; 392 393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 394 { 395 struct mr_table *mrt; 396 397 mrt = __ip6mr_get_table(net, id); 398 if (mrt) 399 return mrt; 400 401 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 402 ipmr_expire_process, ip6mr_new_table_set); 403 } 404 405 static void ip6mr_free_table(struct mr_table *mrt) 406 { 407 struct net *net = read_pnet(&mrt->net); 408 409 WARN_ON_ONCE(!mr_can_free_table(net)); 410 411 timer_shutdown_sync(&mrt->ipmr_expire_timer); 412 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 413 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); 414 rhltable_destroy(&mrt->mfc_hash); 415 kfree(mrt); 416 } 417 418 #ifdef CONFIG_PROC_FS 419 /* The /proc interfaces to multicast routing 420 * /proc/ip6_mr_cache /proc/ip6_mr_vif 421 */ 422 423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 424 __acquires(RCU) 425 { 426 struct mr_vif_iter *iter = seq->private; 427 struct net *net = seq_file_net(seq); 428 struct mr_table *mrt; 429 430 rcu_read_lock(); 431 mrt = 
__ip6mr_get_table(net, RT6_TABLE_DFLT); 432 if (!mrt) { 433 rcu_read_unlock(); 434 return ERR_PTR(-ENOENT); 435 } 436 437 iter->mrt = mrt; 438 439 return mr_vif_seq_start(seq, pos); 440 } 441 442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 443 __releases(RCU) 444 { 445 rcu_read_unlock(); 446 } 447 448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 449 { 450 struct mr_vif_iter *iter = seq->private; 451 struct mr_table *mrt = iter->mrt; 452 453 if (v == SEQ_START_TOKEN) { 454 seq_puts(seq, 455 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 456 } else { 457 const struct vif_device *vif = v; 458 const struct net_device *vif_dev; 459 const char *name; 460 461 vif_dev = vif_dev_read(vif); 462 name = vif_dev ? vif_dev->name : "none"; 463 464 seq_printf(seq, 465 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 466 vif - mrt->vif_table, 467 name, vif->bytes_in, vif->pkt_in, 468 vif->bytes_out, vif->pkt_out, 469 vif->flags); 470 } 471 return 0; 472 } 473 474 static const struct seq_operations ip6mr_vif_seq_ops = { 475 .start = ip6mr_vif_seq_start, 476 .next = mr_vif_seq_next, 477 .stop = ip6mr_vif_seq_stop, 478 .show = ip6mr_vif_seq_show, 479 }; 480 481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 482 { 483 struct net *net = seq_file_net(seq); 484 struct mr_table *mrt; 485 486 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 487 if (!mrt) 488 return ERR_PTR(-ENOENT); 489 490 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 491 } 492 493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 494 { 495 int n; 496 497 if (v == SEQ_START_TOKEN) { 498 seq_puts(seq, 499 "Group " 500 "Origin " 501 "Iif Pkts Bytes Wrong Oifs\n"); 502 } else { 503 const struct mfc6_cache *mfc = v; 504 const struct mr_mfc_iter *it = seq->private; 505 struct mr_table *mrt = it->mrt; 506 507 seq_printf(seq, "%pI6 %pI6 %-3hd", 508 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 509 mfc->_c.mfc_parent); 510 511 if (it->cache != &mrt->mfc_unres_queue) { 
512 seq_printf(seq, " %8lu %8lu %8lu", 513 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 514 atomic_long_read(&mfc->_c.mfc_un.res.bytes), 515 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 516 for (n = mfc->_c.mfc_un.res.minvif; 517 n < mfc->_c.mfc_un.res.maxvif; n++) { 518 if (VIF_EXISTS(mrt, n) && 519 mfc->_c.mfc_un.res.ttls[n] < 255) 520 seq_printf(seq, 521 " %2d:%-3d", n, 522 mfc->_c.mfc_un.res.ttls[n]); 523 } 524 } else { 525 /* unresolved mfc_caches don't contain 526 * pkt, bytes and wrong_if values 527 */ 528 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 529 } 530 seq_putc(seq, '\n'); 531 } 532 return 0; 533 } 534 535 static const struct seq_operations ipmr_mfc_seq_ops = { 536 .start = ipmr_mfc_seq_start, 537 .next = mr_mfc_seq_next, 538 .stop = mr_mfc_seq_stop, 539 .show = ipmr_mfc_seq_show, 540 }; 541 #endif 542 543 #ifdef CONFIG_IPV6_PIMSM_V2 544 545 static int pim6_rcv(struct sk_buff *skb) 546 { 547 struct pimreghdr *pim; 548 struct ipv6hdr *encap; 549 struct net_device *reg_dev = NULL; 550 struct net *net = dev_net(skb->dev); 551 struct mr_table *mrt; 552 struct flowi6 fl6 = { 553 .flowi6_iif = skb->dev->ifindex, 554 .flowi6_mark = skb->mark, 555 }; 556 int reg_vif_num; 557 558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 559 goto drop; 560 561 pim = (struct pimreghdr *)skb_transport_header(skb); 562 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 563 (pim->flags & PIM_NULL_REGISTER) || 564 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 565 sizeof(*pim), IPPROTO_PIM, 566 csum_partial((void *)pim, sizeof(*pim), 0)) && 567 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 568 goto drop; 569 570 /* check if the inner packet is destined to mcast group */ 571 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 572 sizeof(*pim)); 573 574 if (!ipv6_addr_is_multicast(&encap->daddr) || 575 encap->payload_len == 0 || 576 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 577 goto drop; 578 579 if 
(ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 580 goto drop; 581 582 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ 583 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 584 if (reg_vif_num >= 0) 585 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); 586 587 if (!reg_dev) 588 goto drop; 589 590 skb->mac_header = skb->network_header; 591 skb_pull(skb, (u8 *)encap - skb->data); 592 skb_reset_network_header(skb); 593 skb->protocol = htons(ETH_P_IPV6); 594 skb->ip_summed = CHECKSUM_NONE; 595 596 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 597 598 netif_rx(skb); 599 600 return 0; 601 drop: 602 kfree_skb(skb); 603 return 0; 604 } 605 606 static const struct inet6_protocol pim6_protocol = { 607 .handler = pim6_rcv, 608 }; 609 610 /* Service routines creating virtual interfaces: PIMREG */ 611 612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 613 struct net_device *dev) 614 { 615 struct net *net = dev_net(dev); 616 struct mr_table *mrt; 617 struct flowi6 fl6 = { 618 .flowi6_oif = dev->ifindex, 619 .flowi6_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 620 .flowi6_mark = skb->mark, 621 }; 622 623 if (!pskb_inet_may_pull(skb)) 624 goto tx_err; 625 626 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 627 goto tx_err; 628 629 DEV_STATS_ADD(dev, tx_bytes, skb->len); 630 DEV_STATS_INC(dev, tx_packets); 631 rcu_read_lock(); 632 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 633 MRT6MSG_WHOLEPKT); 634 rcu_read_unlock(); 635 kfree_skb(skb); 636 return NETDEV_TX_OK; 637 638 tx_err: 639 DEV_STATS_INC(dev, tx_errors); 640 kfree_skb(skb); 641 return NETDEV_TX_OK; 642 } 643 644 static int reg_vif_get_iflink(const struct net_device *dev) 645 { 646 return 0; 647 } 648 649 static const struct net_device_ops reg_vif_netdev_ops = { 650 .ndo_start_xmit = reg_vif_xmit, 651 .ndo_get_iflink = reg_vif_get_iflink, 652 }; 653 654 static void reg_vif_setup(struct net_device *dev) 655 { 656 dev->type = ARPHRD_PIMREG; 657 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 658 dev->flags = IFF_NOARP; 659 dev->netdev_ops = ®_vif_netdev_ops; 660 dev->needs_free_netdev = true; 661 dev->netns_immutable = true; 662 } 663 664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 665 { 666 struct net_device *dev; 667 char name[IFNAMSIZ]; 668 669 if (mrt->id == RT6_TABLE_DFLT) 670 sprintf(name, "pim6reg"); 671 else 672 sprintf(name, "pim6reg%u", mrt->id); 673 674 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 675 if (!dev) 676 return NULL; 677 678 dev_net_set(dev, net); 679 680 if (register_netdevice(dev)) { 681 free_netdev(dev); 682 return NULL; 683 } 684 685 if (dev_open(dev, NULL)) 686 goto failure; 687 688 dev_hold(dev); 689 return dev; 690 691 failure: 692 unregister_netdevice(dev); 693 return NULL; 694 } 695 #endif 696 697 static int call_ip6mr_vif_entry_notifiers(struct net *net, 698 enum fib_event_type event_type, 699 struct vif_device *vif, 700 struct net_device *vif_dev, 701 mifi_t vif_index, u32 tb_id) 702 { 703 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, 
event_type, 704 vif, vif_dev, vif_index, tb_id, 705 &net->ipv6.ipmr_seq); 706 } 707 708 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 709 enum fib_event_type event_type, 710 struct mfc6_cache *mfc, u32 tb_id) 711 { 712 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 713 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 714 } 715 716 /* Delete a VIF entry */ 717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 718 struct list_head *head) 719 { 720 struct vif_device *v; 721 struct net_device *dev; 722 struct inet6_dev *in6_dev; 723 724 if (vifi < 0 || vifi >= mrt->maxvif) 725 return -EADDRNOTAVAIL; 726 727 v = &mrt->vif_table[vifi]; 728 729 dev = rtnl_dereference(v->dev); 730 if (!dev) 731 return -EADDRNOTAVAIL; 732 733 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 734 FIB_EVENT_VIF_DEL, v, dev, 735 vifi, mrt->id); 736 spin_lock(&mrt_lock); 737 RCU_INIT_POINTER(v->dev, NULL); 738 739 #ifdef CONFIG_IPV6_PIMSM_V2 740 if (vifi == mrt->mroute_reg_vif_num) { 741 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 742 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 743 } 744 #endif 745 746 if (vifi + 1 == mrt->maxvif) { 747 int tmp; 748 for (tmp = vifi - 1; tmp >= 0; tmp--) { 749 if (VIF_EXISTS(mrt, tmp)) 750 break; 751 } 752 WRITE_ONCE(mrt->maxvif, tmp + 1); 753 } 754 755 spin_unlock(&mrt_lock); 756 757 dev_set_allmulti(dev, -1); 758 759 in6_dev = __in6_dev_get(dev); 760 if (in6_dev) { 761 atomic_dec(&in6_dev->cnf.mc_forwarding); 762 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 763 NETCONFA_MC_FORWARDING, 764 dev->ifindex, &in6_dev->cnf); 765 } 766 767 if ((v->flags & MIFF_REGISTER) && !notify) 768 unregister_netdevice_queue(dev, head); 769 770 netdev_put(dev, &v->dev_tracker); 771 return 0; 772 } 773 774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 775 { 776 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 777 778 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 779 } 

/* Free a cache entry after an RCU grace period, via ip6mr_cache_free_rcu(). */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		/* an IPv6 header with version 0 marks a queued netlink
		 * request rather than a data packet: answer it with an
		 * NLMSG_ERROR carrying -ETIMEDOUT
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

/* Walk the unresolved queue: entries whose deadline has passed are unlinked,
 * announced with RTM_DELROUTE and destroyed; the remaining ones determine
 * the next timer interval (the shortest time left, at most 10 * HZ). The
 * timer is re-armed only while the queue is non-empty. Both callers in this
 * file invoke it with mfc_unres_lock held.
 */
static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

/* Timer callback of mrt->ipmr_expire_timer. The lock is only tried; when it
 * is contended the timer is re-armed for the next jiffy instead of spinning.
 */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under locked mrt_lock. */

/* Rebuild the per-vif TTL thresholds of @cache from @ttls: everything is
 * first reset (ttls = 255, empty [minvif, maxvif) range), then every
 * existing vif whose ttl is in 1..254 is copied and the range widened to
 * include it. Finally lastuse is stamped with the current jiffies.
 */
static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}

/* Add a multicast interface described by @vifc to @mrt.
 *
 * mif6c_flags selects the kind of interface: MIFF_REGISTER (only with
 * CONFIG_IPV6_PIMSM_V2) creates the register device, 0 binds the existing
 * device with ifindex mif6c_pifi; any other value is -EINVAL. A zero
 * @mrtsock marks the vif VIFF_STATIC.
 *
 * Returns 0 on success, -EADDRINUSE when the slot (or the register vif) is
 * already taken, -ENOBUFS / -EADDRNOTAVAIL when no device could be
 * obtained, or the error from dev_set_allmulti().
 *
 * NOTE(review): mif6c_mifi is used to index vif_table without a range check
 * in this function; presumably the caller validates it - confirm.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}

/* Look up the entry for (@origin, @mcastgrp) through mr_mfc_find(). */
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
/* An unspecified @mcastgrp is handed to mr_mfc_find_any_parent() instead. */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
/* Zeroed GFP_KERNEL allocation for a resolved entry, with the refcount
 * initialised to 1; returns NULL when the allocation fails.
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	/* start with last_assert already older than MFC_ASSERT_THRESH */
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

/* Zeroed GFP_ATOMIC allocation for an unresolved entry: empty skb queue and
 * a deadline 10 * HZ from now. Returns NULL when the allocation fails.
 */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

/* Drain the skbs queued on the unresolved entry @uc, using the resolved
 * entry @c: queued netlink requests (IPv6 version field 0) are answered
 * with the route, or with -EMSGSIZE when it cannot be filled in; data
 * packets are forwarded under rcu_read_lock().
 */
static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			rcu_read_lock();
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
			rcu_read_unlock();
		}
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under rcu_read_lock()
 *
 *	For MRT6MSG_WHOLEPKT / MRT6MSG_WRMIFWHOLE (CONFIG_IPV6_PIMSM_V2 only)
 *	the report is a copy of @pkt with a struct mrt6msg pushed in front;
 *	otherwise it is a fresh skb holding a copy of the IPv6 header of
 *	@pkt followed by a struct mrt6msg. @pkt itself is not freed here.
 *
 *	Returns -ENOBUFS when no skb could be allocated, -EINVAL when the
 *	table has no mroute socket, otherwise the result of
 *	sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
/* Runs under spin_lock_bh(&mfc_unres_lock). If no unresolved entry exists
 * yet for the packet's (source, group), one is allocated, an
 * MRT6MSG_NOCACHE report is sent, and the entry is put on the unresolved
 * queue. The packet is then appended to the entry unless more than three
 * skbs are already pending. @skb is consumed on every path (queued or
 * freed). Returns 0 when queued, -ENOBUFS on allocation failure or a full
 * queue, or the error from ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* expire stale entries and (re)arm the timer for the queue */
		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

/* Delete the resolved entry matching @mfc's origin/group and @parent:
 * unhash and unlink it, notify the fib notifiers and netlink listeners,
 * and drop the reference. Returns -ENOENT when no such entry exists.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

/* netdevice notifier: on NETDEV_UNREGISTER, delete every vif in every table
 * of the device's netns that is bound to the device. All other events are
 * ignored. Always returns NOTIFY_DONE.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

/* fib notifier sequence: the per-net ipmr_seq plus the rules sequence. */
static unsigned int ip6mr_seq_read(const struct net *net)
{
	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}

/* fib notifier dump: delegate to mr_dump() with the IP6MR rule dumper and
 * table iterator.
 */
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

/* Reset the per-net sequence counter and register the fib notifier ops. */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

/* Unregister the fib notifier ops and clear the per-net pointer. */
static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	rtnl_lock();
	ip6mr_rules_exit(net);
rtnl_unlock(); 1357 #endif 1358 ip6mr_rules_fail: 1359 ip6mr_notifier_exit(net); 1360 return err; 1361 } 1362 1363 static void __net_exit ip6mr_net_exit(struct net *net) 1364 { 1365 #ifdef CONFIG_PROC_FS 1366 remove_proc_entry("ip6_mr_cache", net->proc_net); 1367 remove_proc_entry("ip6_mr_vif", net->proc_net); 1368 #endif 1369 ip6mr_notifier_exit(net); 1370 } 1371 1372 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) 1373 { 1374 struct net *net; 1375 1376 rtnl_lock(); 1377 list_for_each_entry(net, net_list, exit_list) 1378 ip6mr_rules_exit(net); 1379 rtnl_unlock(); 1380 } 1381 1382 static struct pernet_operations ip6mr_net_ops = { 1383 .init = ip6mr_net_init, 1384 .exit = ip6mr_net_exit, 1385 .exit_batch = ip6mr_net_exit_batch, 1386 }; 1387 1388 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { 1389 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, 1390 .msgtype = RTM_GETROUTE, 1391 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, 1392 }; 1393 1394 int __init ip6_mr_init(void) 1395 { 1396 int err; 1397 1398 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); 1399 if (!mrt_cachep) 1400 return -ENOMEM; 1401 1402 err = register_pernet_subsys(&ip6mr_net_ops); 1403 if (err) 1404 goto reg_pernet_fail; 1405 1406 err = register_netdevice_notifier(&ip6_mr_notifier); 1407 if (err) 1408 goto reg_notif_fail; 1409 #ifdef CONFIG_IPV6_PIMSM_V2 1410 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1411 pr_err("%s: can't add PIM protocol\n", __func__); 1412 err = -EAGAIN; 1413 goto add_proto_fail; 1414 } 1415 #endif 1416 err = rtnl_register_many(ip6mr_rtnl_msg_handlers); 1417 if (!err) 1418 return 0; 1419 1420 #ifdef CONFIG_IPV6_PIMSM_V2 1421 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1422 add_proto_fail: 1423 unregister_netdevice_notifier(&ip6_mr_notifier); 1424 #endif 1425 reg_notif_fail: 1426 unregister_pernet_subsys(&ip6mr_net_ops); 1427 reg_pernet_fail: 1428 
kmem_cache_destroy(mrt_cachep); 1429 return err; 1430 } 1431 1432 void __init ip6_mr_cleanup(void) 1433 { 1434 rtnl_unregister_many(ip6mr_rtnl_msg_handlers); 1435 #ifdef CONFIG_IPV6_PIMSM_V2 1436 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1437 #endif 1438 unregister_netdevice_notifier(&ip6_mr_notifier); 1439 unregister_pernet_subsys(&ip6mr_net_ops); 1440 kmem_cache_destroy(mrt_cachep); 1441 } 1442 1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1444 struct mf6cctl *mfc, int mrtsock, int parent) 1445 { 1446 unsigned char ttls[MAXMIFS]; 1447 struct mfc6_cache *uc, *c; 1448 struct mr_mfc *_uc; 1449 bool found; 1450 int i, err; 1451 1452 if (mfc->mf6cc_parent >= MAXMIFS) 1453 return -ENFILE; 1454 1455 memset(ttls, 255, MAXMIFS); 1456 for (i = 0; i < MAXMIFS; i++) { 1457 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1458 ttls[i] = 1; 1459 } 1460 1461 /* The entries are added/deleted only under RTNL */ 1462 rcu_read_lock(); 1463 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1464 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1465 rcu_read_unlock(); 1466 if (c) { 1467 spin_lock(&mrt_lock); 1468 c->_c.mfc_parent = mfc->mf6cc_parent; 1469 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1470 if (!mrtsock) 1471 c->_c.mfc_flags |= MFC_STATIC; 1472 spin_unlock(&mrt_lock); 1473 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1474 c, mrt->id); 1475 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1476 return 0; 1477 } 1478 1479 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1480 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1481 return -EINVAL; 1482 1483 c = ip6mr_cache_alloc(); 1484 if (!c) 1485 return -ENOMEM; 1486 1487 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1488 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1489 c->_c.mfc_parent = mfc->mf6cc_parent; 1490 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1491 if (!mrtsock) 1492 c->_c.mfc_flags |= MFC_STATIC; 1493 1494 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, 
&c->_c.mnode, 1495 ip6mr_rht_params); 1496 if (err) { 1497 pr_err("ip6mr: rhtable insert error %d\n", err); 1498 ip6mr_cache_free(c); 1499 return err; 1500 } 1501 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1502 1503 /* Check to see if we resolved a queued list. If so we 1504 * need to send on the frames and tidy up. 1505 */ 1506 found = false; 1507 spin_lock_bh(&mfc_unres_lock); 1508 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1509 uc = (struct mfc6_cache *)_uc; 1510 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1511 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1512 list_del(&_uc->list); 1513 atomic_dec(&mrt->cache_resolve_queue_len); 1514 found = true; 1515 break; 1516 } 1517 } 1518 if (list_empty(&mrt->mfc_unres_queue)) 1519 timer_delete(&mrt->ipmr_expire_timer); 1520 spin_unlock_bh(&mfc_unres_lock); 1521 1522 if (found) { 1523 ip6mr_cache_resolve(net, mrt, uc, c); 1524 ip6mr_cache_free(uc); 1525 } 1526 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1527 c, mrt->id); 1528 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1529 return 0; 1530 } 1531 1532 /* 1533 * Close the multicast socket, and clear the vif tables etc 1534 */ 1535 1536 static void mroute_clean_tables(struct mr_table *mrt, int flags) 1537 { 1538 struct mr_mfc *c, *tmp; 1539 LIST_HEAD(list); 1540 int i; 1541 1542 /* Shut down all active vif entries */ 1543 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1544 for (i = 0; i < mrt->maxvif; i++) { 1545 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1546 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1547 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1548 continue; 1549 mif6_delete(mrt, i, 0, &list); 1550 } 1551 unregister_netdevice_many(&list); 1552 } 1553 1554 /* Wipe the cache */ 1555 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1556 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1557 if (((c->mfc_flags & MFC_STATIC) && !(flags & 
MRT6_FLUSH_MFC_STATIC)) || 1558 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1559 continue; 1560 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1561 list_del_rcu(&c->list); 1562 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1563 FIB_EVENT_ENTRY_DEL, 1564 (struct mfc6_cache *)c, mrt->id); 1565 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1566 mr_cache_put(c); 1567 } 1568 } 1569 1570 if (flags & MRT6_FLUSH_MFC) { 1571 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1572 spin_lock_bh(&mfc_unres_lock); 1573 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1574 list_del(&c->list); 1575 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1576 RTM_DELROUTE); 1577 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1578 } 1579 spin_unlock_bh(&mfc_unres_lock); 1580 } 1581 } 1582 } 1583 1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1585 { 1586 int err = 0; 1587 struct net *net = sock_net(sk); 1588 1589 rtnl_lock(); 1590 spin_lock(&mrt_lock); 1591 if (rtnl_dereference(mrt->mroute_sk)) { 1592 err = -EADDRINUSE; 1593 } else { 1594 rcu_assign_pointer(mrt->mroute_sk, sk); 1595 sock_set_flag(sk, SOCK_RCU_FREE); 1596 atomic_inc(&net->ipv6.devconf_all->mc_forwarding); 1597 } 1598 spin_unlock(&mrt_lock); 1599 1600 if (!err) 1601 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1602 NETCONFA_MC_FORWARDING, 1603 NETCONFA_IFINDEX_ALL, 1604 net->ipv6.devconf_all); 1605 rtnl_unlock(); 1606 1607 return err; 1608 } 1609 1610 int ip6mr_sk_done(struct sock *sk) 1611 { 1612 struct net *net = sock_net(sk); 1613 struct ipv6_devconf *devconf; 1614 struct mr_table *mrt; 1615 int err = -EACCES; 1616 1617 if (sk->sk_type != SOCK_RAW || 1618 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1619 return err; 1620 1621 devconf = net->ipv6.devconf_all; 1622 if (!devconf || !atomic_read(&devconf->mc_forwarding)) 1623 return err; 1624 1625 rtnl_lock(); 1626 ip6mr_for_each_table(mrt, net) { 1627 if (sk == 
rtnl_dereference(mrt->mroute_sk)) { 1628 spin_lock(&mrt_lock); 1629 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1630 /* Note that mroute_sk had SOCK_RCU_FREE set, 1631 * so the RCU grace period before sk freeing 1632 * is guaranteed by sk_destruct() 1633 */ 1634 atomic_dec(&devconf->mc_forwarding); 1635 spin_unlock(&mrt_lock); 1636 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1637 NETCONFA_MC_FORWARDING, 1638 NETCONFA_IFINDEX_ALL, 1639 net->ipv6.devconf_all); 1640 1641 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); 1642 err = 0; 1643 break; 1644 } 1645 } 1646 rtnl_unlock(); 1647 1648 return err; 1649 } 1650 1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1652 { 1653 struct mr_table *mrt; 1654 struct flowi6 fl6 = { 1655 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1656 .flowi6_oif = skb->dev->ifindex, 1657 .flowi6_mark = skb->mark, 1658 }; 1659 1660 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1661 return NULL; 1662 1663 return rcu_access_pointer(mrt->mroute_sk); 1664 } 1665 EXPORT_SYMBOL(mroute6_is_socket); 1666 1667 /* 1668 * Socket options and virtual interface manipulation. The whole 1669 * virtual interface system is a complete heap, but unfortunately 1670 * that's how BSD mrouted happens to think. Maybe one day with a proper 1671 * MOSPF/PIM router set up we can clean this up. 1672 */ 1673 1674 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1675 unsigned int optlen) 1676 { 1677 int ret, parent = 0; 1678 struct mif6ctl vif; 1679 struct mf6cctl mfc; 1680 mifi_t mifi; 1681 struct net *net = sock_net(sk); 1682 struct mr_table *mrt; 1683 1684 if (sk->sk_type != SOCK_RAW || 1685 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1686 return -EOPNOTSUPP; 1687 1688 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
: RT6_TABLE_DFLT); 1689 if (!mrt) 1690 return -ENOENT; 1691 1692 if (optname != MRT6_INIT) { 1693 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1694 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1695 return -EACCES; 1696 } 1697 1698 switch (optname) { 1699 case MRT6_INIT: 1700 if (optlen < sizeof(int)) 1701 return -EINVAL; 1702 1703 return ip6mr_sk_init(mrt, sk); 1704 1705 case MRT6_DONE: 1706 return ip6mr_sk_done(sk); 1707 1708 case MRT6_ADD_MIF: 1709 if (optlen < sizeof(vif)) 1710 return -EINVAL; 1711 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1712 return -EFAULT; 1713 if (vif.mif6c_mifi >= MAXMIFS) 1714 return -ENFILE; 1715 rtnl_lock(); 1716 ret = mif6_add(net, mrt, &vif, 1717 sk == rtnl_dereference(mrt->mroute_sk)); 1718 rtnl_unlock(); 1719 return ret; 1720 1721 case MRT6_DEL_MIF: 1722 if (optlen < sizeof(mifi_t)) 1723 return -EINVAL; 1724 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1725 return -EFAULT; 1726 rtnl_lock(); 1727 ret = mif6_delete(mrt, mifi, 0, NULL); 1728 rtnl_unlock(); 1729 return ret; 1730 1731 /* 1732 * Manipulate the forwarding caches. These live 1733 * in a sort of kernel/user symbiosis. 
1734 */ 1735 case MRT6_ADD_MFC: 1736 case MRT6_DEL_MFC: 1737 parent = -1; 1738 fallthrough; 1739 case MRT6_ADD_MFC_PROXY: 1740 case MRT6_DEL_MFC_PROXY: 1741 if (optlen < sizeof(mfc)) 1742 return -EINVAL; 1743 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1744 return -EFAULT; 1745 if (parent == 0) 1746 parent = mfc.mf6cc_parent; 1747 rtnl_lock(); 1748 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1749 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1750 else 1751 ret = ip6mr_mfc_add(net, mrt, &mfc, 1752 sk == 1753 rtnl_dereference(mrt->mroute_sk), 1754 parent); 1755 rtnl_unlock(); 1756 return ret; 1757 1758 case MRT6_FLUSH: 1759 { 1760 int flags; 1761 1762 if (optlen != sizeof(flags)) 1763 return -EINVAL; 1764 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1765 return -EFAULT; 1766 rtnl_lock(); 1767 mroute_clean_tables(mrt, flags); 1768 rtnl_unlock(); 1769 return 0; 1770 } 1771 1772 /* 1773 * Control PIM assert (to activate pim will activate assert) 1774 */ 1775 case MRT6_ASSERT: 1776 { 1777 int v; 1778 1779 if (optlen != sizeof(v)) 1780 return -EINVAL; 1781 if (copy_from_sockptr(&v, optval, sizeof(v))) 1782 return -EFAULT; 1783 mrt->mroute_do_assert = v; 1784 return 0; 1785 } 1786 1787 #ifdef CONFIG_IPV6_PIMSM_V2 1788 case MRT6_PIM: 1789 { 1790 bool do_wrmifwhole; 1791 int v; 1792 1793 if (optlen != sizeof(v)) 1794 return -EINVAL; 1795 if (copy_from_sockptr(&v, optval, sizeof(v))) 1796 return -EFAULT; 1797 1798 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); 1799 v = !!v; 1800 rtnl_lock(); 1801 ret = 0; 1802 if (v != mrt->mroute_do_pim) { 1803 mrt->mroute_do_pim = v; 1804 mrt->mroute_do_assert = v; 1805 mrt->mroute_do_wrvifwhole = do_wrmifwhole; 1806 } 1807 rtnl_unlock(); 1808 return ret; 1809 } 1810 1811 #endif 1812 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1813 case MRT6_TABLE: 1814 { 1815 u32 v; 1816 1817 if (optlen != sizeof(u32)) 1818 return -EINVAL; 1819 if (copy_from_sockptr(&v, optval, sizeof(v))) 1820 return -EFAULT; 1821 /* "pim6reg%u" 
should not exceed 16 bytes (IFNAMSIZ) */ 1822 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1823 return -EINVAL; 1824 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1825 return -EBUSY; 1826 1827 rtnl_lock(); 1828 ret = 0; 1829 mrt = ip6mr_new_table(net, v); 1830 if (IS_ERR(mrt)) 1831 ret = PTR_ERR(mrt); 1832 else 1833 raw6_sk(sk)->ip6mr_table = v; 1834 rtnl_unlock(); 1835 return ret; 1836 } 1837 #endif 1838 /* 1839 * Spurious command, or MRT6_VERSION which you cannot 1840 * set. 1841 */ 1842 default: 1843 return -ENOPROTOOPT; 1844 } 1845 } 1846 1847 /* 1848 * Getsock opt support for the multicast routing system. 1849 */ 1850 1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1852 sockptr_t optlen) 1853 { 1854 int olr; 1855 int val; 1856 struct net *net = sock_net(sk); 1857 struct mr_table *mrt; 1858 1859 if (sk->sk_type != SOCK_RAW || 1860 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1861 return -EOPNOTSUPP; 1862 1863 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1864 if (!mrt) 1865 return -ENOENT; 1866 1867 switch (optname) { 1868 case MRT6_VERSION: 1869 val = 0x0305; 1870 break; 1871 #ifdef CONFIG_IPV6_PIMSM_V2 1872 case MRT6_PIM: 1873 val = mrt->mroute_do_pim; 1874 break; 1875 #endif 1876 case MRT6_ASSERT: 1877 val = mrt->mroute_do_assert; 1878 break; 1879 default: 1880 return -ENOPROTOOPT; 1881 } 1882 1883 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1884 return -EFAULT; 1885 1886 olr = min_t(int, olr, sizeof(int)); 1887 if (olr < 0) 1888 return -EINVAL; 1889 1890 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1891 return -EFAULT; 1892 if (copy_to_sockptr(optval, &val, olr)) 1893 return -EFAULT; 1894 return 0; 1895 } 1896 1897 /* 1898 * The IP multicast ioctl support routines. 
1899 */ 1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) 1901 { 1902 struct sioc_sg_req6 *sr; 1903 struct sioc_mif_req6 *vr; 1904 struct vif_device *vif; 1905 struct mfc6_cache *c; 1906 struct net *net = sock_net(sk); 1907 struct mr_table *mrt; 1908 1909 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1910 if (!mrt) 1911 return -ENOENT; 1912 1913 switch (cmd) { 1914 case SIOCGETMIFCNT_IN6: 1915 vr = (struct sioc_mif_req6 *)arg; 1916 if (vr->mifi >= mrt->maxvif) 1917 return -EINVAL; 1918 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); 1919 rcu_read_lock(); 1920 vif = &mrt->vif_table[vr->mifi]; 1921 if (VIF_EXISTS(mrt, vr->mifi)) { 1922 vr->icount = READ_ONCE(vif->pkt_in); 1923 vr->ocount = READ_ONCE(vif->pkt_out); 1924 vr->ibytes = READ_ONCE(vif->bytes_in); 1925 vr->obytes = READ_ONCE(vif->bytes_out); 1926 rcu_read_unlock(); 1927 return 0; 1928 } 1929 rcu_read_unlock(); 1930 return -EADDRNOTAVAIL; 1931 case SIOCGETSGCNT_IN6: 1932 sr = (struct sioc_sg_req6 *)arg; 1933 1934 rcu_read_lock(); 1935 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, 1936 &sr->grp.sin6_addr); 1937 if (c) { 1938 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1939 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1940 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1941 rcu_read_unlock(); 1942 return 0; 1943 } 1944 rcu_read_unlock(); 1945 return -EADDRNOTAVAIL; 1946 default: 1947 return -ENOIOCTLCMD; 1948 } 1949 } 1950 1951 #ifdef CONFIG_COMPAT 1952 struct compat_sioc_sg_req6 { 1953 struct sockaddr_in6 src; 1954 struct sockaddr_in6 grp; 1955 compat_ulong_t pktcnt; 1956 compat_ulong_t bytecnt; 1957 compat_ulong_t wrong_if; 1958 }; 1959 1960 struct compat_sioc_mif_req6 { 1961 mifi_t mifi; 1962 compat_ulong_t icount; 1963 compat_ulong_t ocount; 1964 compat_ulong_t ibytes; 1965 compat_ulong_t obytes; 1966 }; 1967 1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1969 { 1970 struct compat_sioc_sg_req6 sr; 
1971 struct compat_sioc_mif_req6 vr; 1972 struct vif_device *vif; 1973 struct mfc6_cache *c; 1974 struct net *net = sock_net(sk); 1975 struct mr_table *mrt; 1976 1977 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1978 if (!mrt) 1979 return -ENOENT; 1980 1981 switch (cmd) { 1982 case SIOCGETMIFCNT_IN6: 1983 if (copy_from_user(&vr, arg, sizeof(vr))) 1984 return -EFAULT; 1985 if (vr.mifi >= mrt->maxvif) 1986 return -EINVAL; 1987 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1988 rcu_read_lock(); 1989 vif = &mrt->vif_table[vr.mifi]; 1990 if (VIF_EXISTS(mrt, vr.mifi)) { 1991 vr.icount = READ_ONCE(vif->pkt_in); 1992 vr.ocount = READ_ONCE(vif->pkt_out); 1993 vr.ibytes = READ_ONCE(vif->bytes_in); 1994 vr.obytes = READ_ONCE(vif->bytes_out); 1995 rcu_read_unlock(); 1996 1997 if (copy_to_user(arg, &vr, sizeof(vr))) 1998 return -EFAULT; 1999 return 0; 2000 } 2001 rcu_read_unlock(); 2002 return -EADDRNOTAVAIL; 2003 case SIOCGETSGCNT_IN6: 2004 if (copy_from_user(&sr, arg, sizeof(sr))) 2005 return -EFAULT; 2006 2007 rcu_read_lock(); 2008 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 2009 if (c) { 2010 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 2011 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 2012 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 2013 rcu_read_unlock(); 2014 2015 if (copy_to_user(arg, &sr, sizeof(sr))) 2016 return -EFAULT; 2017 return 0; 2018 } 2019 rcu_read_unlock(); 2020 return -EADDRNOTAVAIL; 2021 default: 2022 return -ENOIOCTLCMD; 2023 } 2024 } 2025 #endif 2026 2027 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2028 { 2029 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 2030 IPSTATS_MIB_OUTFORWDATAGRAMS); 2031 return dst_output(net, sk, skb); 2032 } 2033 2034 /* 2035 * Processing handlers for ip6mr_forward 2036 */ 2037 2038 static int ip6mr_forward2(struct net *net, struct mr_table *mrt, 2039 struct sk_buff *skb, int vifi) 2040 { 
2041 struct vif_device *vif = &mrt->vif_table[vifi]; 2042 struct net_device *vif_dev; 2043 struct ipv6hdr *ipv6h; 2044 struct dst_entry *dst; 2045 struct flowi6 fl6; 2046 2047 vif_dev = vif_dev_read(vif); 2048 if (!vif_dev) 2049 goto out_free; 2050 2051 #ifdef CONFIG_IPV6_PIMSM_V2 2052 if (vif->flags & MIFF_REGISTER) { 2053 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2054 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2055 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 2056 DEV_STATS_INC(vif_dev, tx_packets); 2057 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2058 goto out_free; 2059 } 2060 #endif 2061 2062 ipv6h = ipv6_hdr(skb); 2063 2064 fl6 = (struct flowi6) { 2065 .flowi6_oif = vif->link, 2066 .daddr = ipv6h->daddr, 2067 }; 2068 2069 dst = ip6_route_output(net, NULL, &fl6); 2070 if (dst->error) { 2071 dst_release(dst); 2072 goto out_free; 2073 } 2074 2075 skb_dst_drop(skb); 2076 skb_dst_set(skb, dst); 2077 2078 /* 2079 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2080 * not only before forwarding, but after forwarding on all output 2081 * interfaces. It is clear, if mrouter runs a multicasting 2082 * program, it should receive packets not depending to what interface 2083 * program is joined. 2084 * If we will not make it, the program will have to join on all 2085 * interfaces. On the other hand, multihoming host (or router, but 2086 * not mrouter) cannot join to more than one interface - it will 2087 * result in receiving multiple packets. 2088 */ 2089 skb->dev = vif_dev; 2090 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2091 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2092 2093 /* We are about to write */ 2094 /* XXX: extension headers? 
*/ 2095 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) 2096 goto out_free; 2097 2098 ipv6h = ipv6_hdr(skb); 2099 ipv6h->hop_limit--; 2100 2101 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2102 2103 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2104 net, NULL, skb, skb->dev, vif_dev, 2105 ip6mr_forward2_finish); 2106 2107 out_free: 2108 kfree_skb(skb); 2109 return 0; 2110 } 2111 2112 /* Called with rcu_read_lock() */ 2113 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2114 { 2115 int ct; 2116 2117 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ 2118 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2119 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2120 break; 2121 } 2122 return ct; 2123 } 2124 2125 /* Called under rcu_read_lock() */ 2126 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2127 struct net_device *dev, struct sk_buff *skb, 2128 struct mfc6_cache *c) 2129 { 2130 int psend = -1; 2131 int vif, ct; 2132 int true_vifi = ip6mr_find_vif(mrt, dev); 2133 2134 vif = c->_c.mfc_parent; 2135 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2136 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2137 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2138 2139 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2140 struct mfc6_cache *cache_proxy; 2141 2142 /* For an (*,G) entry, we only check that the incoming 2143 * interface is part of the static tree. 2144 */ 2145 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2146 if (cache_proxy && 2147 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2148 goto forward; 2149 } 2150 2151 /* 2152 * Wrong interface: drop packet and (maybe) send PIM assert. 2153 */ 2154 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2155 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2156 2157 if (true_vifi >= 0 && mrt->mroute_do_assert && 2158 /* pimsm uses asserts, when switching from RPT to SPT, 2159 so that we cannot check that packet arrived on an oif. 
2160 It is bad, but otherwise we would need to move pretty 2161 large chunk of pimd to kernel. Ough... --ANK 2162 */ 2163 (mrt->mroute_do_pim || 2164 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2165 time_after(jiffies, 2166 c->_c.mfc_un.res.last_assert + 2167 MFC_ASSERT_THRESH)) { 2168 c->_c.mfc_un.res.last_assert = jiffies; 2169 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2170 if (mrt->mroute_do_wrvifwhole) 2171 ip6mr_cache_report(mrt, skb, true_vifi, 2172 MRT6MSG_WRMIFWHOLE); 2173 } 2174 goto dont_forward; 2175 } 2176 2177 forward: 2178 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2179 mrt->vif_table[vif].pkt_in + 1); 2180 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2181 mrt->vif_table[vif].bytes_in + skb->len); 2182 2183 /* 2184 * Forward the frame 2185 */ 2186 if (ipv6_addr_any(&c->mf6c_origin) && 2187 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2188 if (true_vifi >= 0 && 2189 true_vifi != c->_c.mfc_parent && 2190 ipv6_hdr(skb)->hop_limit > 2191 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2192 /* It's an (*,*) entry and the packet is not coming from 2193 * the upstream: forward the packet to the upstream 2194 * only. 
2195 */ 2196 psend = c->_c.mfc_parent; 2197 goto last_forward; 2198 } 2199 goto dont_forward; 2200 } 2201 for (ct = c->_c.mfc_un.res.maxvif - 1; 2202 ct >= c->_c.mfc_un.res.minvif; ct--) { 2203 /* For (*,G) entry, don't forward to the incoming interface */ 2204 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2205 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2206 if (psend != -1) { 2207 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2208 if (skb2) 2209 ip6mr_forward2(net, mrt, skb2, psend); 2210 } 2211 psend = ct; 2212 } 2213 } 2214 last_forward: 2215 if (psend != -1) { 2216 ip6mr_forward2(net, mrt, skb, psend); 2217 return; 2218 } 2219 2220 dont_forward: 2221 kfree_skb(skb); 2222 } 2223 2224 2225 /* 2226 * Multicast packets for forwarding arrive here 2227 */ 2228 2229 int ip6_mr_input(struct sk_buff *skb) 2230 { 2231 struct mfc6_cache *cache; 2232 struct net *net = dev_net(skb->dev); 2233 struct mr_table *mrt; 2234 struct flowi6 fl6 = { 2235 .flowi6_iif = skb->dev->ifindex, 2236 .flowi6_mark = skb->mark, 2237 }; 2238 int err; 2239 struct net_device *dev; 2240 2241 /* skb->dev passed in is the master dev for vrfs. 2242 * Get the proper interface that does have a vif associated with it. 
2243 */ 2244 dev = skb->dev; 2245 if (netif_is_l3_master(skb->dev)) { 2246 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2247 if (!dev) { 2248 kfree_skb(skb); 2249 return -ENODEV; 2250 } 2251 } 2252 2253 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2254 if (err < 0) { 2255 kfree_skb(skb); 2256 return err; 2257 } 2258 2259 cache = ip6mr_cache_find(mrt, 2260 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2261 if (!cache) { 2262 int vif = ip6mr_find_vif(mrt, dev); 2263 2264 if (vif >= 0) 2265 cache = ip6mr_cache_find_any(mrt, 2266 &ipv6_hdr(skb)->daddr, 2267 vif); 2268 } 2269 2270 /* 2271 * No usable cache entry 2272 */ 2273 if (!cache) { 2274 int vif; 2275 2276 vif = ip6mr_find_vif(mrt, dev); 2277 if (vif >= 0) { 2278 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2279 2280 return err; 2281 } 2282 kfree_skb(skb); 2283 return -ENODEV; 2284 } 2285 2286 ip6_mr_forward(net, mrt, dev, skb, cache); 2287 2288 return 0; 2289 } 2290 2291 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2292 u32 portid) 2293 { 2294 int err; 2295 struct mr_table *mrt; 2296 struct mfc6_cache *cache; 2297 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 2298 2299 rcu_read_lock(); 2300 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 2301 if (!mrt) { 2302 rcu_read_unlock(); 2303 return -ENOENT; 2304 } 2305 2306 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2307 if (!cache && skb->dev) { 2308 int vif = ip6mr_find_vif(mrt, skb->dev); 2309 2310 if (vif >= 0) 2311 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2312 vif); 2313 } 2314 2315 if (!cache) { 2316 struct sk_buff *skb2; 2317 struct ipv6hdr *iph; 2318 struct net_device *dev; 2319 int vif; 2320 2321 dev = skb->dev; 2322 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2323 rcu_read_unlock(); 2324 return -ENODEV; 2325 } 2326 2327 /* really correct? 
*/ 2328 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2329 if (!skb2) { 2330 rcu_read_unlock(); 2331 return -ENOMEM; 2332 } 2333 2334 NETLINK_CB(skb2).portid = portid; 2335 skb_reset_transport_header(skb2); 2336 2337 skb_put(skb2, sizeof(struct ipv6hdr)); 2338 skb_reset_network_header(skb2); 2339 2340 iph = ipv6_hdr(skb2); 2341 iph->version = 0; 2342 iph->priority = 0; 2343 iph->flow_lbl[0] = 0; 2344 iph->flow_lbl[1] = 0; 2345 iph->flow_lbl[2] = 0; 2346 iph->payload_len = 0; 2347 iph->nexthdr = IPPROTO_NONE; 2348 iph->hop_limit = 0; 2349 iph->saddr = rt->rt6i_src.addr; 2350 iph->daddr = rt->rt6i_dst.addr; 2351 2352 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2353 rcu_read_unlock(); 2354 2355 return err; 2356 } 2357 2358 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2359 rcu_read_unlock(); 2360 return err; 2361 } 2362 2363 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2364 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2365 int flags) 2366 { 2367 struct nlmsghdr *nlh; 2368 struct rtmsg *rtm; 2369 int err; 2370 2371 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2372 if (!nlh) 2373 return -EMSGSIZE; 2374 2375 rtm = nlmsg_data(nlh); 2376 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2377 rtm->rtm_dst_len = 128; 2378 rtm->rtm_src_len = 128; 2379 rtm->rtm_tos = 0; 2380 rtm->rtm_table = mrt->id; 2381 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2382 goto nla_put_failure; 2383 rtm->rtm_type = RTN_MULTICAST; 2384 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2385 if (c->_c.mfc_flags & MFC_STATIC) 2386 rtm->rtm_protocol = RTPROT_STATIC; 2387 else 2388 rtm->rtm_protocol = RTPROT_MROUTED; 2389 rtm->rtm_flags = 0; 2390 2391 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2392 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2393 goto nla_put_failure; 2394 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2395 /* do not break the dump if cache is unresolved */ 2396 if (err < 0 && err != -ENOENT) 2397 goto nla_put_failure; 2398 2399 
nlmsg_end(skb, nlh); 2400 return 0; 2401 2402 nla_put_failure: 2403 nlmsg_cancel(skb, nlh); 2404 return -EMSGSIZE; 2405 } 2406 2407 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2408 u32 portid, u32 seq, struct mr_mfc *c, 2409 int cmd, int flags) 2410 { 2411 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2412 cmd, flags); 2413 } 2414 2415 static int mr6_msgsize(bool unresolved, int maxvif) 2416 { 2417 size_t len = 2418 NLMSG_ALIGN(sizeof(struct rtmsg)) 2419 + nla_total_size(4) /* RTA_TABLE */ 2420 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2421 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2422 ; 2423 2424 if (!unresolved) 2425 len = len 2426 + nla_total_size(4) /* RTA_IIF */ 2427 + nla_total_size(0) /* RTA_MULTIPATH */ 2428 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2429 /* RTA_MFC_STATS */ 2430 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2431 ; 2432 2433 return len; 2434 } 2435 2436 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2437 int cmd) 2438 { 2439 struct net *net = read_pnet(&mrt->net); 2440 struct sk_buff *skb; 2441 int err = -ENOBUFS; 2442 2443 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2444 GFP_ATOMIC); 2445 if (!skb) 2446 goto errout; 2447 2448 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2449 if (err < 0) 2450 goto errout; 2451 2452 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2453 return; 2454 2455 errout: 2456 kfree_skb(skb); 2457 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2458 } 2459 2460 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2461 { 2462 size_t len = 2463 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2464 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2465 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2466 /* IP6MRA_CREPORT_SRC_ADDR */ 2467 + nla_total_size(sizeof(struct in6_addr)) 2468 /* IP6MRA_CREPORT_DST_ADDR */ 2469 + nla_total_size(sizeof(struct 
in6_addr)) 2470 /* IP6MRA_CREPORT_PKT */ 2471 + nla_total_size(payloadlen) 2472 ; 2473 2474 return len; 2475 } 2476 2477 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2478 { 2479 struct net *net = read_pnet(&mrt->net); 2480 struct nlmsghdr *nlh; 2481 struct rtgenmsg *rtgenm; 2482 struct mrt6msg *msg; 2483 struct sk_buff *skb; 2484 struct nlattr *nla; 2485 int payloadlen; 2486 2487 payloadlen = pkt->len - sizeof(struct mrt6msg); 2488 msg = (struct mrt6msg *)skb_transport_header(pkt); 2489 2490 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2491 if (!skb) 2492 goto errout; 2493 2494 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2495 sizeof(struct rtgenmsg), 0); 2496 if (!nlh) 2497 goto errout; 2498 rtgenm = nlmsg_data(nlh); 2499 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2500 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2501 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2502 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2503 &msg->im6_src) || 2504 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2505 &msg->im6_dst)) 2506 goto nla_put_failure; 2507 2508 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2509 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2510 nla_data(nla), payloadlen)) 2511 goto nla_put_failure; 2512 2513 nlmsg_end(skb, nlh); 2514 2515 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2516 return; 2517 2518 nla_put_failure: 2519 nlmsg_cancel(skb, nlh); 2520 errout: 2521 kfree_skb(skb); 2522 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2523 } 2524 2525 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { 2526 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2527 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2528 [RTA_TABLE] = { .type = NLA_U32 }, 2529 }; 2530 2531 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, 2532 const struct nlmsghdr *nlh, 2533 struct nlattr **tb, 2534 
struct netlink_ext_ack *extack) 2535 { 2536 struct rtmsg *rtm; 2537 int err; 2538 2539 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, 2540 extack); 2541 if (err) 2542 return err; 2543 2544 rtm = nlmsg_data(nlh); 2545 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 2546 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 2547 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2548 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2549 NL_SET_ERR_MSG_MOD(extack, 2550 "Invalid values in header for multicast route get request"); 2551 return -EINVAL; 2552 } 2553 2554 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2555 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2556 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 2557 return -EINVAL; 2558 } 2559 2560 return 0; 2561 } 2562 2563 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2564 struct netlink_ext_ack *extack) 2565 { 2566 struct net *net = sock_net(in_skb->sk); 2567 struct in6_addr src = {}, grp = {}; 2568 struct nlattr *tb[RTA_MAX + 1]; 2569 struct mfc6_cache *cache; 2570 struct mr_table *mrt; 2571 struct sk_buff *skb; 2572 u32 tableid; 2573 int err; 2574 2575 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2576 if (err < 0) 2577 return err; 2578 2579 if (tb[RTA_SRC]) 2580 src = nla_get_in6_addr(tb[RTA_SRC]); 2581 if (tb[RTA_DST]) 2582 grp = nla_get_in6_addr(tb[RTA_DST]); 2583 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2584 2585 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); 2586 if (!mrt) { 2587 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); 2588 return -ENOENT; 2589 } 2590 2591 /* entries are added/deleted only under RTNL */ 2592 rcu_read_lock(); 2593 cache = ip6mr_cache_find(mrt, &src, &grp); 2594 rcu_read_unlock(); 2595 if (!cache) { 2596 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); 2597 return -ENOENT; 2598 } 2599 2600 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL); 
2601 if (!skb) 2602 return -ENOBUFS; 2603 2604 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2605 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); 2606 if (err < 0) { 2607 kfree_skb(skb); 2608 return err; 2609 } 2610 2611 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2612 } 2613 2614 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2615 { 2616 const struct nlmsghdr *nlh = cb->nlh; 2617 struct fib_dump_filter filter = { 2618 .rtnl_held = true, 2619 }; 2620 int err; 2621 2622 if (cb->strict_check) { 2623 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2624 &filter, cb); 2625 if (err < 0) 2626 return err; 2627 } 2628 2629 if (filter.table_id) { 2630 struct mr_table *mrt; 2631 2632 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2633 if (!mrt) { 2634 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) 2635 return skb->len; 2636 2637 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2638 return -ENOENT; 2639 } 2640 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2641 &mfc_unres_lock, &filter); 2642 return skb->len ? : err; 2643 } 2644 2645 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2646 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2647 } 2648