1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 63 */ 64 65 static DEFINE_RWLOCK(mrt_lock); 66 67 /* Multicast router control variables */ 68 69 /* Special spinlock for queue of unresolved entries */ 70 static DEFINE_SPINLOCK(mfc_unres_lock); 71 72 /* We return to original Alan's scheme. Hash table of resolved 73 entries is changed only in process context and protected 74 with weak lock mrt_lock. Queue of unresolved entries is protected 75 with strong spinlock mfc_unres_lock. 76 77 In this case data path is free of exclusive locks at all. 78 */ 79 80 static struct kmem_cache *mrt_cachep __read_mostly; 81 82 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 83 static void ip6mr_free_table(struct mr_table *mrt); 84 85 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 86 struct net_device *dev, struct sk_buff *skb, 87 struct mfc6_cache *cache); 88 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, 89 mifi_t mifi, int assert); 90 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 91 int cmd); 92 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 93 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 94 struct netlink_callback *cb); 95 static void mroute_clean_tables(struct mr_table *mrt, int flags); 96 static void ipmr_expire_process(struct timer_list *t); 97 98 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 99 #define ip6mr_for_each_table(mrt, net) \ 100 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 101 lockdep_rtnl_is_held() || \ 102 list_empty(&net->ipv6.mr6_tables)) 103 104 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 105 struct mr_table *mrt) 106 { 107 struct mr_table *ret; 108 109 if (!mrt) 110 ret = list_entry_rcu(net->ipv6.mr6_tables.next, 111 struct mr_table, list); 112 else 113 ret = list_entry_rcu(mrt->list.next, 114 struct mr_table, list); 115 116 if (&ret->list == &net->ipv6.mr6_tables) 117 return NULL; 118 return ret; 119 } 120 121 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 122 { 123 struct mr_table *mrt; 124 125 ip6mr_for_each_table(mrt, net) { 126 if (mrt->id == id) 127 return mrt; 128 } 129 return NULL; 130 } 131 132 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 133 struct mr_table **mrt) 134 { 135 int err; 136 struct ip6mr_result res; 137 struct fib_lookup_arg arg = { 138 .result = &res, 139 .flags = FIB_LOOKUP_NOREF, 140 }; 141 142 /* update flow if oif or iif point to device enslaved to l3mdev */ 143 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 144 145 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 146 flowi6_to_flowi(flp6), 0, &arg); 147 if (err < 0) 148 return err; 149 *mrt = res.mrt; 150 return 0; 151 } 152 153 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 154 int flags, struct fib_lookup_arg *arg) 155 { 156 struct ip6mr_result *res = arg->result; 157 struct mr_table *mrt; 158 159 switch (rule->action) { 160 case FR_ACT_TO_TBL: 161 break; 162 case FR_ACT_UNREACHABLE: 163 return -ENETUNREACH; 164 case FR_ACT_PROHIBIT: 165 return -EACCES; 166 case FR_ACT_BLACKHOLE: 167 default: 168 return -EINVAL; 169 } 170 171 arg->table = fib_rule_get_table(rule, arg); 172 173 mrt = ip6mr_get_table(rule->fr_net, arg->table); 174 if (!mrt) 175 return -EAGAIN; 176 res->mrt = mrt; 177 return 0; 178 } 179 180 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 181 { 182 return 1; 183 } 184 185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 186 struct fib_rule_hdr *frh, struct nlattr **tb, 187 struct netlink_ext_ack *extack) 188 { 189 return 0; 190 } 191 192 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 193 struct nlattr **tb) 194 { 195 return 1; 196 } 197 198 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 199 struct fib_rule_hdr *frh) 200 { 201 frh->dst_len = 0; 202 frh->src_len = 0; 203 frh->tos = 0; 204 return 0; 205 } 206 207 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 208 .family = RTNL_FAMILY_IP6MR, 209 .rule_size = sizeof(struct ip6mr_rule), 210 .addr_size = sizeof(struct in6_addr), 211 .action = ip6mr_rule_action, 212 .match = ip6mr_rule_match, 213 .configure = ip6mr_rule_configure, 214 .compare = ip6mr_rule_compare, 215 .fill = ip6mr_rule_fill, 216 .nlgroup = RTNLGRP_IPV6_RULE, 217 .owner = THIS_MODULE, 218 }; 219 220 static int __net_init ip6mr_rules_init(struct net *net) 221 { 222 struct fib_rules_ops *ops; 223 struct mr_table *mrt; 224 int err; 225 226 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 227 if (IS_ERR(ops)) 228 return PTR_ERR(ops); 229 230 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 231 232 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 233 if (IS_ERR(mrt)) { 234 err = PTR_ERR(mrt); 235 goto err1; 236 } 237 238 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); 239 if (err < 0) 240 goto err2; 241 242 net->ipv6.mr6_rules_ops = ops; 243 return 0; 244 245 err2: 246 ip6mr_free_table(mrt); 247 err1: 248 fib_rules_unregister(ops); 249 return err; 250 } 251 252 static void __net_exit ip6mr_rules_exit(struct net *net) 253 { 254 struct mr_table *mrt, *next; 255 256 rtnl_lock(); 257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 258 list_del(&mrt->list); 259 ip6mr_free_table(mrt); 260 } 261 fib_rules_unregister(net->ipv6.mr6_rules_ops); 262 rtnl_unlock(); 263 } 264 265 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 266 struct netlink_ext_ack *extack) 267 { 268 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 269 } 270 271 static unsigned int ip6mr_rules_seq_read(struct net *net) 272 { 273 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 274 } 275 276 bool ip6mr_rule_default(const struct fib_rule *rule) 277 { 278 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 279 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 280 } 281 EXPORT_SYMBOL(ip6mr_rule_default); 282 #else 283 #define ip6mr_for_each_table(mrt, net) \ 284 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 285 286 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 287 struct mr_table *mrt) 288 { 289 if (!mrt) 290 return net->ipv6.mrt6; 291 return NULL; 292 } 293 294 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 295 { 296 return net->ipv6.mrt6; 297 } 298 299 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 300 struct mr_table **mrt) 301 { 302 *mrt = net->ipv6.mrt6; 303 return 0; 304 } 305 306 static int __net_init ip6mr_rules_init(struct net *net) 307 { 308 struct mr_table *mrt; 309 310 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 311 if (IS_ERR(mrt)) 312 return PTR_ERR(mrt); 313 net->ipv6.mrt6 = mrt; 314 return 0; 315 } 316 317 static void __net_exit ip6mr_rules_exit(struct net *net) 318 { 319 rtnl_lock(); 320 ip6mr_free_table(net->ipv6.mrt6); 321 net->ipv6.mrt6 = NULL; 322 rtnl_unlock(); 323 } 324 325 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 326 struct netlink_ext_ack *extack) 327 { 328 return 0; 329 } 330 331 static unsigned int ip6mr_rules_seq_read(struct net *net) 332 { 333 return 0; 334 } 335 #endif 336 337 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 338 const void *ptr) 339 { 340 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 341 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 342 343 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || 344 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 345 } 346 347 static const struct rhashtable_params ip6mr_rht_params = { 348 .head_offset = offsetof(struct mr_mfc, mnode), 349 .key_offset = offsetof(struct mfc6_cache, cmparg), 350 .key_len = sizeof(struct mfc6_cache_cmp_arg), 351 .nelem_hint = 3, 352 .obj_cmpfn = ip6mr_hash_cmp, 353 .automatic_shrinking = true, 354 }; 355 356 static void ip6mr_new_table_set(struct mr_table *mrt, 357 struct net *net) 358 { 359 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 360 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 361 #endif 362 } 363 364 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 365 .mf6c_origin = IN6ADDR_ANY_INIT, 366 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 367 }; 368 369 static struct mr_table_ops ip6mr_mr_table_ops = { 370 .rht_params = &ip6mr_rht_params, 371 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 372 }; 373 374 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 375 { 376 struct mr_table *mrt; 377 378 mrt = ip6mr_get_table(net, id); 379 if (mrt) 380 return mrt; 381 382 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 383 ipmr_expire_process, ip6mr_new_table_set); 384 } 385 386 static void ip6mr_free_table(struct mr_table *mrt) 387 { 388 del_timer_sync(&mrt->ipmr_expire_timer); 389 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 390 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); 391 rhltable_destroy(&mrt->mfc_hash); 392 kfree(mrt); 393 } 394 395 #ifdef CONFIG_PROC_FS 396 /* The /proc interfaces to multicast routing 397 * /proc/ip6_mr_cache /proc/ip6_mr_vif 398 */ 399 400 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 401 __acquires(mrt_lock) 402 { 403 struct mr_vif_iter *iter = seq->private; 404 struct net *net = seq_file_net(seq); 405 struct mr_table *mrt; 406 407 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 408 if (!mrt) 409 return ERR_PTR(-ENOENT); 410 411 iter->mrt = mrt; 412 413 read_lock(&mrt_lock); 414 return mr_vif_seq_start(seq, pos); 415 } 416 417 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 418 __releases(mrt_lock) 419 { 420 read_unlock(&mrt_lock); 421 } 422 423 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 424 { 425 struct mr_vif_iter *iter = seq->private; 426 struct mr_table *mrt = iter->mrt; 427 428 if (v == SEQ_START_TOKEN) { 429 seq_puts(seq, 430 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 431 } else { 432 const struct vif_device *vif = v; 433 const char *name = vif->dev ? vif->dev->name : "none"; 434 435 seq_printf(seq, 436 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 437 vif - mrt->vif_table, 438 name, vif->bytes_in, vif->pkt_in, 439 vif->bytes_out, vif->pkt_out, 440 vif->flags); 441 } 442 return 0; 443 } 444 445 static const struct seq_operations ip6mr_vif_seq_ops = { 446 .start = ip6mr_vif_seq_start, 447 .next = mr_vif_seq_next, 448 .stop = ip6mr_vif_seq_stop, 449 .show = ip6mr_vif_seq_show, 450 }; 451 452 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 453 { 454 struct net *net = seq_file_net(seq); 455 struct mr_table *mrt; 456 457 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 458 if (!mrt) 459 return ERR_PTR(-ENOENT); 460 461 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 462 } 463 464 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 465 { 466 int n; 467 468 if (v == SEQ_START_TOKEN) { 469 seq_puts(seq, 470 "Group " 471 "Origin " 472 "Iif Pkts Bytes Wrong Oifs\n"); 473 } else { 474 const struct mfc6_cache *mfc = v; 475 const struct mr_mfc_iter *it = seq->private; 476 struct mr_table *mrt = it->mrt; 477 478 seq_printf(seq, "%pI6 %pI6 %-3hd", 479 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 480 mfc->_c.mfc_parent); 481 482 if (it->cache != &mrt->mfc_unres_queue) { 483 seq_printf(seq, " %8lu %8lu %8lu", 484 mfc->_c.mfc_un.res.pkt, 485 mfc->_c.mfc_un.res.bytes, 486 mfc->_c.mfc_un.res.wrong_if); 487 for (n = mfc->_c.mfc_un.res.minvif; 488 n < mfc->_c.mfc_un.res.maxvif; n++) { 489 if (VIF_EXISTS(mrt, n) && 490 mfc->_c.mfc_un.res.ttls[n] < 255) 491 seq_printf(seq, 492 " %2d:%-3d", n, 493 mfc->_c.mfc_un.res.ttls[n]); 494 } 495 } else { 496 /* unresolved mfc_caches don't contain 497 * pkt, bytes and wrong_if values 498 */ 499 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 500 } 501 seq_putc(seq, '\n'); 502 } 503 return 0; 504 } 505 506 static const struct seq_operations ipmr_mfc_seq_ops = { 507 .start = ipmr_mfc_seq_start, 508 .next = mr_mfc_seq_next, 509 .stop = mr_mfc_seq_stop, 510 .show = ipmr_mfc_seq_show, 511 }; 512 #endif 513 514 #ifdef CONFIG_IPV6_PIMSM_V2 515 516 static int pim6_rcv(struct sk_buff *skb) 517 { 518 struct pimreghdr *pim; 519 struct ipv6hdr *encap; 520 struct net_device *reg_dev = NULL; 521 struct net *net = dev_net(skb->dev); 522 struct mr_table *mrt; 523 struct flowi6 fl6 = { 524 .flowi6_iif = skb->dev->ifindex, 525 .flowi6_mark = skb->mark, 526 }; 527 int reg_vif_num; 528 529 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 530 goto drop; 531 532 pim = (struct pimreghdr *)skb_transport_header(skb); 533 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 534 (pim->flags & PIM_NULL_REGISTER) || 535 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 536 sizeof(*pim), IPPROTO_PIM, 537 csum_partial((void *)pim, sizeof(*pim), 0)) && 538 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 539 goto drop; 540 541 /* check if the inner packet is destined to mcast group */ 542 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 543 sizeof(*pim)); 544 545 if (!ipv6_addr_is_multicast(&encap->daddr) || 546 encap->payload_len == 0 || 547 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 548 goto drop; 549 550 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 551 goto drop; 552 reg_vif_num = mrt->mroute_reg_vif_num; 553 554 read_lock(&mrt_lock); 555 if (reg_vif_num >= 0) 556 reg_dev = mrt->vif_table[reg_vif_num].dev; 557 dev_hold(reg_dev); 558 read_unlock(&mrt_lock); 559 560 if (!reg_dev) 561 goto drop; 562 563 skb->mac_header = skb->network_header; 564 skb_pull(skb, (u8 *)encap - skb->data); 565 skb_reset_network_header(skb); 566 skb->protocol = htons(ETH_P_IPV6); 567 skb->ip_summed = CHECKSUM_NONE; 568 569 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 570 571 netif_rx(skb); 572 573 dev_put(reg_dev); 574 return 0; 575 drop: 576 kfree_skb(skb); 577 return 0; 578 } 579 580 static const struct inet6_protocol pim6_protocol = { 581 .handler = pim6_rcv, 582 }; 583 584 /* Service routines creating virtual interfaces: PIMREG */ 585 586 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 587 struct net_device *dev) 588 { 589 struct net *net = dev_net(dev); 590 struct mr_table *mrt; 591 struct flowi6 fl6 = { 592 .flowi6_oif = dev->ifindex, 593 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 594 .flowi6_mark = skb->mark, 595 }; 596 597 if (!pskb_inet_may_pull(skb)) 598 goto tx_err; 599 600 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 601 goto tx_err; 602 603 read_lock(&mrt_lock); 604 dev->stats.tx_bytes += skb->len; 605 dev->stats.tx_packets++; 606 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); 607 read_unlock(&mrt_lock); 608 kfree_skb(skb); 609 return NETDEV_TX_OK; 610 611 tx_err: 612 dev->stats.tx_errors++; 613 kfree_skb(skb); 614 return NETDEV_TX_OK; 615 } 616 617 static int reg_vif_get_iflink(const struct net_device *dev) 618 { 619 return 0; 620 } 621 622 static const struct net_device_ops reg_vif_netdev_ops = { 623 .ndo_start_xmit = reg_vif_xmit, 624 .ndo_get_iflink = reg_vif_get_iflink, 625 }; 626 627 static void reg_vif_setup(struct net_device *dev) 628 { 629 dev->type = ARPHRD_PIMREG; 630 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 631 dev->flags = IFF_NOARP; 632 dev->netdev_ops = ®_vif_netdev_ops; 633 dev->needs_free_netdev = true; 634 dev->features |= NETIF_F_NETNS_LOCAL; 635 } 636 637 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 638 { 639 struct net_device *dev; 640 char name[IFNAMSIZ]; 641 642 if (mrt->id == RT6_TABLE_DFLT) 643 sprintf(name, "pim6reg"); 644 else 645 sprintf(name, "pim6reg%u", mrt->id); 646 647 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 648 if (!dev) 649 return NULL; 650 651 dev_net_set(dev, net); 652 653 if (register_netdevice(dev)) { 654 free_netdev(dev); 655 return NULL; 656 } 657 658 if (dev_open(dev, NULL)) 659 goto failure; 660 661 dev_hold(dev); 662 return dev; 663 664 failure: 665 unregister_netdevice(dev); 666 return NULL; 667 } 668 #endif 669 670 static int call_ip6mr_vif_entry_notifiers(struct net *net, 671 enum fib_event_type event_type, 672 struct vif_device *vif, 673 mifi_t vif_index, u32 tb_id) 674 { 675 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 676 vif, vif_index, tb_id, 677 &net->ipv6.ipmr_seq); 678 } 679 680 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 681 enum fib_event_type event_type, 682 struct mfc6_cache *mfc, u32 tb_id) 683 { 684 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 685 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 686 } 687 688 /* Delete a VIF entry */ 689 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 690 struct list_head *head) 691 { 692 struct vif_device *v; 693 struct net_device *dev; 694 struct inet6_dev *in6_dev; 695 696 if (vifi < 0 || vifi >= mrt->maxvif) 697 return -EADDRNOTAVAIL; 698 699 v = &mrt->vif_table[vifi]; 700 701 if (VIF_EXISTS(mrt, vifi)) 702 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 703 FIB_EVENT_VIF_DEL, v, vifi, 704 mrt->id); 705 706 write_lock_bh(&mrt_lock); 707 dev = v->dev; 708 v->dev = NULL; 709 710 if (!dev) { 711 write_unlock_bh(&mrt_lock); 712 return -EADDRNOTAVAIL; 713 } 714 715 #ifdef CONFIG_IPV6_PIMSM_V2 716 if (vifi == mrt->mroute_reg_vif_num) 717 mrt->mroute_reg_vif_num = -1; 718 #endif 719 720 if (vifi + 1 == mrt->maxvif) { 721 int tmp; 722 for (tmp = vifi - 1; tmp >= 0; tmp--) { 723 if (VIF_EXISTS(mrt, tmp)) 724 break; 725 } 726 mrt->maxvif = tmp + 1; 727 } 728 729 write_unlock_bh(&mrt_lock); 730 731 dev_set_allmulti(dev, -1); 732 733 in6_dev = __in6_dev_get(dev); 734 if (in6_dev) { 735 in6_dev->cnf.mc_forwarding--; 736 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 737 NETCONFA_MC_FORWARDING, 738 dev->ifindex, &in6_dev->cnf); 739 } 740 741 if ((v->flags & MIFF_REGISTER) && !notify) 742 unregister_netdevice_queue(dev, head); 743 744 dev_put_track(dev, &v->dev_tracker); 745 return 0; 746 } 747 748 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 749 { 750 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 751 752 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 753 } 754 755 static inline void ip6mr_cache_free(struct mfc6_cache *c) 756 { 757 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 758 } 759 760 /* Destroy an unresolved cache entry, killing queued skbs 761 and reporting error to netlink readers. 762 */ 763 764 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 765 { 766 struct net *net = read_pnet(&mrt->net); 767 struct sk_buff *skb; 768 769 atomic_dec(&mrt->cache_resolve_queue_len); 770 771 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 772 if (ipv6_hdr(skb)->version == 0) { 773 struct nlmsghdr *nlh = skb_pull(skb, 774 sizeof(struct ipv6hdr)); 775 nlh->nlmsg_type = NLMSG_ERROR; 776 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 777 skb_trim(skb, nlh->nlmsg_len); 778 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 779 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 780 } else 781 kfree_skb(skb); 782 } 783 784 ip6mr_cache_free(c); 785 } 786 787 788 /* Timer process for all the unresolved queue. */ 789 790 static void ipmr_do_expire_process(struct mr_table *mrt) 791 { 792 unsigned long now = jiffies; 793 unsigned long expires = 10 * HZ; 794 struct mr_mfc *c, *next; 795 796 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 797 if (time_after(c->mfc_un.unres.expires, now)) { 798 /* not yet... */ 799 unsigned long interval = c->mfc_un.unres.expires - now; 800 if (interval < expires) 801 expires = interval; 802 continue; 803 } 804 805 list_del(&c->list); 806 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 807 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 808 } 809 810 if (!list_empty(&mrt->mfc_unres_queue)) 811 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 812 } 813 814 static void ipmr_expire_process(struct timer_list *t) 815 { 816 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 817 818 if (!spin_trylock(&mfc_unres_lock)) { 819 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 820 return; 821 } 822 823 if (!list_empty(&mrt->mfc_unres_queue)) 824 ipmr_do_expire_process(mrt); 825 826 spin_unlock(&mfc_unres_lock); 827 } 828 829 /* Fill oifs list. It is called under write locked mrt_lock. */ 830 831 static void ip6mr_update_thresholds(struct mr_table *mrt, 832 struct mr_mfc *cache, 833 unsigned char *ttls) 834 { 835 int vifi; 836 837 cache->mfc_un.res.minvif = MAXMIFS; 838 cache->mfc_un.res.maxvif = 0; 839 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 840 841 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 842 if (VIF_EXISTS(mrt, vifi) && 843 ttls[vifi] && ttls[vifi] < 255) { 844 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 845 if (cache->mfc_un.res.minvif > vifi) 846 cache->mfc_un.res.minvif = vifi; 847 if (cache->mfc_un.res.maxvif <= vifi) 848 cache->mfc_un.res.maxvif = vifi + 1; 849 } 850 } 851 cache->mfc_un.res.lastuse = jiffies; 852 } 853 854 static int mif6_add(struct net *net, struct mr_table *mrt, 855 struct mif6ctl *vifc, int mrtsock) 856 { 857 int vifi = vifc->mif6c_mifi; 858 struct vif_device *v = &mrt->vif_table[vifi]; 859 struct net_device *dev; 860 struct inet6_dev *in6_dev; 861 int err; 862 863 /* Is vif busy ? */ 864 if (VIF_EXISTS(mrt, vifi)) 865 return -EADDRINUSE; 866 867 switch (vifc->mif6c_flags) { 868 #ifdef CONFIG_IPV6_PIMSM_V2 869 case MIFF_REGISTER: 870 /* 871 * Special Purpose VIF in PIM 872 * All the packets will be sent to the daemon 873 */ 874 if (mrt->mroute_reg_vif_num >= 0) 875 return -EADDRINUSE; 876 dev = ip6mr_reg_vif(net, mrt); 877 if (!dev) 878 return -ENOBUFS; 879 err = dev_set_allmulti(dev, 1); 880 if (err) { 881 unregister_netdevice(dev); 882 dev_put(dev); 883 return err; 884 } 885 break; 886 #endif 887 case 0: 888 dev = dev_get_by_index(net, vifc->mif6c_pifi); 889 if (!dev) 890 return -EADDRNOTAVAIL; 891 err = dev_set_allmulti(dev, 1); 892 if (err) { 893 dev_put(dev); 894 return err; 895 } 896 break; 897 default: 898 return -EINVAL; 899 } 900 901 in6_dev = __in6_dev_get(dev); 902 if (in6_dev) { 903 in6_dev->cnf.mc_forwarding++; 904 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 905 NETCONFA_MC_FORWARDING, 906 dev->ifindex, &in6_dev->cnf); 907 } 908 909 /* Fill in the VIF structures */ 910 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 911 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 912 MIFF_REGISTER); 913 914 /* And finish update writing critical data */ 915 write_lock_bh(&mrt_lock); 916 v->dev = dev; 917 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 918 #ifdef CONFIG_IPV6_PIMSM_V2 919 if (v->flags & MIFF_REGISTER) 920 mrt->mroute_reg_vif_num = vifi; 921 #endif 922 if (vifi + 1 > mrt->maxvif) 923 mrt->maxvif = vifi + 1; 924 write_unlock_bh(&mrt_lock); 925 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 926 v, vifi, mrt->id); 927 return 0; 928 } 929 930 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 931 const struct in6_addr *origin, 932 const struct in6_addr *mcastgrp) 933 { 934 struct mfc6_cache_cmp_arg arg = { 935 .mf6c_origin = *origin, 936 .mf6c_mcastgrp = *mcastgrp, 937 }; 938 939 return mr_mfc_find(mrt, &arg); 940 } 941 942 /* Look for a (*,G) entry */ 943 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 944 struct in6_addr *mcastgrp, 945 mifi_t mifi) 946 { 947 struct mfc6_cache_cmp_arg arg = { 948 .mf6c_origin = in6addr_any, 949 .mf6c_mcastgrp = *mcastgrp, 950 }; 951 952 if (ipv6_addr_any(mcastgrp)) 953 return mr_mfc_find_any_parent(mrt, mifi); 954 return mr_mfc_find_any(mrt, mifi, &arg); 955 } 956 957 /* Look for a (S,G,iif) entry if parent != -1 */ 958 static struct mfc6_cache * 959 ip6mr_cache_find_parent(struct mr_table *mrt, 960 const struct in6_addr *origin, 961 const struct in6_addr *mcastgrp, 962 int parent) 963 { 964 struct mfc6_cache_cmp_arg arg = { 965 .mf6c_origin = *origin, 966 .mf6c_mcastgrp = *mcastgrp, 967 }; 968 969 return mr_mfc_find_parent(mrt, &arg, parent); 970 } 971 972 /* Allocate a multicast cache entry */ 973 static struct mfc6_cache *ip6mr_cache_alloc(void) 974 { 975 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 976 if (!c) 977 return NULL; 978 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 979 c->_c.mfc_un.res.minvif = MAXMIFS; 980 c->_c.free = ip6mr_cache_free_rcu; 981 refcount_set(&c->_c.mfc_un.res.refcount, 1); 982 return c; 983 } 984 985 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 986 { 987 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 988 if (!c) 989 return NULL; 990 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 991 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 992 return c; 993 } 994 995 /* 996 * A cache entry has gone into a resolved state from queued 997 */ 998 999 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1000 struct mfc6_cache *uc, struct mfc6_cache *c) 1001 { 1002 struct sk_buff *skb; 1003 1004 /* 1005 * Play the pending entries through our router 1006 */ 1007 1008 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1009 if (ipv6_hdr(skb)->version == 0) { 1010 struct nlmsghdr *nlh = skb_pull(skb, 1011 sizeof(struct ipv6hdr)); 1012 1013 if (mr_fill_mroute(mrt, skb, &c->_c, 1014 nlmsg_data(nlh)) > 0) { 1015 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1016 } else { 1017 nlh->nlmsg_type = NLMSG_ERROR; 1018 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1019 skb_trim(skb, nlh->nlmsg_len); 1020 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1021 } 1022 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1023 } else 1024 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1025 } 1026 } 1027 1028 /* 1029 * Bounce a cache query up to pim6sd and netlink. 1030 * 1031 * Called under mrt_lock. 1032 */ 1033 1034 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, 1035 mifi_t mifi, int assert) 1036 { 1037 struct sock *mroute6_sk; 1038 struct sk_buff *skb; 1039 struct mrt6msg *msg; 1040 int ret; 1041 1042 #ifdef CONFIG_IPV6_PIMSM_V2 1043 if (assert == MRT6MSG_WHOLEPKT) 1044 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1045 +sizeof(*msg)); 1046 else 1047 #endif 1048 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1049 1050 if (!skb) 1051 return -ENOBUFS; 1052 1053 /* I suppose that internal messages 1054 * do not require checksums */ 1055 1056 skb->ip_summed = CHECKSUM_UNNECESSARY; 1057 1058 #ifdef CONFIG_IPV6_PIMSM_V2 1059 if (assert == MRT6MSG_WHOLEPKT) { 1060 /* Ugly, but we have no choice with this interface. 1061 Duplicate old header, fix length etc. 1062 And all this only to mangle msg->im6_msgtype and 1063 to set msg->im6_mbz to "mbz" :-) 1064 */ 1065 skb_push(skb, -skb_network_offset(pkt)); 1066 1067 skb_push(skb, sizeof(*msg)); 1068 skb_reset_transport_header(skb); 1069 msg = (struct mrt6msg *)skb_transport_header(skb); 1070 msg->im6_mbz = 0; 1071 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1072 msg->im6_mif = mrt->mroute_reg_vif_num; 1073 msg->im6_pad = 0; 1074 msg->im6_src = ipv6_hdr(pkt)->saddr; 1075 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1076 1077 skb->ip_summed = CHECKSUM_UNNECESSARY; 1078 } else 1079 #endif 1080 { 1081 /* 1082 * Copy the IP header 1083 */ 1084 1085 skb_put(skb, sizeof(struct ipv6hdr)); 1086 skb_reset_network_header(skb); 1087 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1088 1089 /* 1090 * Add our header 1091 */ 1092 skb_put(skb, sizeof(*msg)); 1093 skb_reset_transport_header(skb); 1094 msg = (struct mrt6msg *)skb_transport_header(skb); 1095 1096 msg->im6_mbz = 0; 1097 msg->im6_msgtype = assert; 1098 msg->im6_mif = mifi; 1099 msg->im6_pad = 0; 1100 msg->im6_src = ipv6_hdr(pkt)->saddr; 1101 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1102 1103 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1104 skb->ip_summed = CHECKSUM_UNNECESSARY; 1105 } 1106 1107 rcu_read_lock(); 1108 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1109 if (!mroute6_sk) { 1110 rcu_read_unlock(); 1111 kfree_skb(skb); 1112 return -EINVAL; 1113 } 1114 1115 mrt6msg_netlink_event(mrt, skb); 1116 1117 /* Deliver to user space multicast routing algorithms */ 1118 ret = sock_queue_rcv_skb(mroute6_sk, skb); 1119 rcu_read_unlock(); 1120 if (ret < 0) { 1121 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); 1122 kfree_skb(skb); 1123 } 1124 1125 return ret; 1126 } 1127 1128 /* Queue a packet for resolution. It gets locked cache entry! */ 1129 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, 1130 struct sk_buff *skb, struct net_device *dev) 1131 { 1132 struct mfc6_cache *c; 1133 bool found = false; 1134 int err; 1135 1136 spin_lock_bh(&mfc_unres_lock); 1137 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1138 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1139 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1140 found = true; 1141 break; 1142 } 1143 } 1144 1145 if (!found) { 1146 /* 1147 * Create a new entry if allowable 1148 */ 1149 1150 c = ip6mr_cache_alloc_unres(); 1151 if (!c) { 1152 spin_unlock_bh(&mfc_unres_lock); 1153 1154 kfree_skb(skb); 1155 return -ENOBUFS; 1156 } 1157 1158 /* Fill in the new cache entry */ 1159 c->_c.mfc_parent = -1; 1160 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1161 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1162 1163 /* 1164 * Reflect first query at pim6sd 1165 */ 1166 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1167 if (err < 0) { 1168 /* If the report failed throw the cache entry 1169 out - Brad Parker 1170 */ 1171 spin_unlock_bh(&mfc_unres_lock); 1172 1173 ip6mr_cache_free(c); 1174 kfree_skb(skb); 1175 return err; 1176 } 1177 1178 atomic_inc(&mrt->cache_resolve_queue_len); 1179 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1180 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1181 1182 ipmr_do_expire_process(mrt); 1183 } 1184 1185 /* See if we can append the packet */ 1186 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1187 kfree_skb(skb); 1188 err = -ENOBUFS; 1189 } else { 1190 if (dev) { 1191 skb->dev = dev; 1192 skb->skb_iif = dev->ifindex; 1193 } 1194 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1195 err = 0; 1196 } 1197 1198 spin_unlock_bh(&mfc_unres_lock); 1199 return err; 1200 } 1201 1202 /* 1203 * MFC6 cache manipulation by user space 1204 */ 1205 1206 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1207 int parent) 1208 { 1209 struct mfc6_cache *c; 1210 1211 /* The entries are added/deleted only under RTNL */ 1212 rcu_read_lock(); 1213 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1214 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1215 rcu_read_unlock(); 1216 if (!c) 1217 return -ENOENT; 1218 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1219 list_del_rcu(&c->_c.list); 1220 1221 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1222 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1223 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1224 mr_cache_put(&c->_c); 1225 return 0; 1226 } 1227 1228 static int ip6mr_device_event(struct notifier_block *this, 1229 unsigned long event, void *ptr) 1230 { 1231 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1232 struct net *net = dev_net(dev); 1233 struct mr_table *mrt; 1234 struct vif_device *v; 1235 int ct; 1236 1237 if (event != NETDEV_UNREGISTER) 1238 return NOTIFY_DONE; 1239 1240 ip6mr_for_each_table(mrt, net) { 1241 v = &mrt->vif_table[0]; 1242 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1243 if (v->dev == dev) 1244 mif6_delete(mrt, ct, 1, NULL); 1245 } 1246 } 1247 1248 return NOTIFY_DONE; 1249 } 1250 1251 static unsigned int ip6mr_seq_read(struct net *net) 1252 { 1253 ASSERT_RTNL(); 1254 1255 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); 1256 } 1257 1258 static int ip6mr_dump(struct net *net, struct notifier_block *nb, 1259 struct netlink_ext_ack *extack) 1260 { 1261 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, 1262 ip6mr_mr_table_iter, &mrt_lock, extack); 1263 } 1264 1265 static struct notifier_block ip6_mr_notifier = { 1266 .notifier_call = ip6mr_device_event 1267 }; 1268 1269 static const struct fib_notifier_ops ip6mr_notifier_ops_template = { 1270 .family = RTNL_FAMILY_IP6MR, 1271 .fib_seq_read = ip6mr_seq_read, 1272 .fib_dump = ip6mr_dump, 1273 .owner = THIS_MODULE, 1274 }; 1275 1276 static int __net_init ip6mr_notifier_init(struct net *net) 1277 { 1278 struct fib_notifier_ops *ops; 1279 1280 net->ipv6.ipmr_seq = 0; 1281 1282 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); 1283 if (IS_ERR(ops)) 1284 return PTR_ERR(ops); 1285 1286 net->ipv6.ip6mr_notifier_ops = ops; 1287 1288 return 0; 1289 } 1290 1291 static void __net_exit ip6mr_notifier_exit(struct net *net) 1292 { 1293 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); 1294 net->ipv6.ip6mr_notifier_ops = NULL; 1295 } 1296 1297 /* Setup for IP multicast routing */ 1298 static int __net_init ip6mr_net_init(struct net *net) 1299 { 1300 int err; 1301 1302 err = ip6mr_notifier_init(net); 1303 if (err) 1304 return err; 1305 1306 err = ip6mr_rules_init(net); 1307 if (err < 0) 1308 goto ip6mr_rules_fail; 1309 1310 #ifdef CONFIG_PROC_FS 1311 err = -ENOMEM; 1312 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, 1313 sizeof(struct mr_vif_iter))) 1314 goto proc_vif_fail; 1315 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 1316 sizeof(struct mr_mfc_iter))) 1317 goto proc_cache_fail; 1318 #endif 1319 1320 return 0; 1321 1322 #ifdef CONFIG_PROC_FS 1323 proc_cache_fail: 1324 remove_proc_entry("ip6_mr_vif", net->proc_net); 1325 proc_vif_fail: 1326 ip6mr_rules_exit(net); 1327 #endif 1328 ip6mr_rules_fail: 1329 ip6mr_notifier_exit(net); 1330 return err; 1331 } 1332 1333 static void __net_exit ip6mr_net_exit(struct net *net) 1334 { 1335 #ifdef CONFIG_PROC_FS 1336 remove_proc_entry("ip6_mr_cache", net->proc_net); 1337 remove_proc_entry("ip6_mr_vif", net->proc_net); 1338 #endif 1339 ip6mr_rules_exit(net); 1340 ip6mr_notifier_exit(net); 1341 } 1342 1343 static struct pernet_operations ip6mr_net_ops = { 1344 .init = ip6mr_net_init, 1345 .exit = ip6mr_net_exit, 1346 }; 1347 1348 int __init ip6_mr_init(void) 1349 { 1350 int err; 1351 1352 mrt_cachep = kmem_cache_create("ip6_mrt_cache", 1353 sizeof(struct mfc6_cache), 1354 0, SLAB_HWCACHE_ALIGN, 1355 NULL); 1356 if (!mrt_cachep) 1357 return -ENOMEM; 1358 1359 err = register_pernet_subsys(&ip6mr_net_ops); 1360 if (err) 1361 goto reg_pernet_fail; 1362 1363 err = register_netdevice_notifier(&ip6_mr_notifier); 1364 if (err) 1365 goto reg_notif_fail; 1366 #ifdef CONFIG_IPV6_PIMSM_V2 1367 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1368 pr_err("%s: can't add PIM protocol\n", __func__); 1369 err = -EAGAIN; 1370 goto add_proto_fail; 1371 } 1372 #endif 1373 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, 1374 NULL, ip6mr_rtm_dumproute, 0); 1375 if (err == 0) 1376 return 0; 1377 1378 #ifdef CONFIG_IPV6_PIMSM_V2 1379 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1380 add_proto_fail: 1381 unregister_netdevice_notifier(&ip6_mr_notifier); 1382 #endif 1383 reg_notif_fail: 1384 unregister_pernet_subsys(&ip6mr_net_ops); 1385 reg_pernet_fail: 1386 kmem_cache_destroy(mrt_cachep); 1387 return err; 1388 } 1389 1390 void ip6_mr_cleanup(void) 1391 { 1392 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); 1393 #ifdef CONFIG_IPV6_PIMSM_V2 1394 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1395 #endif 1396 unregister_netdevice_notifier(&ip6_mr_notifier); 1397 unregister_pernet_subsys(&ip6mr_net_ops); 1398 kmem_cache_destroy(mrt_cachep); 1399 } 1400 1401 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1402 struct mf6cctl *mfc, int mrtsock, int parent) 1403 { 1404 unsigned char ttls[MAXMIFS]; 1405 struct mfc6_cache *uc, *c; 1406 struct mr_mfc *_uc; 1407 bool found; 1408 int i, err; 1409 1410 if (mfc->mf6cc_parent >= MAXMIFS) 1411 return -ENFILE; 1412 1413 memset(ttls, 255, MAXMIFS); 1414 for (i = 0; i < MAXMIFS; i++) { 1415 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1416 ttls[i] = 1; 1417 } 1418 1419 /* The entries are added/deleted only under RTNL */ 1420 rcu_read_lock(); 1421 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1422 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1423 rcu_read_unlock(); 1424 if (c) { 1425 write_lock_bh(&mrt_lock); 1426 c->_c.mfc_parent = mfc->mf6cc_parent; 1427 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1428 if (!mrtsock) 1429 c->_c.mfc_flags |= MFC_STATIC; 1430 write_unlock_bh(&mrt_lock); 1431 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1432 c, mrt->id); 1433 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1434 return 0; 1435 } 1436 1437 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1438 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1439 return -EINVAL; 1440 1441 c = ip6mr_cache_alloc(); 1442 if (!c) 1443 return -ENOMEM; 1444 1445 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1446 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1447 c->_c.mfc_parent = mfc->mf6cc_parent; 1448 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1449 if (!mrtsock) 1450 c->_c.mfc_flags |= MFC_STATIC; 1451 1452 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1453 ip6mr_rht_params); 1454 if (err) { 1455 pr_err("ip6mr: rhtable insert error %d\n", err); 1456 ip6mr_cache_free(c); 1457 return err; 1458 } 1459 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1460 1461 /* Check to see if we resolved a queued list. If so we 1462 * need to send on the frames and tidy up. 1463 */ 1464 found = false; 1465 spin_lock_bh(&mfc_unres_lock); 1466 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1467 uc = (struct mfc6_cache *)_uc; 1468 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1469 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1470 list_del(&_uc->list); 1471 atomic_dec(&mrt->cache_resolve_queue_len); 1472 found = true; 1473 break; 1474 } 1475 } 1476 if (list_empty(&mrt->mfc_unres_queue)) 1477 del_timer(&mrt->ipmr_expire_timer); 1478 spin_unlock_bh(&mfc_unres_lock); 1479 1480 if (found) { 1481 ip6mr_cache_resolve(net, mrt, uc, c); 1482 ip6mr_cache_free(uc); 1483 } 1484 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1485 c, mrt->id); 1486 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1487 return 0; 1488 } 1489 1490 /* 1491 * Close the multicast socket, and clear the vif tables etc 1492 */ 1493 1494 static void mroute_clean_tables(struct mr_table *mrt, int flags) 1495 { 1496 struct mr_mfc *c, *tmp; 1497 LIST_HEAD(list); 1498 int i; 1499 1500 /* Shut down all active vif entries */ 1501 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1502 for (i = 0; i < mrt->maxvif; i++) { 1503 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1504 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1505 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1506 continue; 1507 mif6_delete(mrt, i, 0, &list); 1508 } 1509 unregister_netdevice_many(&list); 1510 } 1511 1512 /* Wipe the cache */ 1513 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1514 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1515 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || 1516 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1517 continue; 1518 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1519 list_del_rcu(&c->list); 1520 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1521 FIB_EVENT_ENTRY_DEL, 1522 (struct mfc6_cache *)c, mrt->id); 1523 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1524 mr_cache_put(c); 1525 } 1526 } 1527 1528 if (flags & MRT6_FLUSH_MFC) { 1529 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1530 spin_lock_bh(&mfc_unres_lock); 1531 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1532 list_del(&c->list); 1533 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1534 RTM_DELROUTE); 1535 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1536 } 1537 spin_unlock_bh(&mfc_unres_lock); 1538 } 1539 } 1540 } 1541 1542 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1543 { 1544 int err = 0; 1545 struct net *net = sock_net(sk); 1546 1547 rtnl_lock(); 1548 write_lock_bh(&mrt_lock); 1549 if (rtnl_dereference(mrt->mroute_sk)) { 1550 err = -EADDRINUSE; 1551 } else { 1552 rcu_assign_pointer(mrt->mroute_sk, sk); 1553 sock_set_flag(sk, SOCK_RCU_FREE); 1554 net->ipv6.devconf_all->mc_forwarding++; 1555 } 1556 write_unlock_bh(&mrt_lock); 1557 1558 if (!err) 1559 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1560 NETCONFA_MC_FORWARDING, 1561 NETCONFA_IFINDEX_ALL, 1562 net->ipv6.devconf_all); 1563 rtnl_unlock(); 1564 1565 return err; 1566 } 1567 1568 int ip6mr_sk_done(struct sock *sk) 1569 { 1570 int err = -EACCES; 1571 struct net *net = sock_net(sk); 1572 struct mr_table *mrt; 1573 1574 if (sk->sk_type != SOCK_RAW || 1575 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1576 return err; 1577 1578 rtnl_lock(); 1579 ip6mr_for_each_table(mrt, net) { 1580 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1581 write_lock_bh(&mrt_lock); 1582 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1583 /* Note that mroute_sk had SOCK_RCU_FREE set, 1584 * so the RCU grace period before sk freeing 1585 * is guaranteed by sk_destruct() 1586 */ 1587 net->ipv6.devconf_all->mc_forwarding--; 1588 write_unlock_bh(&mrt_lock); 1589 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1590 NETCONFA_MC_FORWARDING, 1591 NETCONFA_IFINDEX_ALL, 1592 net->ipv6.devconf_all); 1593 1594 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); 1595 err = 0; 1596 break; 1597 } 1598 } 1599 rtnl_unlock(); 1600 1601 return err; 1602 } 1603 1604 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1605 { 1606 struct mr_table *mrt; 1607 struct flowi6 fl6 = { 1608 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1609 .flowi6_oif = skb->dev->ifindex, 1610 .flowi6_mark = skb->mark, 1611 }; 1612 1613 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1614 return NULL; 1615 1616 return rcu_access_pointer(mrt->mroute_sk); 1617 } 1618 EXPORT_SYMBOL(mroute6_is_socket); 1619 1620 /* 1621 * Socket options and virtual interface manipulation. The whole 1622 * virtual interface system is a complete heap, but unfortunately 1623 * that's how BSD mrouted happens to think. Maybe one day with a proper 1624 * MOSPF/PIM router set up we can clean this up. 1625 */ 1626 1627 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1628 unsigned int optlen) 1629 { 1630 int ret, parent = 0; 1631 struct mif6ctl vif; 1632 struct mf6cctl mfc; 1633 mifi_t mifi; 1634 struct net *net = sock_net(sk); 1635 struct mr_table *mrt; 1636 1637 if (sk->sk_type != SOCK_RAW || 1638 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1639 return -EOPNOTSUPP; 1640 1641 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1642 if (!mrt) 1643 return -ENOENT; 1644 1645 if (optname != MRT6_INIT) { 1646 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1647 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1648 return -EACCES; 1649 } 1650 1651 switch (optname) { 1652 case MRT6_INIT: 1653 if (optlen < sizeof(int)) 1654 return -EINVAL; 1655 1656 return ip6mr_sk_init(mrt, sk); 1657 1658 case MRT6_DONE: 1659 return ip6mr_sk_done(sk); 1660 1661 case MRT6_ADD_MIF: 1662 if (optlen < sizeof(vif)) 1663 return -EINVAL; 1664 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1665 return -EFAULT; 1666 if (vif.mif6c_mifi >= MAXMIFS) 1667 return -ENFILE; 1668 rtnl_lock(); 1669 ret = mif6_add(net, mrt, &vif, 1670 sk == rtnl_dereference(mrt->mroute_sk)); 1671 rtnl_unlock(); 1672 return ret; 1673 1674 case MRT6_DEL_MIF: 1675 if (optlen < sizeof(mifi_t)) 1676 return -EINVAL; 1677 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1678 return -EFAULT; 1679 rtnl_lock(); 1680 ret = mif6_delete(mrt, mifi, 0, NULL); 1681 rtnl_unlock(); 1682 return ret; 1683 1684 /* 1685 * Manipulate the forwarding caches. These live 1686 * in a sort of kernel/user symbiosis. 1687 */ 1688 case MRT6_ADD_MFC: 1689 case MRT6_DEL_MFC: 1690 parent = -1; 1691 fallthrough; 1692 case MRT6_ADD_MFC_PROXY: 1693 case MRT6_DEL_MFC_PROXY: 1694 if (optlen < sizeof(mfc)) 1695 return -EINVAL; 1696 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1697 return -EFAULT; 1698 if (parent == 0) 1699 parent = mfc.mf6cc_parent; 1700 rtnl_lock(); 1701 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1702 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1703 else 1704 ret = ip6mr_mfc_add(net, mrt, &mfc, 1705 sk == 1706 rtnl_dereference(mrt->mroute_sk), 1707 parent); 1708 rtnl_unlock(); 1709 return ret; 1710 1711 case MRT6_FLUSH: 1712 { 1713 int flags; 1714 1715 if (optlen != sizeof(flags)) 1716 return -EINVAL; 1717 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1718 return -EFAULT; 1719 rtnl_lock(); 1720 mroute_clean_tables(mrt, flags); 1721 rtnl_unlock(); 1722 return 0; 1723 } 1724 1725 /* 1726 * Control PIM assert (to activate pim will activate assert) 1727 */ 1728 case MRT6_ASSERT: 1729 { 1730 int v; 1731 1732 if (optlen != sizeof(v)) 1733 return -EINVAL; 1734 if (copy_from_sockptr(&v, optval, sizeof(v))) 1735 return -EFAULT; 1736 mrt->mroute_do_assert = v; 1737 return 0; 1738 } 1739 1740 #ifdef CONFIG_IPV6_PIMSM_V2 1741 case MRT6_PIM: 1742 { 1743 int v; 1744 1745 if (optlen != sizeof(v)) 1746 return -EINVAL; 1747 if (copy_from_sockptr(&v, optval, sizeof(v))) 1748 return -EFAULT; 1749 v = !!v; 1750 rtnl_lock(); 1751 ret = 0; 1752 if (v != mrt->mroute_do_pim) { 1753 mrt->mroute_do_pim = v; 1754 mrt->mroute_do_assert = v; 1755 } 1756 rtnl_unlock(); 1757 return ret; 1758 } 1759 1760 #endif 1761 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1762 case MRT6_TABLE: 1763 { 1764 u32 v; 1765 1766 if (optlen != sizeof(u32)) 1767 return -EINVAL; 1768 if (copy_from_sockptr(&v, optval, sizeof(v))) 1769 return -EFAULT; 1770 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1771 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1772 return -EINVAL; 1773 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1774 return -EBUSY; 1775 1776 rtnl_lock(); 1777 ret = 0; 1778 mrt = ip6mr_new_table(net, v); 1779 if (IS_ERR(mrt)) 1780 ret = PTR_ERR(mrt); 1781 else 1782 raw6_sk(sk)->ip6mr_table = v; 1783 rtnl_unlock(); 1784 return ret; 1785 } 1786 #endif 1787 /* 1788 * Spurious command, or MRT6_VERSION which you cannot 1789 * set. 1790 */ 1791 default: 1792 return -ENOPROTOOPT; 1793 } 1794 } 1795 1796 /* 1797 * Getsock opt support for the multicast routing system. 1798 */ 1799 1800 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, 1801 int __user *optlen) 1802 { 1803 int olr; 1804 int val; 1805 struct net *net = sock_net(sk); 1806 struct mr_table *mrt; 1807 1808 if (sk->sk_type != SOCK_RAW || 1809 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1810 return -EOPNOTSUPP; 1811 1812 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1813 if (!mrt) 1814 return -ENOENT; 1815 1816 switch (optname) { 1817 case MRT6_VERSION: 1818 val = 0x0305; 1819 break; 1820 #ifdef CONFIG_IPV6_PIMSM_V2 1821 case MRT6_PIM: 1822 val = mrt->mroute_do_pim; 1823 break; 1824 #endif 1825 case MRT6_ASSERT: 1826 val = mrt->mroute_do_assert; 1827 break; 1828 default: 1829 return -ENOPROTOOPT; 1830 } 1831 1832 if (get_user(olr, optlen)) 1833 return -EFAULT; 1834 1835 olr = min_t(int, olr, sizeof(int)); 1836 if (olr < 0) 1837 return -EINVAL; 1838 1839 if (put_user(olr, optlen)) 1840 return -EFAULT; 1841 if (copy_to_user(optval, &val, olr)) 1842 return -EFAULT; 1843 return 0; 1844 } 1845 1846 /* 1847 * The IP multicast ioctl support routines. 1848 */ 1849 1850 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) 1851 { 1852 struct sioc_sg_req6 sr; 1853 struct sioc_mif_req6 vr; 1854 struct vif_device *vif; 1855 struct mfc6_cache *c; 1856 struct net *net = sock_net(sk); 1857 struct mr_table *mrt; 1858 1859 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1860 if (!mrt) 1861 return -ENOENT; 1862 1863 switch (cmd) { 1864 case SIOCGETMIFCNT_IN6: 1865 if (copy_from_user(&vr, arg, sizeof(vr))) 1866 return -EFAULT; 1867 if (vr.mifi >= mrt->maxvif) 1868 return -EINVAL; 1869 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1870 read_lock(&mrt_lock); 1871 vif = &mrt->vif_table[vr.mifi]; 1872 if (VIF_EXISTS(mrt, vr.mifi)) { 1873 vr.icount = vif->pkt_in; 1874 vr.ocount = vif->pkt_out; 1875 vr.ibytes = vif->bytes_in; 1876 vr.obytes = vif->bytes_out; 1877 read_unlock(&mrt_lock); 1878 1879 if (copy_to_user(arg, &vr, sizeof(vr))) 1880 return -EFAULT; 1881 return 0; 1882 } 1883 read_unlock(&mrt_lock); 1884 return -EADDRNOTAVAIL; 1885 case SIOCGETSGCNT_IN6: 1886 if (copy_from_user(&sr, arg, sizeof(sr))) 1887 return -EFAULT; 1888 1889 rcu_read_lock(); 1890 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1891 if (c) { 1892 sr.pktcnt = c->_c.mfc_un.res.pkt; 1893 sr.bytecnt = c->_c.mfc_un.res.bytes; 1894 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1895 rcu_read_unlock(); 1896 1897 if (copy_to_user(arg, &sr, sizeof(sr))) 1898 return -EFAULT; 1899 return 0; 1900 } 1901 rcu_read_unlock(); 1902 return -EADDRNOTAVAIL; 1903 default: 1904 return -ENOIOCTLCMD; 1905 } 1906 } 1907 1908 #ifdef CONFIG_COMPAT 1909 struct compat_sioc_sg_req6 { 1910 struct sockaddr_in6 src; 1911 struct sockaddr_in6 grp; 1912 compat_ulong_t pktcnt; 1913 compat_ulong_t bytecnt; 1914 compat_ulong_t wrong_if; 1915 }; 1916 1917 struct compat_sioc_mif_req6 { 1918 mifi_t mifi; 1919 compat_ulong_t icount; 1920 compat_ulong_t ocount; 1921 compat_ulong_t ibytes; 1922 compat_ulong_t obytes; 1923 }; 1924 1925 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1926 { 1927 struct compat_sioc_sg_req6 sr; 1928 struct compat_sioc_mif_req6 vr; 1929 struct vif_device *vif; 1930 struct mfc6_cache *c; 1931 struct net *net = sock_net(sk); 1932 struct mr_table *mrt; 1933 1934 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1935 if (!mrt) 1936 return -ENOENT; 1937 1938 switch (cmd) { 1939 case SIOCGETMIFCNT_IN6: 1940 if (copy_from_user(&vr, arg, sizeof(vr))) 1941 return -EFAULT; 1942 if (vr.mifi >= mrt->maxvif) 1943 return -EINVAL; 1944 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1945 read_lock(&mrt_lock); 1946 vif = &mrt->vif_table[vr.mifi]; 1947 if (VIF_EXISTS(mrt, vr.mifi)) { 1948 vr.icount = vif->pkt_in; 1949 vr.ocount = vif->pkt_out; 1950 vr.ibytes = vif->bytes_in; 1951 vr.obytes = vif->bytes_out; 1952 read_unlock(&mrt_lock); 1953 1954 if (copy_to_user(arg, &vr, sizeof(vr))) 1955 return -EFAULT; 1956 return 0; 1957 } 1958 read_unlock(&mrt_lock); 1959 return -EADDRNOTAVAIL; 1960 case SIOCGETSGCNT_IN6: 1961 if (copy_from_user(&sr, arg, sizeof(sr))) 1962 return -EFAULT; 1963 1964 rcu_read_lock(); 1965 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1966 if (c) { 1967 sr.pktcnt = c->_c.mfc_un.res.pkt; 1968 sr.bytecnt = c->_c.mfc_un.res.bytes; 1969 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1970 rcu_read_unlock(); 1971 1972 if (copy_to_user(arg, &sr, sizeof(sr))) 1973 return -EFAULT; 1974 return 0; 1975 } 1976 rcu_read_unlock(); 1977 return -EADDRNOTAVAIL; 1978 default: 1979 return -ENOIOCTLCMD; 1980 } 1981 } 1982 #endif 1983 1984 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 1985 { 1986 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1987 IPSTATS_MIB_OUTFORWDATAGRAMS); 1988 IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), 1989 IPSTATS_MIB_OUTOCTETS, skb->len); 1990 return dst_output(net, sk, skb); 1991 } 1992 1993 /* 1994 * Processing handlers for ip6mr_forward 1995 */ 1996 1997 static int ip6mr_forward2(struct net *net, struct mr_table *mrt, 1998 struct sk_buff *skb, int vifi) 1999 { 2000 struct ipv6hdr *ipv6h; 2001 struct vif_device *vif = &mrt->vif_table[vifi]; 2002 struct net_device *dev; 2003 struct dst_entry *dst; 2004 struct flowi6 fl6; 2005 2006 if (!vif->dev) 2007 goto out_free; 2008 2009 #ifdef CONFIG_IPV6_PIMSM_V2 2010 if (vif->flags & MIFF_REGISTER) { 2011 vif->pkt_out++; 2012 vif->bytes_out += skb->len; 2013 vif->dev->stats.tx_bytes += skb->len; 2014 vif->dev->stats.tx_packets++; 2015 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2016 goto out_free; 2017 } 2018 #endif 2019 2020 ipv6h = ipv6_hdr(skb); 2021 2022 fl6 = (struct flowi6) { 2023 .flowi6_oif = vif->link, 2024 .daddr = ipv6h->daddr, 2025 }; 2026 2027 dst = ip6_route_output(net, NULL, &fl6); 2028 if (dst->error) { 2029 dst_release(dst); 2030 goto out_free; 2031 } 2032 2033 skb_dst_drop(skb); 2034 skb_dst_set(skb, dst); 2035 2036 /* 2037 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2038 * not only before forwarding, but after forwarding on all output 2039 * interfaces. It is clear, if mrouter runs a multicasting 2040 * program, it should receive packets not depending to what interface 2041 * program is joined. 2042 * If we will not make it, the program will have to join on all 2043 * interfaces. On the other hand, multihoming host (or router, but 2044 * not mrouter) cannot join to more than one interface - it will 2045 * result in receiving multiple packets. 2046 */ 2047 dev = vif->dev; 2048 skb->dev = dev; 2049 vif->pkt_out++; 2050 vif->bytes_out += skb->len; 2051 2052 /* We are about to write */ 2053 /* XXX: extension headers? */ 2054 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) 2055 goto out_free; 2056 2057 ipv6h = ipv6_hdr(skb); 2058 ipv6h->hop_limit--; 2059 2060 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2061 2062 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2063 net, NULL, skb, skb->dev, dev, 2064 ip6mr_forward2_finish); 2065 2066 out_free: 2067 kfree_skb(skb); 2068 return 0; 2069 } 2070 2071 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2072 { 2073 int ct; 2074 2075 for (ct = mrt->maxvif - 1; ct >= 0; ct--) { 2076 if (mrt->vif_table[ct].dev == dev) 2077 break; 2078 } 2079 return ct; 2080 } 2081 2082 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2083 struct net_device *dev, struct sk_buff *skb, 2084 struct mfc6_cache *c) 2085 { 2086 int psend = -1; 2087 int vif, ct; 2088 int true_vifi = ip6mr_find_vif(mrt, dev); 2089 2090 vif = c->_c.mfc_parent; 2091 c->_c.mfc_un.res.pkt++; 2092 c->_c.mfc_un.res.bytes += skb->len; 2093 c->_c.mfc_un.res.lastuse = jiffies; 2094 2095 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2096 struct mfc6_cache *cache_proxy; 2097 2098 /* For an (*,G) entry, we only check that the incoming 2099 * interface is part of the static tree. 2100 */ 2101 rcu_read_lock(); 2102 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2103 if (cache_proxy && 2104 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { 2105 rcu_read_unlock(); 2106 goto forward; 2107 } 2108 rcu_read_unlock(); 2109 } 2110 2111 /* 2112 * Wrong interface: drop packet and (maybe) send PIM assert. 2113 */ 2114 if (mrt->vif_table[vif].dev != dev) { 2115 c->_c.mfc_un.res.wrong_if++; 2116 2117 if (true_vifi >= 0 && mrt->mroute_do_assert && 2118 /* pimsm uses asserts, when switching from RPT to SPT, 2119 so that we cannot check that packet arrived on an oif. 2120 It is bad, but otherwise we would need to move pretty 2121 large chunk of pimd to kernel. Ough... --ANK 2122 */ 2123 (mrt->mroute_do_pim || 2124 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2125 time_after(jiffies, 2126 c->_c.mfc_un.res.last_assert + 2127 MFC_ASSERT_THRESH)) { 2128 c->_c.mfc_un.res.last_assert = jiffies; 2129 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2130 } 2131 goto dont_forward; 2132 } 2133 2134 forward: 2135 mrt->vif_table[vif].pkt_in++; 2136 mrt->vif_table[vif].bytes_in += skb->len; 2137 2138 /* 2139 * Forward the frame 2140 */ 2141 if (ipv6_addr_any(&c->mf6c_origin) && 2142 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2143 if (true_vifi >= 0 && 2144 true_vifi != c->_c.mfc_parent && 2145 ipv6_hdr(skb)->hop_limit > 2146 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2147 /* It's an (*,*) entry and the packet is not coming from 2148 * the upstream: forward the packet to the upstream 2149 * only. 2150 */ 2151 psend = c->_c.mfc_parent; 2152 goto last_forward; 2153 } 2154 goto dont_forward; 2155 } 2156 for (ct = c->_c.mfc_un.res.maxvif - 1; 2157 ct >= c->_c.mfc_un.res.minvif; ct--) { 2158 /* For (*,G) entry, don't forward to the incoming interface */ 2159 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2160 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2161 if (psend != -1) { 2162 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2163 if (skb2) 2164 ip6mr_forward2(net, mrt, skb2, psend); 2165 } 2166 psend = ct; 2167 } 2168 } 2169 last_forward: 2170 if (psend != -1) { 2171 ip6mr_forward2(net, mrt, skb, psend); 2172 return; 2173 } 2174 2175 dont_forward: 2176 kfree_skb(skb); 2177 } 2178 2179 2180 /* 2181 * Multicast packets for forwarding arrive here 2182 */ 2183 2184 int ip6_mr_input(struct sk_buff *skb) 2185 { 2186 struct mfc6_cache *cache; 2187 struct net *net = dev_net(skb->dev); 2188 struct mr_table *mrt; 2189 struct flowi6 fl6 = { 2190 .flowi6_iif = skb->dev->ifindex, 2191 .flowi6_mark = skb->mark, 2192 }; 2193 int err; 2194 struct net_device *dev; 2195 2196 /* skb->dev passed in is the master dev for vrfs. 2197 * Get the proper interface that does have a vif associated with it. 2198 */ 2199 dev = skb->dev; 2200 if (netif_is_l3_master(skb->dev)) { 2201 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2202 if (!dev) { 2203 kfree_skb(skb); 2204 return -ENODEV; 2205 } 2206 } 2207 2208 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2209 if (err < 0) { 2210 kfree_skb(skb); 2211 return err; 2212 } 2213 2214 read_lock(&mrt_lock); 2215 cache = ip6mr_cache_find(mrt, 2216 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2217 if (!cache) { 2218 int vif = ip6mr_find_vif(mrt, dev); 2219 2220 if (vif >= 0) 2221 cache = ip6mr_cache_find_any(mrt, 2222 &ipv6_hdr(skb)->daddr, 2223 vif); 2224 } 2225 2226 /* 2227 * No usable cache entry 2228 */ 2229 if (!cache) { 2230 int vif; 2231 2232 vif = ip6mr_find_vif(mrt, dev); 2233 if (vif >= 0) { 2234 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2235 read_unlock(&mrt_lock); 2236 2237 return err; 2238 } 2239 read_unlock(&mrt_lock); 2240 kfree_skb(skb); 2241 return -ENODEV; 2242 } 2243 2244 ip6_mr_forward(net, mrt, dev, skb, cache); 2245 2246 read_unlock(&mrt_lock); 2247 2248 return 0; 2249 } 2250 2251 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2252 u32 portid) 2253 { 2254 int err; 2255 struct mr_table *mrt; 2256 struct mfc6_cache *cache; 2257 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 2258 2259 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 2260 if (!mrt) 2261 return -ENOENT; 2262 2263 read_lock(&mrt_lock); 2264 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2265 if (!cache && skb->dev) { 2266 int vif = ip6mr_find_vif(mrt, skb->dev); 2267 2268 if (vif >= 0) 2269 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2270 vif); 2271 } 2272 2273 if (!cache) { 2274 struct sk_buff *skb2; 2275 struct ipv6hdr *iph; 2276 struct net_device *dev; 2277 int vif; 2278 2279 dev = skb->dev; 2280 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2281 read_unlock(&mrt_lock); 2282 return -ENODEV; 2283 } 2284 2285 /* really correct? */ 2286 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2287 if (!skb2) { 2288 read_unlock(&mrt_lock); 2289 return -ENOMEM; 2290 } 2291 2292 NETLINK_CB(skb2).portid = portid; 2293 skb_reset_transport_header(skb2); 2294 2295 skb_put(skb2, sizeof(struct ipv6hdr)); 2296 skb_reset_network_header(skb2); 2297 2298 iph = ipv6_hdr(skb2); 2299 iph->version = 0; 2300 iph->priority = 0; 2301 iph->flow_lbl[0] = 0; 2302 iph->flow_lbl[1] = 0; 2303 iph->flow_lbl[2] = 0; 2304 iph->payload_len = 0; 2305 iph->nexthdr = IPPROTO_NONE; 2306 iph->hop_limit = 0; 2307 iph->saddr = rt->rt6i_src.addr; 2308 iph->daddr = rt->rt6i_dst.addr; 2309 2310 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2311 read_unlock(&mrt_lock); 2312 2313 return err; 2314 } 2315 2316 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2317 read_unlock(&mrt_lock); 2318 return err; 2319 } 2320 2321 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2322 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2323 int flags) 2324 { 2325 struct nlmsghdr *nlh; 2326 struct rtmsg *rtm; 2327 int err; 2328 2329 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2330 if (!nlh) 2331 return -EMSGSIZE; 2332 2333 rtm = nlmsg_data(nlh); 2334 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2335 rtm->rtm_dst_len = 128; 2336 rtm->rtm_src_len = 128; 2337 rtm->rtm_tos = 0; 2338 rtm->rtm_table = mrt->id; 2339 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2340 goto nla_put_failure; 2341 rtm->rtm_type = RTN_MULTICAST; 2342 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2343 if (c->_c.mfc_flags & MFC_STATIC) 2344 rtm->rtm_protocol = RTPROT_STATIC; 2345 else 2346 rtm->rtm_protocol = RTPROT_MROUTED; 2347 rtm->rtm_flags = 0; 2348 2349 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2350 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2351 goto nla_put_failure; 2352 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2353 /* do not break the dump if cache is unresolved */ 2354 if (err < 0 && err != -ENOENT) 2355 goto nla_put_failure; 2356 2357 nlmsg_end(skb, nlh); 2358 return 0; 2359 2360 nla_put_failure: 2361 nlmsg_cancel(skb, nlh); 2362 return -EMSGSIZE; 2363 } 2364 2365 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2366 u32 portid, u32 seq, struct mr_mfc *c, 2367 int cmd, int flags) 2368 { 2369 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2370 cmd, flags); 2371 } 2372 2373 static int mr6_msgsize(bool unresolved, int maxvif) 2374 { 2375 size_t len = 2376 NLMSG_ALIGN(sizeof(struct rtmsg)) 2377 + nla_total_size(4) /* RTA_TABLE */ 2378 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2379 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2380 ; 2381 2382 if (!unresolved) 2383 len = len 2384 + nla_total_size(4) /* RTA_IIF */ 2385 + nla_total_size(0) /* RTA_MULTIPATH */ 2386 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2387 /* RTA_MFC_STATS */ 2388 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2389 ; 2390 2391 return len; 2392 } 2393 2394 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2395 int cmd) 2396 { 2397 struct net *net = read_pnet(&mrt->net); 2398 struct sk_buff *skb; 2399 int err = -ENOBUFS; 2400 2401 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2402 GFP_ATOMIC); 2403 if (!skb) 2404 goto errout; 2405 2406 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2407 if (err < 0) 2408 goto errout; 2409 2410 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2411 return; 2412 2413 errout: 2414 kfree_skb(skb); 2415 if (err < 0) 2416 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2417 } 2418 2419 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2420 { 2421 size_t len = 2422 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2423 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2424 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2425 /* IP6MRA_CREPORT_SRC_ADDR */ 2426 + nla_total_size(sizeof(struct in6_addr)) 2427 /* IP6MRA_CREPORT_DST_ADDR */ 2428 + nla_total_size(sizeof(struct in6_addr)) 2429 /* IP6MRA_CREPORT_PKT */ 2430 + nla_total_size(payloadlen) 2431 ; 2432 2433 return len; 2434 } 2435 2436 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2437 { 2438 struct net *net = read_pnet(&mrt->net); 2439 struct nlmsghdr *nlh; 2440 struct rtgenmsg *rtgenm; 2441 struct mrt6msg *msg; 2442 struct sk_buff *skb; 2443 struct nlattr *nla; 2444 int payloadlen; 2445 2446 payloadlen = pkt->len - sizeof(struct mrt6msg); 2447 msg = (struct mrt6msg *)skb_transport_header(pkt); 2448 2449 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2450 if (!skb) 2451 goto errout; 2452 2453 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2454 sizeof(struct rtgenmsg), 0); 2455 if (!nlh) 2456 goto errout; 2457 rtgenm = nlmsg_data(nlh); 2458 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2459 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2460 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2461 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2462 &msg->im6_src) || 2463 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2464 &msg->im6_dst)) 2465 goto nla_put_failure; 2466 2467 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2468 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2469 nla_data(nla), payloadlen)) 2470 goto nla_put_failure; 2471 2472 nlmsg_end(skb, nlh); 2473 2474 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2475 return; 2476 2477 nla_put_failure: 2478 nlmsg_cancel(skb, nlh); 2479 errout: 2480 kfree_skb(skb); 2481 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2482 } 2483 2484 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2485 { 2486 const struct nlmsghdr *nlh = cb->nlh; 2487 struct fib_dump_filter filter = {}; 2488 int err; 2489 2490 if (cb->strict_check) { 2491 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2492 &filter, cb); 2493 if (err < 0) 2494 return err; 2495 } 2496 2497 if (filter.table_id) { 2498 struct mr_table *mrt; 2499 2500 mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2501 if (!mrt) { 2502 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) 2503 return skb->len; 2504 2505 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2506 return -ENOENT; 2507 } 2508 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2509 &mfc_unres_lock, &filter); 2510 return skb->len ? : err; 2511 } 2512 2513 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2514 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2515 } 2516