/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
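/* For illustration (a descriptive note, not part of the original scheme
 * description): the data path only ever takes the read side of mrt_lock,
 * e.g.
 *
 *	read_lock(&mrt_lock);
 *	... look up vif_table[] / resolved MFC entries ...
 *	read_unlock(&mrt_lock);
 *
 * while configuration changes (process context, under RTNL) take
 * write_lock_bh(&mrt_lock), and only the unresolved queue is guarded by
 * spin_lock_bh(&mfc_unres_lock).
 */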
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}
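/* Illustrative sketch (not from the original source): with
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, a daemon can bind its mroute socket
 * to a non-default table before MRT6_INIT (see the MRT6_TABLE handling
 * further down, which refuses the change once mroute_sk is registered):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	uint32_t table = 42;	// hypothetical table id
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 * The fib rules machinery above then steers incoming multicast to the
 * right table via ip6mr_fib_lookup().
 */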
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}
#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next = mr_vif_seq_next,
	.stop = ip6mr_vif_seq_stop,
	.show = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = mr_mfc_seq_next,
	.stop = mr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};
#endif
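/* Example of what the seq handlers above emit (illustrative sample only;
 * column widths follow the format strings):
 *
 *	# cat /proc/net/ip6_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           1500      10      1500      10 00000
 *
 * Each /proc/net/ip6_mr_cache line prints group, origin, input interface,
 * the packet/byte/wrong-interface counters, and one "oif:ttl" pair per
 * forwarding interface.
 */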
#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif
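/* Sketch of the PIM register data flow implemented above (descriptive
 * comment added for clarity, not in the original file):
 *
 *   decap: PIM Register in -> pim6_rcv() -> inner packet re-injected on
 *          the pim6reg device via skb_tunnel_rx()/netif_rx()
 *   encap: packet routed out pim6reg -> reg_vif_xmit() -> MRT6MSG_WHOLEPKT
 *          upcall, so the daemon builds the Register message itself
 */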
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}
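/* Illustrative userspace counterpart of mif6_add() (a sketch, not part of
 * this file): a routing daemon registers an interface as MIF 0 with
 *
 *	struct mif6ctl mc = {
 *		.mif6c_mifi  = 0,
 *		.mif6c_flags = 0,			// or MIFF_REGISTER
 *		.mif6c_pifi  = if_nametoindex("eth0"),
 *	};
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 * which lands in ip6_mroute_setsockopt() below and then here.
 */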
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
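/* Userspace side, for illustration (a sketch, not from this file): the
 * daemon reads these upcalls from its IPPROTO_ICMPV6 mroute socket and
 * distinguishes them from real ICMPv6 by the zero im6_mbz byte, roughly:
 *
 *	char buf[8192];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct mrt6msg *m = (struct mrt6msg *)buf;	// assumed layout
 *	if (n >= (ssize_t)sizeof(*m) && m->im6_mbz == 0 &&
 *	    m->im6_msgtype == MRT6MSG_NOCACHE)
 *		add_mfc_for(&m->im6_src, &m->im6_dst, m->im6_mif);
 *
 * add_mfc_for() is a hypothetical helper that would answer with
 * MRT6_ADD_MFC (see ip6mr_mfc_add() below).
 */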
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			     sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			     sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
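/* Typical daemon lifecycle against the entry points above and below
 * (an illustrative sketch, not part of the original file):
 *
 *	MRT6_INIT  -> ip6mr_sk_init()  registers the mroute socket
 *	MRT6_ADD_MIF / MRT6_ADD_MFC    configure interfaces and routes
 *	MRT6_DONE  -> ip6mr_sk_done()  tears everything down; closing the
 *	                               raw socket reaches the same path
 *
 * Only one mroute socket may be registered per table (-EADDRINUSE).
 */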
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
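/* End-to-end usage sketch for the two calls above (illustrative; the
 * MRT6_* constants come from <linux/mroute6.h>):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1, ver;
 *	socklen_t len = sizeof(ver);
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	getsockopt(fd, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 *	// ver == 0x0305 per ip6_mroute_getsockopt() above
 *
 * A struct mf6cctl with mf6cc_origin/mf6cc_mcastgrp/mf6cc_parent and an
 * IF_SET() ifset is then passed via MRT6_ADD_MFC to install a route.
 */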
/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
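/* Illustrative caller of SIOCGETSGCNT_IN6 (a sketch, not from this file;
 * the addresses are documentation examples):
 *
 *	struct sioc_sg_req6 sr = {0};
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::1234", &sr.grp.sin6_addr);
 *	if (ioctl(fd, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 *
 * The counters come from the resolved MFC entry looked up above.
 */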
#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Build a minimal dummy IPv6 header carrying only the
		 * addresses; that is all the unresolved-cache code needs.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c,
			     int cmd, int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
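/*
 * Illustrative sketch, not part of this file: the RTNLGRP_IPV6_MROUTE
 * notifications emitted by mr6_netlink_event() above can be received from
 * user space with an ordinary rtnetlink socket that joins the group.
 * Minimal subscription code, error handling elided and the function name
 * invented for the example; excluded from the build:
 */
#if 0
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int open_ip6mr_listener(void)
{
	int grp = RTNLGRP_IPV6_MROUTE;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	/* join the IPv6 multicast-route notification group */
	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
		   &grp, sizeof(grp));
	return fd;	/* recvmsg() now yields the RTM_*ROUTE events */
}
#endif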
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}
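/*
 * Illustrative sketch, not from the original source: an RTM_NEWCACHEREPORT
 * message built by mrt6msg_netlink_event() above is a struct rtgenmsg
 * followed by IP6MRA_CREPORT_* attributes. A hypothetical userspace walker
 * over those attributes, using the standard rtattr iteration macros and
 * kept out of the build:
 */
#if 0
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/mroute6.h>

static void walk_cache_report(struct nlmsghdr *nlh)
{
	struct rtgenmsg *rtgenm = NLMSG_DATA(nlh);
	int len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*rtgenm));
	struct rtattr *rta = (struct rtattr *)((char *)rtgenm +
					       NLMSG_ALIGN(sizeof(*rtgenm)));

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		switch (rta->rta_type) {
		case IP6MRA_CREPORT_MSGTYPE:	/* u8, an MRT6MSG_* value */
		case IP6MRA_CREPORT_MIF_ID:	/* u32, arrival mif index */
		case IP6MRA_CREPORT_SRC_ADDR:	/* struct in6_addr */
		case IP6MRA_CREPORT_DST_ADDR:	/* struct in6_addr */
		case IP6MRA_CREPORT_PKT:	/* the reported packet bytes */
			break;
		}
	}
}
#endif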