/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>

struct ipmr_rule {
	struct fib_rule common;
};

struct ipmr_result {
	struct mr_table *mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
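 * In practice: readers use RCU or read_lock(&mrt_lock); updates from the
 * mroute socket run under rtnl_lock() plus write_lock_bh(&mrt_lock); only
 * the unresolved queue takes mfc_unres_lock on the data path.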
96 */ 97 98 static struct kmem_cache *mrt_cachep __read_mostly; 99 100 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 101 static void ipmr_free_table(struct mr_table *mrt); 102 103 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 104 struct sk_buff *skb, struct mfc_cache *cache, 105 int local); 106 static int ipmr_cache_report(struct mr_table *mrt, 107 struct sk_buff *pkt, vifi_t vifi, int assert); 108 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 109 struct mfc_cache *c, struct rtmsg *rtm); 110 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 111 int cmd); 112 static void mroute_clean_tables(struct mr_table *mrt, bool all); 113 static void ipmr_expire_process(unsigned long arg); 114 115 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 116 #define ipmr_for_each_table(mrt, net) \ 117 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 118 119 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 120 { 121 struct mr_table *mrt; 122 123 ipmr_for_each_table(mrt, net) { 124 if (mrt->id == id) 125 return mrt; 126 } 127 return NULL; 128 } 129 130 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 131 struct mr_table **mrt) 132 { 133 int err; 134 struct ipmr_result res; 135 struct fib_lookup_arg arg = { 136 .result = &res, 137 .flags = FIB_LOOKUP_NOREF, 138 }; 139 140 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 141 flowi4_to_flowi(flp4), 0, &arg); 142 if (err < 0) 143 return err; 144 *mrt = res.mrt; 145 return 0; 146 } 147 148 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 149 int flags, struct fib_lookup_arg *arg) 150 { 151 struct ipmr_result *res = arg->result; 152 struct mr_table *mrt; 153 154 switch (rule->action) { 155 case FR_ACT_TO_TBL: 156 break; 157 case FR_ACT_UNREACHABLE: 158 return -ENETUNREACH; 159 case FR_ACT_PROHIBIT: 160 return -EACCES; 161 case FR_ACT_BLACKHOLE: 162 default: 163 return -EINVAL; 164 } 165 166 mrt = ipmr_get_table(rule->fr_net, rule->table); 167 if (!mrt) 168 return -EAGAIN; 169 res->mrt = mrt; 170 return 0; 171 } 172 173 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 174 { 175 return 1; 176 } 177 178 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 179 FRA_GENERIC_POLICY, 180 }; 181 182 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 183 struct fib_rule_hdr *frh, struct nlattr **tb) 184 { 185 return 0; 186 } 187 188 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 189 struct nlattr **tb) 190 { 191 return 1; 192 } 193 194 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 195 struct fib_rule_hdr *frh) 196 { 197 frh->dst_len = 0; 198 frh->src_len = 0; 199 frh->tos = 0; 200 return 0; 201 } 202 203 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 204 .family = RTNL_FAMILY_IPMR, 205 .rule_size = sizeof(struct ipmr_rule), 206 .addr_size = sizeof(u32), 207 .action = ipmr_rule_action, 208 .match = ipmr_rule_match, 209 .configure = ipmr_rule_configure, 210 .compare = ipmr_rule_compare, 211 .fill = ipmr_rule_fill, 212 .nlgroup = RTNLGRP_IPV4_RULE, 213 .policy = ipmr_rule_policy, 214 .owner = THIS_MODULE, 215 }; 216 217 static int __net_init ipmr_rules_init(struct net *net) 218 { 219 struct fib_rules_ops *ops; 220 struct mr_table *mrt; 221 int err; 222 223 ops = fib_rules_register(&ipmr_rules_ops_template, net); 224 if (IS_ERR(ops)) 225 return PTR_ERR(ops); 226 227 
INIT_LIST_HEAD(&net->ipv4.mr_tables); 228 229 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 230 if (IS_ERR(mrt)) { 231 err = PTR_ERR(mrt); 232 goto err1; 233 } 234 235 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 236 if (err < 0) 237 goto err2; 238 239 net->ipv4.mr_rules_ops = ops; 240 return 0; 241 242 err2: 243 ipmr_free_table(mrt); 244 err1: 245 fib_rules_unregister(ops); 246 return err; 247 } 248 249 static void __net_exit ipmr_rules_exit(struct net *net) 250 { 251 struct mr_table *mrt, *next; 252 253 rtnl_lock(); 254 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 255 list_del(&mrt->list); 256 ipmr_free_table(mrt); 257 } 258 fib_rules_unregister(net->ipv4.mr_rules_ops); 259 rtnl_unlock(); 260 } 261 #else 262 #define ipmr_for_each_table(mrt, net) \ 263 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 264 265 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 266 { 267 return net->ipv4.mrt; 268 } 269 270 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 271 struct mr_table **mrt) 272 { 273 *mrt = net->ipv4.mrt; 274 return 0; 275 } 276 277 static int __net_init ipmr_rules_init(struct net *net) 278 { 279 struct mr_table *mrt; 280 281 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 282 if (IS_ERR(mrt)) 283 return PTR_ERR(mrt); 284 net->ipv4.mrt = mrt; 285 return 0; 286 } 287 288 static void __net_exit ipmr_rules_exit(struct net *net) 289 { 290 rtnl_lock(); 291 ipmr_free_table(net->ipv4.mrt); 292 net->ipv4.mrt = NULL; 293 rtnl_unlock(); 294 } 295 #endif 296 297 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 298 { 299 struct mr_table *mrt; 300 unsigned int i; 301 302 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 303 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 304 return ERR_PTR(-EINVAL); 305 306 mrt = ipmr_get_table(net, id); 307 if (mrt) 308 return mrt; 309 310 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); 311 if (!mrt) 312 return ERR_PTR(-ENOMEM); 313 write_pnet(&mrt->net, net); 314 mrt->id = id; 315 316 /* Forwarding cache */ 317 for (i = 0; i < MFC_LINES; i++) 318 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); 319 320 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 321 322 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 323 (unsigned long)mrt); 324 325 mrt->mroute_reg_vif_num = -1; 326 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 327 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 328 #endif 329 return mrt; 330 } 331 332 static void ipmr_free_table(struct mr_table *mrt) 333 { 334 del_timer_sync(&mrt->ipmr_expire_timer); 335 mroute_clean_tables(mrt, true); 336 kfree(mrt); 337 } 338 339 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 340 341 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 342 { 343 struct net *net = dev_net(dev); 344 345 dev_close(dev); 346 347 dev = __dev_get_by_name(net, "tunl0"); 348 if (dev) { 349 const struct net_device_ops *ops = dev->netdev_ops; 350 struct ifreq ifr; 351 struct ip_tunnel_parm p; 352 353 memset(&p, 0, sizeof(p)); 354 p.iph.daddr = v->vifc_rmt_addr.s_addr; 355 p.iph.saddr = v->vifc_lcl_addr.s_addr; 356 p.iph.version = 4; 357 p.iph.ihl = 5; 358 p.iph.protocol = IPPROTO_IPIP; 359 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 360 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 361 362 if (ops->ndo_do_ioctl) { 363 mm_segment_t oldfs = get_fs(); 364 365 set_fs(KERNEL_DS); 366 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 367 set_fs(oldfs); 368 } 369 } 370 } 371 372 /* Initialize ipmr pimreg/tunnel in_device */ 373 static bool 
ipmr_init_vif_indev(const struct net_device *dev) 374 { 375 struct in_device *in_dev; 376 377 ASSERT_RTNL(); 378 379 in_dev = __in_dev_get_rtnl(dev); 380 if (!in_dev) 381 return false; 382 ipv4_devconf_setall(in_dev); 383 neigh_parms_data_state_setall(in_dev->arp_parms); 384 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 385 386 return true; 387 } 388 389 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 390 { 391 struct net_device *dev; 392 393 dev = __dev_get_by_name(net, "tunl0"); 394 395 if (dev) { 396 const struct net_device_ops *ops = dev->netdev_ops; 397 int err; 398 struct ifreq ifr; 399 struct ip_tunnel_parm p; 400 401 memset(&p, 0, sizeof(p)); 402 p.iph.daddr = v->vifc_rmt_addr.s_addr; 403 p.iph.saddr = v->vifc_lcl_addr.s_addr; 404 p.iph.version = 4; 405 p.iph.ihl = 5; 406 p.iph.protocol = IPPROTO_IPIP; 407 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 408 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 409 410 if (ops->ndo_do_ioctl) { 411 mm_segment_t oldfs = get_fs(); 412 413 set_fs(KERNEL_DS); 414 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 415 set_fs(oldfs); 416 } else { 417 err = -EOPNOTSUPP; 418 } 419 dev = NULL; 420 421 if (err == 0 && 422 (dev = __dev_get_by_name(net, p.name)) != NULL) { 423 dev->flags |= IFF_MULTICAST; 424 if (!ipmr_init_vif_indev(dev)) 425 goto failure; 426 if (dev_open(dev)) 427 goto failure; 428 dev_hold(dev); 429 } 430 } 431 return dev; 432 433 failure: 434 unregister_netdevice(dev); 435 return NULL; 436 } 437 438 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 439 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 440 { 441 struct net *net = dev_net(dev); 442 struct mr_table *mrt; 443 struct flowi4 fl4 = { 444 .flowi4_oif = dev->ifindex, 445 .flowi4_iif = skb->skb_iif ? 
				      : LOOPBACK_IFINDEX,
		.flowi4_mark = skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c.
packet is not truncated 533 */ 534 if (!ipv4_is_multicast(encap->daddr) || 535 encap->tot_len == 0 || 536 ntohs(encap->tot_len) + pimlen > skb->len) 537 return 1; 538 539 read_lock(&mrt_lock); 540 if (mrt->mroute_reg_vif_num >= 0) 541 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 542 read_unlock(&mrt_lock); 543 544 if (!reg_dev) 545 return 1; 546 547 skb->mac_header = skb->network_header; 548 skb_pull(skb, (u8 *)encap - skb->data); 549 skb_reset_network_header(skb); 550 skb->protocol = htons(ETH_P_IP); 551 skb->ip_summed = CHECKSUM_NONE; 552 553 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 554 555 netif_rx(skb); 556 557 return NET_RX_SUCCESS; 558 } 559 #else 560 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 561 { 562 return NULL; 563 } 564 #endif 565 566 /** 567 * vif_delete - Delete a VIF entry 568 * @notify: Set to 1, if the caller is a notifier_call 569 */ 570 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 571 struct list_head *head) 572 { 573 struct vif_device *v; 574 struct net_device *dev; 575 struct in_device *in_dev; 576 577 if (vifi < 0 || vifi >= mrt->maxvif) 578 return -EADDRNOTAVAIL; 579 580 v = &mrt->vif_table[vifi]; 581 582 write_lock_bh(&mrt_lock); 583 dev = v->dev; 584 v->dev = NULL; 585 586 if (!dev) { 587 write_unlock_bh(&mrt_lock); 588 return -EADDRNOTAVAIL; 589 } 590 591 if (vifi == mrt->mroute_reg_vif_num) 592 mrt->mroute_reg_vif_num = -1; 593 594 if (vifi + 1 == mrt->maxvif) { 595 int tmp; 596 597 for (tmp = vifi - 1; tmp >= 0; tmp--) { 598 if (VIF_EXISTS(mrt, tmp)) 599 break; 600 } 601 mrt->maxvif = tmp+1; 602 } 603 604 write_unlock_bh(&mrt_lock); 605 606 dev_set_allmulti(dev, -1); 607 608 in_dev = __in_dev_get_rtnl(dev); 609 if (in_dev) { 610 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 611 inet_netconf_notify_devconf(dev_net(dev), 612 NETCONFA_MC_FORWARDING, 613 dev->ifindex, &in_dev->cnf); 614 ip_rt_multicast_event(in_dev); 615 } 616 617 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 618 unregister_netdevice_queue(dev, head); 619 620 dev_put(dev); 621 return 0; 622 } 623 624 static void ipmr_cache_free_rcu(struct rcu_head *head) 625 { 626 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); 627 628 kmem_cache_free(mrt_cachep, c); 629 } 630 631 static inline void ipmr_cache_free(struct mfc_cache *c) 632 { 633 call_rcu(&c->rcu, ipmr_cache_free_rcu); 634 } 635 636 /* Destroy an unresolved cache entry, killing queued skbs 637 * and reporting error to netlink readers. 638 */ 639 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 640 { 641 struct net *net = read_pnet(&mrt->net); 642 struct sk_buff *skb; 643 struct nlmsgerr *e; 644 645 atomic_dec(&mrt->cache_resolve_queue_len); 646 647 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 648 if (ip_hdr(skb)->version == 0) { 649 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 650 nlh->nlmsg_type = NLMSG_ERROR; 651 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 652 skb_trim(skb, nlh->nlmsg_len); 653 e = nlmsg_data(nlh); 654 e->error = -ETIMEDOUT; 655 memset(&e->msg, 0, sizeof(e->msg)); 656 657 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 658 } else { 659 kfree_skb(skb); 660 } 661 } 662 663 ipmr_cache_free(c); 664 } 665 666 /* Timer process for the unresolved queue. 
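 * If mfc_unres_lock is contended the timer simply re-arms itself; otherwise
 * entries whose resolution timed out are torn down via ipmr_destroy_unres().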
*/ 667 static void ipmr_expire_process(unsigned long arg) 668 { 669 struct mr_table *mrt = (struct mr_table *)arg; 670 unsigned long now; 671 unsigned long expires; 672 struct mfc_cache *c, *next; 673 674 if (!spin_trylock(&mfc_unres_lock)) { 675 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 676 return; 677 } 678 679 if (list_empty(&mrt->mfc_unres_queue)) 680 goto out; 681 682 now = jiffies; 683 expires = 10*HZ; 684 685 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 686 if (time_after(c->mfc_un.unres.expires, now)) { 687 unsigned long interval = c->mfc_un.unres.expires - now; 688 if (interval < expires) 689 expires = interval; 690 continue; 691 } 692 693 list_del(&c->list); 694 mroute_netlink_event(mrt, c, RTM_DELROUTE); 695 ipmr_destroy_unres(mrt, c); 696 } 697 698 if (!list_empty(&mrt->mfc_unres_queue)) 699 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 700 701 out: 702 spin_unlock(&mfc_unres_lock); 703 } 704 705 /* Fill oifs list. It is called under write locked mrt_lock. */ 706 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, 707 unsigned char *ttls) 708 { 709 int vifi; 710 711 cache->mfc_un.res.minvif = MAXVIFS; 712 cache->mfc_un.res.maxvif = 0; 713 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 714 715 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 716 if (VIF_EXISTS(mrt, vifi) && 717 ttls[vifi] && ttls[vifi] < 255) { 718 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 719 if (cache->mfc_un.res.minvif > vifi) 720 cache->mfc_un.res.minvif = vifi; 721 if (cache->mfc_un.res.maxvif <= vifi) 722 cache->mfc_un.res.maxvif = vifi + 1; 723 } 724 } 725 } 726 727 static int vif_add(struct net *net, struct mr_table *mrt, 728 struct vifctl *vifc, int mrtsock) 729 { 730 int vifi = vifc->vifc_vifi; 731 struct vif_device *v = &mrt->vif_table[vifi]; 732 struct net_device *dev; 733 struct in_device *in_dev; 734 int err; 735 736 /* Is vif busy ? 
*/ 737 if (VIF_EXISTS(mrt, vifi)) 738 return -EADDRINUSE; 739 740 switch (vifc->vifc_flags) { 741 case VIFF_REGISTER: 742 if (!ipmr_pimsm_enabled()) 743 return -EINVAL; 744 /* Special Purpose VIF in PIM 745 * All the packets will be sent to the daemon 746 */ 747 if (mrt->mroute_reg_vif_num >= 0) 748 return -EADDRINUSE; 749 dev = ipmr_reg_vif(net, mrt); 750 if (!dev) 751 return -ENOBUFS; 752 err = dev_set_allmulti(dev, 1); 753 if (err) { 754 unregister_netdevice(dev); 755 dev_put(dev); 756 return err; 757 } 758 break; 759 case VIFF_TUNNEL: 760 dev = ipmr_new_tunnel(net, vifc); 761 if (!dev) 762 return -ENOBUFS; 763 err = dev_set_allmulti(dev, 1); 764 if (err) { 765 ipmr_del_tunnel(dev, vifc); 766 dev_put(dev); 767 return err; 768 } 769 break; 770 case VIFF_USE_IFINDEX: 771 case 0: 772 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 773 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 774 if (dev && !__in_dev_get_rtnl(dev)) { 775 dev_put(dev); 776 return -EADDRNOTAVAIL; 777 } 778 } else { 779 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 780 } 781 if (!dev) 782 return -EADDRNOTAVAIL; 783 err = dev_set_allmulti(dev, 1); 784 if (err) { 785 dev_put(dev); 786 return err; 787 } 788 break; 789 default: 790 return -EINVAL; 791 } 792 793 in_dev = __in_dev_get_rtnl(dev); 794 if (!in_dev) { 795 dev_put(dev); 796 return -EADDRNOTAVAIL; 797 } 798 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 799 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, 800 &in_dev->cnf); 801 ip_rt_multicast_event(in_dev); 802 803 /* Fill in the VIF structures */ 804 805 v->rate_limit = vifc->vifc_rate_limit; 806 v->local = vifc->vifc_lcl_addr.s_addr; 807 v->remote = vifc->vifc_rmt_addr.s_addr; 808 v->flags = vifc->vifc_flags; 809 if (!mrtsock) 810 v->flags |= VIFF_STATIC; 811 v->threshold = vifc->vifc_threshold; 812 v->bytes_in = 0; 813 v->bytes_out = 0; 814 v->pkt_in = 0; 815 v->pkt_out = 0; 816 v->link = dev->ifindex; 817 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) 818 v->link = dev_get_iflink(dev); 819 820 /* And finish update writing critical data */ 821 write_lock_bh(&mrt_lock); 822 v->dev = dev; 823 if (v->flags & VIFF_REGISTER) 824 mrt->mroute_reg_vif_num = vifi; 825 if (vifi+1 > mrt->maxvif) 826 mrt->maxvif = vifi+1; 827 write_unlock_bh(&mrt_lock); 828 return 0; 829 } 830 831 /* called with rcu_read_lock() */ 832 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 833 __be32 origin, 834 __be32 mcastgrp) 835 { 836 int line = MFC_HASH(mcastgrp, origin); 837 struct mfc_cache *c; 838 839 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) { 840 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) 841 return c; 842 } 843 return NULL; 844 } 845 846 /* Look for a (*,*,oif) entry */ 847 static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, 848 int vifi) 849 { 850 int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY)); 851 struct mfc_cache *c; 852 853 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) 854 if (c->mfc_origin == htonl(INADDR_ANY) && 855 c->mfc_mcastgrp == htonl(INADDR_ANY) && 856 c->mfc_un.res.ttls[vifi] < 255) 857 return c; 858 859 return NULL; 860 } 861 862 /* Look for a (*,G) entry */ 863 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 864 __be32 mcastgrp, int vifi) 865 { 866 int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY)); 867 struct mfc_cache *c, *proxy; 868 869 if (mcastgrp == htonl(INADDR_ANY)) 870 goto skip; 871 872 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) 873 if (c->mfc_origin 
== htonl(INADDR_ANY) && 874 c->mfc_mcastgrp == mcastgrp) { 875 if (c->mfc_un.res.ttls[vifi] < 255) 876 return c; 877 878 /* It's ok if the vifi is part of the static tree */ 879 proxy = ipmr_cache_find_any_parent(mrt, 880 c->mfc_parent); 881 if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) 882 return c; 883 } 884 885 skip: 886 return ipmr_cache_find_any_parent(mrt, vifi); 887 } 888 889 /* Allocate a multicast cache entry */ 890 static struct mfc_cache *ipmr_cache_alloc(void) 891 { 892 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 893 894 if (c) { 895 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 896 c->mfc_un.res.minvif = MAXVIFS; 897 } 898 return c; 899 } 900 901 static struct mfc_cache *ipmr_cache_alloc_unres(void) 902 { 903 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 904 905 if (c) { 906 skb_queue_head_init(&c->mfc_un.unres.unresolved); 907 c->mfc_un.unres.expires = jiffies + 10*HZ; 908 } 909 return c; 910 } 911 912 /* A cache entry has gone into a resolved state from queued */ 913 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 914 struct mfc_cache *uc, struct mfc_cache *c) 915 { 916 struct sk_buff *skb; 917 struct nlmsgerr *e; 918 919 /* Play the pending entries through our router */ 920 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 921 if (ip_hdr(skb)->version == 0) { 922 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 923 924 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { 925 nlh->nlmsg_len = skb_tail_pointer(skb) - 926 (u8 *)nlh; 927 } else { 928 nlh->nlmsg_type = NLMSG_ERROR; 929 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 930 skb_trim(skb, nlh->nlmsg_len); 931 e = nlmsg_data(nlh); 932 e->error = -EMSGSIZE; 933 memset(&e->msg, 0, sizeof(e->msg)); 934 } 935 936 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 937 } else { 938 ip_mr_forward(net, mrt, skb, c, 0); 939 } 940 } 941 } 942 943 /* Bounce a cache query up to mrouted. We could use netlink for this but mrouted 944 * expects the following bizarre scheme. 945 * 946 * Called under mrt_lock. 947 */ 948 static int ipmr_cache_report(struct mr_table *mrt, 949 struct sk_buff *pkt, vifi_t vifi, int assert) 950 { 951 const int ihl = ip_hdrlen(pkt); 952 struct sock *mroute_sk; 953 struct igmphdr *igmp; 954 struct igmpmsg *msg; 955 struct sk_buff *skb; 956 int ret; 957 958 if (assert == IGMPMSG_WHOLEPKT) 959 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 960 else 961 skb = alloc_skb(128, GFP_ATOMIC); 962 963 if (!skb) 964 return -ENOBUFS; 965 966 if (assert == IGMPMSG_WHOLEPKT) { 967 /* Ugly, but we have no choice with this interface. 968 * Duplicate old header, fix ihl, length etc. 
969 * And all this only to mangle msg->im_msgtype and 970 * to set msg->im_mbz to "mbz" :-) 971 */ 972 skb_push(skb, sizeof(struct iphdr)); 973 skb_reset_network_header(skb); 974 skb_reset_transport_header(skb); 975 msg = (struct igmpmsg *)skb_network_header(skb); 976 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 977 msg->im_msgtype = IGMPMSG_WHOLEPKT; 978 msg->im_mbz = 0; 979 msg->im_vif = mrt->mroute_reg_vif_num; 980 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 981 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 982 sizeof(struct iphdr)); 983 } else { 984 /* Copy the IP header */ 985 skb_set_network_header(skb, skb->len); 986 skb_put(skb, ihl); 987 skb_copy_to_linear_data(skb, pkt->data, ihl); 988 /* Flag to the kernel this is a route add */ 989 ip_hdr(skb)->protocol = 0; 990 msg = (struct igmpmsg *)skb_network_header(skb); 991 msg->im_vif = vifi; 992 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 993 /* Add our header */ 994 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 995 igmp->type = assert; 996 msg->im_msgtype = assert; 997 igmp->code = 0; 998 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 999 skb->transport_header = skb->network_header; 1000 } 1001 1002 rcu_read_lock(); 1003 mroute_sk = rcu_dereference(mrt->mroute_sk); 1004 if (!mroute_sk) { 1005 rcu_read_unlock(); 1006 kfree_skb(skb); 1007 return -EINVAL; 1008 } 1009 1010 /* Deliver to mrouted */ 1011 ret = sock_queue_rcv_skb(mroute_sk, skb); 1012 rcu_read_unlock(); 1013 if (ret < 0) { 1014 net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); 1015 kfree_skb(skb); 1016 } 1017 1018 return ret; 1019 } 1020 1021 /* Queue a packet for resolution. It gets locked cache entry! */ 1022 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, 1023 struct sk_buff *skb) 1024 { 1025 bool found = false; 1026 int err; 1027 struct mfc_cache *c; 1028 const struct iphdr *iph = ip_hdr(skb); 1029 1030 spin_lock_bh(&mfc_unres_lock); 1031 list_for_each_entry(c, &mrt->mfc_unres_queue, list) { 1032 if (c->mfc_mcastgrp == iph->daddr && 1033 c->mfc_origin == iph->saddr) { 1034 found = true; 1035 break; 1036 } 1037 } 1038 1039 if (!found) { 1040 /* Create a new entry if allowable */ 1041 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1042 (c = ipmr_cache_alloc_unres()) == NULL) { 1043 spin_unlock_bh(&mfc_unres_lock); 1044 1045 kfree_skb(skb); 1046 return -ENOBUFS; 1047 } 1048 1049 /* Fill in the new cache entry */ 1050 c->mfc_parent = -1; 1051 c->mfc_origin = iph->saddr; 1052 c->mfc_mcastgrp = iph->daddr; 1053 1054 /* Reflect first query at mrouted. 
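 * The IGMPMSG_NOCACHE upcall asks the daemon to resolve this (S,G); the
 * daemon typically answers by installing an MFC entry (e.g. MRT_ADD_MFC).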
*/ 1055 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1056 if (err < 0) { 1057 /* If the report failed throw the cache entry 1058 out - Brad Parker 1059 */ 1060 spin_unlock_bh(&mfc_unres_lock); 1061 1062 ipmr_cache_free(c); 1063 kfree_skb(skb); 1064 return err; 1065 } 1066 1067 atomic_inc(&mrt->cache_resolve_queue_len); 1068 list_add(&c->list, &mrt->mfc_unres_queue); 1069 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1070 1071 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1072 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1073 } 1074 1075 /* See if we can append the packet */ 1076 if (c->mfc_un.unres.unresolved.qlen > 3) { 1077 kfree_skb(skb); 1078 err = -ENOBUFS; 1079 } else { 1080 skb_queue_tail(&c->mfc_un.unres.unresolved, skb); 1081 err = 0; 1082 } 1083 1084 spin_unlock_bh(&mfc_unres_lock); 1085 return err; 1086 } 1087 1088 /* MFC cache manipulation by user space mroute daemon */ 1089 1090 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1091 { 1092 int line; 1093 struct mfc_cache *c, *next; 1094 1095 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1096 1097 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { 1098 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1099 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && 1100 (parent == -1 || parent == c->mfc_parent)) { 1101 list_del_rcu(&c->list); 1102 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1103 ipmr_cache_free(c); 1104 return 0; 1105 } 1106 } 1107 return -ENOENT; 1108 } 1109 1110 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1111 struct mfcctl *mfc, int mrtsock, int parent) 1112 { 1113 bool found = false; 1114 int line; 1115 struct mfc_cache *uc, *c; 1116 1117 if (mfc->mfcc_parent >= MAXVIFS) 1118 return -ENFILE; 1119 1120 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1121 1122 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 1123 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1124 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && 1125 (parent == -1 || parent == c->mfc_parent)) { 1126 found = true; 1127 break; 1128 } 1129 } 1130 1131 if (found) { 1132 write_lock_bh(&mrt_lock); 1133 c->mfc_parent = mfc->mfcc_parent; 1134 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1135 if (!mrtsock) 1136 c->mfc_flags |= MFC_STATIC; 1137 write_unlock_bh(&mrt_lock); 1138 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1139 return 0; 1140 } 1141 1142 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1143 !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1144 return -EINVAL; 1145 1146 c = ipmr_cache_alloc(); 1147 if (!c) 1148 return -ENOMEM; 1149 1150 c->mfc_origin = mfc->mfcc_origin.s_addr; 1151 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1152 c->mfc_parent = mfc->mfcc_parent; 1153 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1154 if (!mrtsock) 1155 c->mfc_flags |= MFC_STATIC; 1156 1157 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); 1158 1159 /* Check to see if we resolved a queued list. If so we 1160 * need to send on the frames and tidy up. 
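 * (ipmr_cache_resolve() replays any skbs queued on the unresolved entry
 * through ip_mr_forward(), or completes pending netlink requests.)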
1161 */ 1162 found = false; 1163 spin_lock_bh(&mfc_unres_lock); 1164 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { 1165 if (uc->mfc_origin == c->mfc_origin && 1166 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1167 list_del(&uc->list); 1168 atomic_dec(&mrt->cache_resolve_queue_len); 1169 found = true; 1170 break; 1171 } 1172 } 1173 if (list_empty(&mrt->mfc_unres_queue)) 1174 del_timer(&mrt->ipmr_expire_timer); 1175 spin_unlock_bh(&mfc_unres_lock); 1176 1177 if (found) { 1178 ipmr_cache_resolve(net, mrt, uc, c); 1179 ipmr_cache_free(uc); 1180 } 1181 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1182 return 0; 1183 } 1184 1185 /* Close the multicast socket, and clear the vif tables etc */ 1186 static void mroute_clean_tables(struct mr_table *mrt, bool all) 1187 { 1188 int i; 1189 LIST_HEAD(list); 1190 struct mfc_cache *c, *next; 1191 1192 /* Shut down all active vif entries */ 1193 for (i = 0; i < mrt->maxvif; i++) { 1194 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) 1195 continue; 1196 vif_delete(mrt, i, 0, &list); 1197 } 1198 unregister_netdevice_many(&list); 1199 1200 /* Wipe the cache */ 1201 for (i = 0; i < MFC_LINES; i++) { 1202 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1203 if (!all && (c->mfc_flags & MFC_STATIC)) 1204 continue; 1205 list_del_rcu(&c->list); 1206 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1207 ipmr_cache_free(c); 1208 } 1209 } 1210 1211 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1212 spin_lock_bh(&mfc_unres_lock); 1213 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 1214 list_del(&c->list); 1215 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1216 ipmr_destroy_unres(mrt, c); 1217 } 1218 spin_unlock_bh(&mfc_unres_lock); 1219 } 1220 } 1221 1222 /* called from ip_ra_control(), before an RCU grace period, 1223 * we dont need to call synchronize_rcu() here 1224 */ 1225 static void mrtsock_destruct(struct sock *sk) 1226 { 1227 struct net *net = sock_net(sk); 1228 struct mr_table *mrt; 1229 1230 rtnl_lock(); 1231 ipmr_for_each_table(mrt, net) { 1232 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1233 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1234 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, 1235 NETCONFA_IFINDEX_ALL, 1236 net->ipv4.devconf_all); 1237 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1238 mroute_clean_tables(mrt, false); 1239 } 1240 } 1241 rtnl_unlock(); 1242 } 1243 1244 /* Socket options and virtual interface manipulation. The whole 1245 * virtual interface system is a complete heap, but unfortunately 1246 * that's how BSD mrouted happens to think. Maybe one day with a proper 1247 * MOSPF/PIM router set up we can clean this up. 1248 */ 1249 1250 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, 1251 unsigned int optlen) 1252 { 1253 struct net *net = sock_net(sk); 1254 int val, ret = 0, parent = 0; 1255 struct mr_table *mrt; 1256 struct vifctl vif; 1257 struct mfcctl mfc; 1258 u32 uval; 1259 1260 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1261 rtnl_lock(); 1262 if (sk->sk_type != SOCK_RAW || 1263 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1264 ret = -EOPNOTSUPP; 1265 goto out_unlock; 1266 } 1267 1268 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1269 if (!mrt) { 1270 ret = -ENOENT; 1271 goto out_unlock; 1272 } 1273 if (optname != MRT_INIT) { 1274 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1275 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1276 ret = -EACCES; 1277 goto out_unlock; 1278 } 1279 } 1280 1281 switch (optname) { 1282 case MRT_INIT: 1283 if (optlen != sizeof(int)) { 1284 ret = -EINVAL; 1285 break; 1286 } 1287 if (rtnl_dereference(mrt->mroute_sk)) { 1288 ret = -EADDRINUSE; 1289 break; 1290 } 1291 1292 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1293 if (ret == 0) { 1294 rcu_assign_pointer(mrt->mroute_sk, sk); 1295 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1296 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, 1297 NETCONFA_IFINDEX_ALL, 1298 net->ipv4.devconf_all); 1299 } 1300 break; 1301 case MRT_DONE: 1302 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1303 ret = -EACCES; 1304 } else { 1305 /* We need to unlock here because mrtsock_destruct takes 1306 * care of rtnl itself and we can't change that due to 1307 * the IP_ROUTER_ALERT setsockopt which runs without it. 1308 */ 1309 rtnl_unlock(); 1310 ret = ip_ra_control(sk, 0, NULL); 1311 goto out; 1312 } 1313 break; 1314 case MRT_ADD_VIF: 1315 case MRT_DEL_VIF: 1316 if (optlen != sizeof(vif)) { 1317 ret = -EINVAL; 1318 break; 1319 } 1320 if (copy_from_user(&vif, optval, sizeof(vif))) { 1321 ret = -EFAULT; 1322 break; 1323 } 1324 if (vif.vifc_vifi >= MAXVIFS) { 1325 ret = -ENFILE; 1326 break; 1327 } 1328 if (optname == MRT_ADD_VIF) { 1329 ret = vif_add(net, mrt, &vif, 1330 sk == rtnl_dereference(mrt->mroute_sk)); 1331 } else { 1332 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1333 } 1334 break; 1335 /* Manipulate the forwarding caches. These live 1336 * in a sort of kernel/user symbiosis. 1337 */ 1338 case MRT_ADD_MFC: 1339 case MRT_DEL_MFC: 1340 parent = -1; 1341 case MRT_ADD_MFC_PROXY: 1342 case MRT_DEL_MFC_PROXY: 1343 if (optlen != sizeof(mfc)) { 1344 ret = -EINVAL; 1345 break; 1346 } 1347 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1348 ret = -EFAULT; 1349 break; 1350 } 1351 if (parent == 0) 1352 parent = mfc.mfcc_parent; 1353 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1354 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1355 else 1356 ret = ipmr_mfc_add(net, mrt, &mfc, 1357 sk == rtnl_dereference(mrt->mroute_sk), 1358 parent); 1359 break; 1360 /* Control PIM assert. 
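 * When asserts are enabled, packets arriving on an unexpected VIF trigger
 * an IGMPMSG_WRONGVIF upcall to the daemon (see ip_mr_forward()).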
*/ 1361 case MRT_ASSERT: 1362 if (optlen != sizeof(val)) { 1363 ret = -EINVAL; 1364 break; 1365 } 1366 if (get_user(val, (int __user *)optval)) { 1367 ret = -EFAULT; 1368 break; 1369 } 1370 mrt->mroute_do_assert = val; 1371 break; 1372 case MRT_PIM: 1373 if (!ipmr_pimsm_enabled()) { 1374 ret = -ENOPROTOOPT; 1375 break; 1376 } 1377 if (optlen != sizeof(val)) { 1378 ret = -EINVAL; 1379 break; 1380 } 1381 if (get_user(val, (int __user *)optval)) { 1382 ret = -EFAULT; 1383 break; 1384 } 1385 1386 val = !!val; 1387 if (val != mrt->mroute_do_pim) { 1388 mrt->mroute_do_pim = val; 1389 mrt->mroute_do_assert = val; 1390 } 1391 break; 1392 case MRT_TABLE: 1393 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1394 ret = -ENOPROTOOPT; 1395 break; 1396 } 1397 if (optlen != sizeof(uval)) { 1398 ret = -EINVAL; 1399 break; 1400 } 1401 if (get_user(uval, (u32 __user *)optval)) { 1402 ret = -EFAULT; 1403 break; 1404 } 1405 1406 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1407 ret = -EBUSY; 1408 } else { 1409 mrt = ipmr_new_table(net, uval); 1410 if (IS_ERR(mrt)) 1411 ret = PTR_ERR(mrt); 1412 else 1413 raw_sk(sk)->ipmr_table = uval; 1414 } 1415 break; 1416 /* Spurious command, or MRT_VERSION which you cannot set. */ 1417 default: 1418 ret = -ENOPROTOOPT; 1419 } 1420 out_unlock: 1421 rtnl_unlock(); 1422 out: 1423 return ret; 1424 } 1425 1426 /* Getsock opt support for the multicast routing system. */ 1427 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1428 { 1429 int olr; 1430 int val; 1431 struct net *net = sock_net(sk); 1432 struct mr_table *mrt; 1433 1434 if (sk->sk_type != SOCK_RAW || 1435 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1436 return -EOPNOTSUPP; 1437 1438 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1439 if (!mrt) 1440 return -ENOENT; 1441 1442 switch (optname) { 1443 case MRT_VERSION: 1444 val = 0x0305; 1445 break; 1446 case MRT_PIM: 1447 if (!ipmr_pimsm_enabled()) 1448 return -ENOPROTOOPT; 1449 val = mrt->mroute_do_pim; 1450 break; 1451 case MRT_ASSERT: 1452 val = mrt->mroute_do_assert; 1453 break; 1454 default: 1455 return -ENOPROTOOPT; 1456 } 1457 1458 if (get_user(olr, optlen)) 1459 return -EFAULT; 1460 olr = min_t(unsigned int, olr, sizeof(int)); 1461 if (olr < 0) 1462 return -EINVAL; 1463 if (put_user(olr, optlen)) 1464 return -EFAULT; 1465 if (copy_to_user(optval, &val, olr)) 1466 return -EFAULT; 1467 return 0; 1468 } 1469 1470 /* The IP multicast ioctl support routines. */ 1471 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1472 { 1473 struct sioc_sg_req sr; 1474 struct sioc_vif_req vr; 1475 struct vif_device *vif; 1476 struct mfc_cache *c; 1477 struct net *net = sock_net(sk); 1478 struct mr_table *mrt; 1479 1480 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1481 if (!mrt) 1482 return -ENOENT; 1483 1484 switch (cmd) { 1485 case SIOCGETVIFCNT: 1486 if (copy_from_user(&vr, arg, sizeof(vr))) 1487 return -EFAULT; 1488 if (vr.vifi >= mrt->maxvif) 1489 return -EINVAL; 1490 read_lock(&mrt_lock); 1491 vif = &mrt->vif_table[vr.vifi]; 1492 if (VIF_EXISTS(mrt, vr.vifi)) { 1493 vr.icount = vif->pkt_in; 1494 vr.ocount = vif->pkt_out; 1495 vr.ibytes = vif->bytes_in; 1496 vr.obytes = vif->bytes_out; 1497 read_unlock(&mrt_lock); 1498 1499 if (copy_to_user(arg, &vr, sizeof(vr))) 1500 return -EFAULT; 1501 return 0; 1502 } 1503 read_unlock(&mrt_lock); 1504 return -EADDRNOTAVAIL; 1505 case SIOCGETSGCNT: 1506 if (copy_from_user(&sr, arg, sizeof(sr))) 1507 return -EFAULT; 1508 1509 rcu_read_lock(); 1510 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1511 if (c) { 1512 sr.pktcnt = c->mfc_un.res.pkt; 1513 sr.bytecnt = c->mfc_un.res.bytes; 1514 sr.wrong_if = c->mfc_un.res.wrong_if; 1515 rcu_read_unlock(); 1516 1517 if (copy_to_user(arg, &sr, sizeof(sr))) 1518 return -EFAULT; 1519 return 0; 1520 } 1521 rcu_read_unlock(); 1522 return -EADDRNOTAVAIL; 1523 default: 1524 return -ENOIOCTLCMD; 1525 } 1526 } 1527 1528 #ifdef CONFIG_COMPAT 1529 struct compat_sioc_sg_req { 1530 struct in_addr src; 1531 struct in_addr grp; 1532 compat_ulong_t pktcnt; 1533 compat_ulong_t bytecnt; 1534 compat_ulong_t wrong_if; 1535 }; 1536 1537 struct compat_sioc_vif_req { 1538 vifi_t vifi; /* Which iface */ 1539 compat_ulong_t icount; 1540 compat_ulong_t ocount; 1541 compat_ulong_t ibytes; 1542 compat_ulong_t obytes; 1543 }; 1544 1545 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1546 { 1547 struct compat_sioc_sg_req sr; 1548 struct compat_sioc_vif_req vr; 1549 struct vif_device *vif; 1550 struct mfc_cache *c; 1551 struct net *net = sock_net(sk); 1552 struct mr_table *mrt; 1553 1554 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1555 if (!mrt) 1556 return -ENOENT; 1557 1558 switch (cmd) { 1559 case SIOCGETVIFCNT: 1560 if (copy_from_user(&vr, arg, sizeof(vr))) 1561 return -EFAULT; 1562 if (vr.vifi >= mrt->maxvif) 1563 return -EINVAL; 1564 read_lock(&mrt_lock); 1565 vif = &mrt->vif_table[vr.vifi]; 1566 if (VIF_EXISTS(mrt, vr.vifi)) { 1567 vr.icount = vif->pkt_in; 1568 vr.ocount = vif->pkt_out; 1569 vr.ibytes = vif->bytes_in; 1570 vr.obytes = vif->bytes_out; 1571 read_unlock(&mrt_lock); 1572 1573 if (copy_to_user(arg, &vr, sizeof(vr))) 1574 return -EFAULT; 1575 return 0; 1576 } 1577 read_unlock(&mrt_lock); 1578 return -EADDRNOTAVAIL; 1579 case SIOCGETSGCNT: 1580 if (copy_from_user(&sr, arg, sizeof(sr))) 1581 return -EFAULT; 1582 1583 rcu_read_lock(); 1584 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1585 if (c) { 1586 sr.pktcnt = c->mfc_un.res.pkt; 1587 sr.bytecnt = c->mfc_un.res.bytes; 1588 sr.wrong_if = c->mfc_un.res.wrong_if; 1589 rcu_read_unlock(); 1590 1591 if (copy_to_user(arg, &sr, sizeof(sr))) 1592 return -EFAULT; 1593 return 0; 1594 } 1595 rcu_read_unlock(); 1596 return -EADDRNOTAVAIL; 1597 default: 1598 return -ENOIOCTLCMD; 1599 } 1600 } 1601 #endif 1602 1603 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1604 { 1605 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1606 struct net *net = dev_net(dev); 1607 struct mr_table *mrt; 1608 struct vif_device *v; 1609 int ct; 1610 1611 if (event != NETDEV_UNREGISTER) 1612 return NOTIFY_DONE; 1613 1614 ipmr_for_each_table(mrt, net) { 1615 v = &mrt->vif_table[0]; 1616 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1617 if (v->dev == dev) 1618 vif_delete(mrt, ct, 1, NULL); 1619 } 1620 } 1621 return NOTIFY_DONE; 1622 } 1623 1624 static struct notifier_block ip_mr_notifier = { 1625 .notifier_call = ipmr_device_event, 1626 }; 1627 1628 /* Encapsulate a packet by attaching a valid IPIP header to it. 1629 * This avoids tunnel drivers and other mess and gives us the speed so 1630 * important for multicast video. 
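 * (ip_encap() prepends a minimal 20-byte IPIP header in place; it is used
 * by ipmr_queue_xmit() for VIFF_TUNNEL vifs.)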
1631 */ 1632 static void ip_encap(struct net *net, struct sk_buff *skb, 1633 __be32 saddr, __be32 daddr) 1634 { 1635 struct iphdr *iph; 1636 const struct iphdr *old_iph = ip_hdr(skb); 1637 1638 skb_push(skb, sizeof(struct iphdr)); 1639 skb->transport_header = skb->network_header; 1640 skb_reset_network_header(skb); 1641 iph = ip_hdr(skb); 1642 1643 iph->version = 4; 1644 iph->tos = old_iph->tos; 1645 iph->ttl = old_iph->ttl; 1646 iph->frag_off = 0; 1647 iph->daddr = daddr; 1648 iph->saddr = saddr; 1649 iph->protocol = IPPROTO_IPIP; 1650 iph->ihl = 5; 1651 iph->tot_len = htons(skb->len); 1652 ip_select_ident(net, skb, NULL); 1653 ip_send_check(iph); 1654 1655 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1656 nf_reset(skb); 1657 } 1658 1659 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1660 struct sk_buff *skb) 1661 { 1662 struct ip_options *opt = &(IPCB(skb)->opt); 1663 1664 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1665 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1666 1667 if (unlikely(opt->optlen)) 1668 ip_forward_options(skb); 1669 1670 return dst_output(net, sk, skb); 1671 } 1672 1673 /* Processing handlers for ipmr_forward */ 1674 1675 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1676 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1677 { 1678 const struct iphdr *iph = ip_hdr(skb); 1679 struct vif_device *vif = &mrt->vif_table[vifi]; 1680 struct net_device *dev; 1681 struct rtable *rt; 1682 struct flowi4 fl4; 1683 int encap = 0; 1684 1685 if (!vif->dev) 1686 goto out_free; 1687 1688 if (vif->flags & VIFF_REGISTER) { 1689 vif->pkt_out++; 1690 vif->bytes_out += skb->len; 1691 vif->dev->stats.tx_bytes += skb->len; 1692 vif->dev->stats.tx_packets++; 1693 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1694 goto out_free; 1695 } 1696 1697 if (vif->flags & VIFF_TUNNEL) { 1698 rt = ip_route_output_ports(net, &fl4, NULL, 1699 vif->remote, vif->local, 1700 0, 0, 1701 IPPROTO_IPIP, 1702 RT_TOS(iph->tos), vif->link); 1703 if (IS_ERR(rt)) 1704 goto out_free; 1705 encap = sizeof(struct iphdr); 1706 } else { 1707 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1708 0, 0, 1709 IPPROTO_IPIP, 1710 RT_TOS(iph->tos), vif->link); 1711 if (IS_ERR(rt)) 1712 goto out_free; 1713 } 1714 1715 dev = rt->dst.dev; 1716 1717 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1718 /* Do not fragment multicasts. Alas, IPv4 does not 1719 * allow to send ICMP, so that packets will disappear 1720 * to blackhole. 1721 */ 1722 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1723 ip_rt_put(rt); 1724 goto out_free; 1725 } 1726 1727 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; 1728 1729 if (skb_cow(skb, encap)) { 1730 ip_rt_put(rt); 1731 goto out_free; 1732 } 1733 1734 vif->pkt_out++; 1735 vif->bytes_out += skb->len; 1736 1737 skb_dst_drop(skb); 1738 skb_dst_set(skb, &rt->dst); 1739 ip_decrease_ttl(ip_hdr(skb)); 1740 1741 /* FIXME: forward and output firewalls used to be called here. 1742 * What do we do with netfilter? -- RR 1743 */ 1744 if (vif->flags & VIFF_TUNNEL) { 1745 ip_encap(net, skb, vif->local, vif->remote); 1746 /* FIXME: extra output firewall step used to be here. --RR */ 1747 vif->dev->stats.tx_packets++; 1748 vif->dev->stats.tx_bytes += skb->len; 1749 } 1750 1751 IPCB(skb)->flags |= IPSKB_FORWARDED; 1752 1753 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1754 * not only before forwarding, but after forwarding on all output 1755 * interfaces. 
 *	It is clear that, if the mrouter runs a multicasting
 *	program, it should receive packets regardless of which interface
 *	the program has joined on.
 *	Otherwise the program would have to join on all
 *	interfaces. On the other hand, a multihomed host (or router, but
 *	not an mrouter) cannot join on more than one interface - that would
 *	result in it receiving duplicate packets.
 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc_cache *cache,
			  int local)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ipmr_find_vif(mrt, skb->dev);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != skb->dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons are
			 * fixed is not to redistribute a packet if it was
			 * sent through the wrong interface. It means that
			 * multicast applications WILL NOT work for
			 * (S,G) entries which have a default multicast route
			 * pointing to the wrong oif. In any case, it is not
			 * a good idea to use multicasting applications on
			 * a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		     * so we cannot check that the packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/* Forward the frame */
	if (cache->mfc_origin == htonl(INADDR_ANY) &&
	    cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mfc_parent &&
		    ip_hdr(skb)->ttl >
				cache->mfc_un.res.ttls[cache->mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
1857 */ 1858 psend = cache->mfc_parent; 1859 goto last_forward; 1860 } 1861 goto dont_forward; 1862 } 1863 for (ct = cache->mfc_un.res.maxvif - 1; 1864 ct >= cache->mfc_un.res.minvif; ct--) { 1865 /* For (*,G) entry, don't forward to the incoming interface */ 1866 if ((cache->mfc_origin != htonl(INADDR_ANY) || 1867 ct != true_vifi) && 1868 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1869 if (psend != -1) { 1870 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1871 1872 if (skb2) 1873 ipmr_queue_xmit(net, mrt, skb2, cache, 1874 psend); 1875 } 1876 psend = ct; 1877 } 1878 } 1879 last_forward: 1880 if (psend != -1) { 1881 if (local) { 1882 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1883 1884 if (skb2) 1885 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1886 } else { 1887 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1888 return; 1889 } 1890 } 1891 1892 dont_forward: 1893 if (!local) 1894 kfree_skb(skb); 1895 } 1896 1897 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 1898 { 1899 struct rtable *rt = skb_rtable(skb); 1900 struct iphdr *iph = ip_hdr(skb); 1901 struct flowi4 fl4 = { 1902 .daddr = iph->daddr, 1903 .saddr = iph->saddr, 1904 .flowi4_tos = RT_TOS(iph->tos), 1905 .flowi4_oif = (rt_is_output_route(rt) ? 1906 skb->dev->ifindex : 0), 1907 .flowi4_iif = (rt_is_output_route(rt) ? 1908 LOOPBACK_IFINDEX : 1909 skb->dev->ifindex), 1910 .flowi4_mark = skb->mark, 1911 }; 1912 struct mr_table *mrt; 1913 int err; 1914 1915 err = ipmr_fib_lookup(net, &fl4, &mrt); 1916 if (err) 1917 return ERR_PTR(err); 1918 return mrt; 1919 } 1920 1921 /* Multicast packets for forwarding arrive here 1922 * Called with rcu_read_lock(); 1923 */ 1924 int ip_mr_input(struct sk_buff *skb) 1925 { 1926 struct mfc_cache *cache; 1927 struct net *net = dev_net(skb->dev); 1928 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1929 struct mr_table *mrt; 1930 1931 /* Packet is looped back after forward, it should not be 1932 * forwarded second time, but still can be delivered locally. 1933 */ 1934 if (IPCB(skb)->flags & IPSKB_FORWARDED) 1935 goto dont_forward; 1936 1937 mrt = ipmr_rt_fib_lookup(net, skb); 1938 if (IS_ERR(mrt)) { 1939 kfree_skb(skb); 1940 return PTR_ERR(mrt); 1941 } 1942 if (!local) { 1943 if (IPCB(skb)->opt.router_alert) { 1944 if (ip_call_ra_chain(skb)) 1945 return 0; 1946 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 1947 /* IGMPv1 (and broken IGMPv2 implementations sort of 1948 * Cisco IOS <= 11.2(8)) do not put router alert 1949 * option to IGMP packets destined to routable 1950 * groups. It is very bad, because it means 1951 * that we can forward NO IGMP messages. 
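 * As a workaround, such IGMP packets are handed straight to the mroute
 * socket via raw_rcv() below.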
1952 */ 1953 struct sock *mroute_sk; 1954 1955 mroute_sk = rcu_dereference(mrt->mroute_sk); 1956 if (mroute_sk) { 1957 nf_reset(skb); 1958 raw_rcv(mroute_sk, skb); 1959 return 0; 1960 } 1961 } 1962 } 1963 1964 /* already under rcu_read_lock() */ 1965 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1966 if (!cache) { 1967 int vif = ipmr_find_vif(mrt, skb->dev); 1968 1969 if (vif >= 0) 1970 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 1971 vif); 1972 } 1973 1974 /* No usable cache entry */ 1975 if (!cache) { 1976 int vif; 1977 1978 if (local) { 1979 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1980 ip_local_deliver(skb); 1981 if (!skb2) 1982 return -ENOBUFS; 1983 skb = skb2; 1984 } 1985 1986 read_lock(&mrt_lock); 1987 vif = ipmr_find_vif(mrt, skb->dev); 1988 if (vif >= 0) { 1989 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1990 read_unlock(&mrt_lock); 1991 1992 return err2; 1993 } 1994 read_unlock(&mrt_lock); 1995 kfree_skb(skb); 1996 return -ENODEV; 1997 } 1998 1999 read_lock(&mrt_lock); 2000 ip_mr_forward(net, mrt, skb, cache, local); 2001 read_unlock(&mrt_lock); 2002 2003 if (local) 2004 return ip_local_deliver(skb); 2005 2006 return 0; 2007 2008 dont_forward: 2009 if (local) 2010 return ip_local_deliver(skb); 2011 kfree_skb(skb); 2012 return 0; 2013 } 2014 2015 #ifdef CONFIG_IP_PIMSM_V1 2016 /* Handle IGMP messages of PIMv1 */ 2017 int pim_rcv_v1(struct sk_buff *skb) 2018 { 2019 struct igmphdr *pim; 2020 struct net *net = dev_net(skb->dev); 2021 struct mr_table *mrt; 2022 2023 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2024 goto drop; 2025 2026 pim = igmp_hdr(skb); 2027 2028 mrt = ipmr_rt_fib_lookup(net, skb); 2029 if (IS_ERR(mrt)) 2030 goto drop; 2031 if (!mrt->mroute_do_pim || 2032 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2033 goto drop; 2034 2035 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2036 drop: 2037 kfree_skb(skb); 2038 } 2039 return 0; 2040 } 2041 #endif 2042 2043 #ifdef CONFIG_IP_PIMSM_V2 2044 static int pim_rcv(struct sk_buff *skb) 2045 { 2046 struct pimreghdr *pim; 2047 struct net *net = dev_net(skb->dev); 2048 struct mr_table *mrt; 2049 2050 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2051 goto drop; 2052 2053 pim = (struct pimreghdr *)skb_transport_header(skb); 2054 if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) || 2055 (pim->flags & PIM_NULL_REGISTER) || 2056 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2057 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2058 goto drop; 2059 2060 mrt = ipmr_rt_fib_lookup(net, skb); 2061 if (IS_ERR(mrt)) 2062 goto drop; 2063 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2064 drop: 2065 kfree_skb(skb); 2066 } 2067 return 0; 2068 } 2069 #endif 2070 2071 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2072 struct mfc_cache *c, struct rtmsg *rtm) 2073 { 2074 int ct; 2075 struct rtnexthop *nhp; 2076 struct nlattr *mp_attr; 2077 struct rta_mfc_stats mfcs; 2078 2079 /* If cache is unresolved, don't try to parse IIF and OIF */ 2080 if (c->mfc_parent >= MAXVIFS) 2081 return -ENOENT; 2082 2083 if (VIF_EXISTS(mrt, c->mfc_parent) && 2084 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2085 return -EMSGSIZE; 2086 2087 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2088 return -EMSGSIZE; 2089 2090 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2091 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2092 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { 2093 
nla_nest_cancel(skb, mp_attr); 2094 return -EMSGSIZE; 2095 } 2096 2097 nhp->rtnh_flags = 0; 2098 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2099 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2100 nhp->rtnh_len = sizeof(*nhp); 2101 } 2102 } 2103 2104 nla_nest_end(skb, mp_attr); 2105 2106 mfcs.mfcs_packets = c->mfc_un.res.pkt; 2107 mfcs.mfcs_bytes = c->mfc_un.res.bytes; 2108 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; 2109 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) 2110 return -EMSGSIZE; 2111 2112 rtm->rtm_type = RTN_MULTICAST; 2113 return 1; 2114 } 2115 2116 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2117 __be32 saddr, __be32 daddr, 2118 struct rtmsg *rtm, int nowait) 2119 { 2120 struct mfc_cache *cache; 2121 struct mr_table *mrt; 2122 int err; 2123 2124 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2125 if (!mrt) 2126 return -ENOENT; 2127 2128 rcu_read_lock(); 2129 cache = ipmr_cache_find(mrt, saddr, daddr); 2130 if (!cache && skb->dev) { 2131 int vif = ipmr_find_vif(mrt, skb->dev); 2132 2133 if (vif >= 0) 2134 cache = ipmr_cache_find_any(mrt, daddr, vif); 2135 } 2136 if (!cache) { 2137 struct sk_buff *skb2; 2138 struct iphdr *iph; 2139 struct net_device *dev; 2140 int vif = -1; 2141 2142 if (nowait) { 2143 rcu_read_unlock(); 2144 return -EAGAIN; 2145 } 2146 2147 dev = skb->dev; 2148 read_lock(&mrt_lock); 2149 if (dev) 2150 vif = ipmr_find_vif(mrt, dev); 2151 if (vif < 0) { 2152 read_unlock(&mrt_lock); 2153 rcu_read_unlock(); 2154 return -ENODEV; 2155 } 2156 skb2 = skb_clone(skb, GFP_ATOMIC); 2157 if (!skb2) { 2158 read_unlock(&mrt_lock); 2159 rcu_read_unlock(); 2160 return -ENOMEM; 2161 } 2162 2163 skb_push(skb2, sizeof(struct iphdr)); 2164 skb_reset_network_header(skb2); 2165 iph = ip_hdr(skb2); 2166 iph->ihl = sizeof(struct iphdr) >> 2; 2167 iph->saddr = saddr; 2168 iph->daddr = daddr; 2169 iph->version = 0; 2170 err = ipmr_cache_unresolved(mrt, vif, skb2); 2171 read_unlock(&mrt_lock); 2172 rcu_read_unlock(); 2173 return err; 2174 } 2175 2176 read_lock(&mrt_lock); 2177 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2178 read_unlock(&mrt_lock); 2179 rcu_read_unlock(); 2180 return err; 2181 } 2182 2183 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2184 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2185 int flags) 2186 { 2187 struct nlmsghdr *nlh; 2188 struct rtmsg *rtm; 2189 int err; 2190 2191 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2192 if (!nlh) 2193 return -EMSGSIZE; 2194 2195 rtm = nlmsg_data(nlh); 2196 rtm->rtm_family = RTNL_FAMILY_IPMR; 2197 rtm->rtm_dst_len = 32; 2198 rtm->rtm_src_len = 32; 2199 rtm->rtm_tos = 0; 2200 rtm->rtm_table = mrt->id; 2201 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2202 goto nla_put_failure; 2203 rtm->rtm_type = RTN_MULTICAST; 2204 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2205 if (c->mfc_flags & MFC_STATIC) 2206 rtm->rtm_protocol = RTPROT_STATIC; 2207 else 2208 rtm->rtm_protocol = RTPROT_MROUTED; 2209 rtm->rtm_flags = 0; 2210 2211 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2212 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2213 goto nla_put_failure; 2214 err = __ipmr_fill_mroute(mrt, skb, c, rtm); 2215 /* do not break the dump if cache is unresolved */ 2216 if (err < 0 && err != -ENOENT) 2217 goto nla_put_failure; 2218 2219 nlmsg_end(skb, nlh); 2220 return 0; 2221 2222 nla_put_failure: 2223 nlmsg_cancel(skb, nlh); 2224 return -EMSGSIZE; 2225 } 2226 2227 static size_t mroute_msgsize(bool unresolved, int maxvif) 2228 { 2229 size_t len = 2230 
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)			/* RTA_TABLE */
		+ nla_total_size(4)			/* RTA_SRC */
		+ nla_total_size(4)			/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)		/* RTA_IIF */
		      + nla_total_size(0)		/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
							/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	rcu_read_lock();
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     mfc, RTM_NEWROUTE,
						     NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ipmr_fill_mroute(mrt, skb,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     mfc, RTM_NEWROUTE,
					     NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	rcu_read_unlock();

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
	[RTA_SRC]	= { .type = NLA_U32 },
	[RTA_DST]	= { .type = NLA_U32 },
	[RTA_IIF]	= { .type = NLA_U32 },
	[RTA_TABLE]	= { .type = NLA_U32 },
	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
};

static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
{
	switch (rtm_protocol) {
	case RTPROT_STATIC:
	case RTPROT_MROUTED:
		return true;
	}
	return false;
}
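
/* Illustrative sketch, not part of the original file: a request that the
 * RTM_NEWROUTE/RTM_DELROUTE handlers below will accept has to carry roughly
 * the following, mirroring the checks in rtm_to_ipmr_mfcc():
 *
 *	rtm_family   = RTNL_FAMILY_IPMR
 *	rtm_dst_len  = 32
 *	rtm_type     = RTN_MULTICAST, rtm_scope = RT_SCOPE_UNIVERSE
 *	rtm_protocol = RTPROT_STATIC or RTPROT_MROUTED
 *	RTA_SRC       source address (S)
 *	RTA_DST       multicast group (G)
 *	RTA_IIF       ifindex of the incoming interface, resolved to a vif
 *	RTA_MULTIPATH one rtnexthop per outgoing vif, TTL threshold in rtnh_hops
 *	RTA_TABLE     optional table id, defaults to RT_TABLE_DEFAULT
 *
 * The presence of RTA_PREFSRC makes rtm_to_ipmr_mfcc() return 1, i.e. the
 * entry is treated as an MFC proxy entry (MRT_ADD_MFC_PROXY semantics).
 */
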
static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
{
	struct rtnexthop *rtnh = nla_data(nla);
	int remaining = nla_len(nla), vifi = 0;

	while (rtnh_ok(rtnh, remaining)) {
		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
		if (++vifi == MAXVIFS)
			break;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return remaining > 0 ? -EINVAL : vifi;
}

/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
			    struct mfcctl *mfcc, int *mrtsock,
			    struct mr_table **mrtret)
{
	struct net_device *dev = NULL;
	u32 tblid = RT_TABLE_DEFAULT;
	struct mr_table *mrt;
	struct nlattr *attr;
	struct rtmsg *rtm;
	int ret, rem;

	ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy);
	if (ret < 0)
		goto out;
	rtm = nlmsg_data(nlh);

	ret = -EINVAL;
	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
	    rtm->rtm_type != RTN_MULTICAST ||
	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
		goto out;

	memset(mfcc, 0, sizeof(*mfcc));
	mfcc->mfcc_parent = -1;
	ret = 0;
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
		switch (nla_type(attr)) {
		case RTA_SRC:
			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
			break;
		case RTA_DST:
			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
			break;
		case RTA_IIF:
			dev = __dev_get_by_index(net, nla_get_u32(attr));
			if (!dev) {
				ret = -ENODEV;
				goto out;
			}
			break;
		case RTA_MULTIPATH:
			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case RTA_PREFSRC:
			ret = 1;
			break;
		case RTA_TABLE:
			tblid = nla_get_u32(attr);
			break;
		}
	}
	mrt = ipmr_get_table(net, tblid);
	if (!mrt) {
		ret = -ENOENT;
		goto out;
	}
	*mrtret = mrt;
	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
	if (dev)
		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);

out:
	return ret;
}

/* takes care of both newroute and delroute */
static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	int ret, mrtsock, parent;
	struct mr_table *tbl;
	struct mfcctl mfcc;

	mrtsock = 0;
	tbl = NULL;
	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl);
	if (ret < 0)
		return ret;

	parent = ret ? mfcc.mfcc_parent : -1;
	if (nlh->nlmsg_type == RTM_NEWROUTE)
		return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
	else
		return ipmr_mfc_delete(tbl, &mfcc, parent);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing :
 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}
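
/* For reference, ipmr_vif_seq_show() below renders /proc/net/ip_mr_vif with
 * one line per vif: index, device name, byte/packet counters in each
 * direction, the VIFF_* flags from <linux/mroute.h>, and the local/remote
 * addresses as raw hex. A line looks roughly like this (values are
 * illustrative only):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0           123456     789    654321     987 00000 0A000001 00000000
 */
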
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	rcu_read_lock();
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry_rcu(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	rcu_read_unlock();

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
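
/* Note on locking: ipmr_mfc_seq_idx() above and ipmr_mfc_seq_next() below
 * return with either rcu_read_lock() held (while walking the resolved hash
 * buckets) or mfc_unres_lock held (while walking the unresolved queue).
 * ipmr_mfc_seq_stop() works out which one to release by looking at
 * it->cache.
 */
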
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	rcu_read_unlock();
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		rcu_read_unlock();
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif
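
/* pim_protocol above is hooked into the IPv4 protocol table for IPPROTO_PIM
 * (protocol 103) from ip_mr_init() below, so PIMv2 Register messages reach
 * pim_rcv() directly, without any raw-socket help from userspace.
 */
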
/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_mr_cache", net->proc_net);
	remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
		      NULL, ipmr_rtm_dumproute, NULL);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
		      ipmr_rtm_route, NULL, NULL);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
		      ipmr_rtm_route, NULL, NULL);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
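
/* Usage sketch (userspace, not part of this file): with the rtnl_register()
 * calls above in place, the whole multicast forwarding cache can be dumped
 * over a NETLINK_ROUTE socket "fd" with a standard dump request, roughly:
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg	rtm;
 *	} req = {
 *		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct rtmsg)),
 *		.nlh.nlmsg_type	 = RTM_GETROUTE,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *		.rtm.rtm_family	 = RTNL_FAMILY_IPMR,
 *	};
 *
 *	send(fd, &req, req.nlh.nlmsg_len, 0);
 *
 * Error handling and the recv/parse loop are omitted; each reply message is
 * what ipmr_fill_mroute() builds for one cache entry.
 */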