/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock.
   Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0)
		return err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}