1 /* 2 * IP multicast routing support for mrouted 3.6/3.8 3 * 4 * (c) 1995 Alan Cox, <alan@redhat.com> 5 * Linux Consultancy and Custom Driver Development 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ 13 * 14 * Fixes: 15 * Michael Chastain : Incorrect size of copying. 16 * Alan Cox : Added the cache manager code 17 * Alan Cox : Fixed the clone/copy bug and device race. 18 * Mike McLagan : Routing by source 19 * Malcolm Beattie : Buffer handling fixes. 20 * Alexey Kuznetsov : Double buffer free and other fixes. 21 * SVR Anand : Fixed several multicast bugs and problems. 22 * Alexey Kuznetsov : Status, optimisations and more. 23 * Brad Parker : Better behaviour on mrouted upcall 24 * overflow. 25 * Carlos Picoto : PIMv1 Support 26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 27 * Relax this requirement to work with older peers. 
28 * 29 */ 30 31 #include <linux/config.h> 32 #include <asm/system.h> 33 #include <asm/uaccess.h> 34 #include <linux/types.h> 35 #include <linux/sched.h> 36 #include <linux/errno.h> 37 #include <linux/timer.h> 38 #include <linux/mm.h> 39 #include <linux/kernel.h> 40 #include <linux/fcntl.h> 41 #include <linux/stat.h> 42 #include <linux/socket.h> 43 #include <linux/in.h> 44 #include <linux/inet.h> 45 #include <linux/netdevice.h> 46 #include <linux/inetdevice.h> 47 #include <linux/igmp.h> 48 #include <linux/proc_fs.h> 49 #include <linux/seq_file.h> 50 #include <linux/mroute.h> 51 #include <linux/init.h> 52 #include <net/ip.h> 53 #include <net/protocol.h> 54 #include <linux/skbuff.h> 55 #include <net/sock.h> 56 #include <net/icmp.h> 57 #include <net/udp.h> 58 #include <net/raw.h> 59 #include <linux/notifier.h> 60 #include <linux/if_arp.h> 61 #include <linux/netfilter_ipv4.h> 62 #include <net/ipip.h> 63 #include <net/checksum.h> 64 65 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 66 #define CONFIG_IP_PIMSM 1 67 #endif 68 69 static struct sock *mroute_socket; 70 71 72 /* Big lock, protecting vif table, mrt cache and mroute socket state. 73 Note that the changes are semaphored via rtnl_lock. 74 */ 75 76 static DEFINE_RWLOCK(mrt_lock); 77 78 /* 79 * Multicast router control variables 80 */ 81 82 static struct vif_device vif_table[MAXVIFS]; /* Devices */ 83 static int maxvif; 84 85 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) 86 87 static int mroute_do_assert; /* Set in PIM assert */ 88 static int mroute_do_pim; 89 90 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ 91 92 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ 93 static atomic_t cache_resolve_queue_len; /* Size of unresolved */ 94 95 /* Special spinlock for queue of unresolved entries */ 96 static DEFINE_SPINLOCK(mfc_unres_lock); 97 98 /* We return to original Alan's scheme. 
Hash table of resolved 99 entries is changed only in process context and protected 100 with weak lock mrt_lock. Queue of unresolved entries is protected 101 with strong spinlock mfc_unres_lock. 102 103 In this case data path is free of exclusive locks at all. 104 */ 105 106 static kmem_cache_t *mrt_cachep; 107 108 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 109 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); 110 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 111 112 #ifdef CONFIG_IP_PIMSM_V2 113 static struct net_protocol pim_protocol; 114 #endif 115 116 static struct timer_list ipmr_expire_timer; 117 118 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 119 120 static 121 struct net_device *ipmr_new_tunnel(struct vifctl *v) 122 { 123 struct net_device *dev; 124 125 dev = __dev_get_by_name("tunl0"); 126 127 if (dev) { 128 int err; 129 struct ifreq ifr; 130 mm_segment_t oldfs; 131 struct ip_tunnel_parm p; 132 struct in_device *in_dev; 133 134 memset(&p, 0, sizeof(p)); 135 p.iph.daddr = v->vifc_rmt_addr.s_addr; 136 p.iph.saddr = v->vifc_lcl_addr.s_addr; 137 p.iph.version = 4; 138 p.iph.ihl = 5; 139 p.iph.protocol = IPPROTO_IPIP; 140 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 141 ifr.ifr_ifru.ifru_data = (void*)&p; 142 143 oldfs = get_fs(); set_fs(KERNEL_DS); 144 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 145 set_fs(oldfs); 146 147 dev = NULL; 148 149 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { 150 dev->flags |= IFF_MULTICAST; 151 152 in_dev = __in_dev_get(dev); 153 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) 154 goto failure; 155 in_dev->cnf.rp_filter = 0; 156 157 if (dev_open(dev)) 158 goto failure; 159 } 160 } 161 return dev; 162 163 failure: 164 /* allow the register to be completed before unregistering. 
*/ 165 rtnl_unlock(); 166 rtnl_lock(); 167 168 unregister_netdevice(dev); 169 return NULL; 170 } 171 172 #ifdef CONFIG_IP_PIMSM 173 174 static int reg_vif_num = -1; 175 176 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 177 { 178 read_lock(&mrt_lock); 179 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; 180 ((struct net_device_stats*)dev->priv)->tx_packets++; 181 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 182 read_unlock(&mrt_lock); 183 kfree_skb(skb); 184 return 0; 185 } 186 187 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) 188 { 189 return (struct net_device_stats*)dev->priv; 190 } 191 192 static void reg_vif_setup(struct net_device *dev) 193 { 194 dev->type = ARPHRD_PIMREG; 195 dev->mtu = 1500 - sizeof(struct iphdr) - 8; 196 dev->flags = IFF_NOARP; 197 dev->hard_start_xmit = reg_vif_xmit; 198 dev->get_stats = reg_vif_get_stats; 199 dev->destructor = free_netdev; 200 } 201 202 static struct net_device *ipmr_reg_vif(void) 203 { 204 struct net_device *dev; 205 struct in_device *in_dev; 206 207 dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", 208 reg_vif_setup); 209 210 if (dev == NULL) 211 return NULL; 212 213 if (register_netdevice(dev)) { 214 free_netdev(dev); 215 return NULL; 216 } 217 dev->iflink = 0; 218 219 if ((in_dev = inetdev_init(dev)) == NULL) 220 goto failure; 221 222 in_dev->cnf.rp_filter = 0; 223 224 if (dev_open(dev)) 225 goto failure; 226 227 return dev; 228 229 failure: 230 /* allow the register to be completed before unregistering. 
*/ 231 rtnl_unlock(); 232 rtnl_lock(); 233 234 unregister_netdevice(dev); 235 return NULL; 236 } 237 #endif 238 239 /* 240 * Delete a VIF entry 241 */ 242 243 static int vif_delete(int vifi) 244 { 245 struct vif_device *v; 246 struct net_device *dev; 247 struct in_device *in_dev; 248 249 if (vifi < 0 || vifi >= maxvif) 250 return -EADDRNOTAVAIL; 251 252 v = &vif_table[vifi]; 253 254 write_lock_bh(&mrt_lock); 255 dev = v->dev; 256 v->dev = NULL; 257 258 if (!dev) { 259 write_unlock_bh(&mrt_lock); 260 return -EADDRNOTAVAIL; 261 } 262 263 #ifdef CONFIG_IP_PIMSM 264 if (vifi == reg_vif_num) 265 reg_vif_num = -1; 266 #endif 267 268 if (vifi+1 == maxvif) { 269 int tmp; 270 for (tmp=vifi-1; tmp>=0; tmp--) { 271 if (VIF_EXISTS(tmp)) 272 break; 273 } 274 maxvif = tmp+1; 275 } 276 277 write_unlock_bh(&mrt_lock); 278 279 dev_set_allmulti(dev, -1); 280 281 if ((in_dev = __in_dev_get(dev)) != NULL) { 282 in_dev->cnf.mc_forwarding--; 283 ip_rt_multicast_event(in_dev); 284 } 285 286 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 287 unregister_netdevice(dev); 288 289 dev_put(dev); 290 return 0; 291 } 292 293 /* Destroy an unresolved cache entry, killing queued skbs 294 and reporting error to netlink readers. 295 */ 296 297 static void ipmr_destroy_unres(struct mfc_cache *c) 298 { 299 struct sk_buff *skb; 300 301 atomic_dec(&cache_resolve_queue_len); 302 303 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { 304 if (skb->nh.iph->version == 0) { 305 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 306 nlh->nlmsg_type = NLMSG_ERROR; 307 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 308 skb_trim(skb, nlh->nlmsg_len); 309 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 310 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 311 } else 312 kfree_skb(skb); 313 } 314 315 kmem_cache_free(mrt_cachep, c); 316 } 317 318 319 /* Single timer process for all the unresolved queue. 
*/ 320 321 static void ipmr_expire_process(unsigned long dummy) 322 { 323 unsigned long now; 324 unsigned long expires; 325 struct mfc_cache *c, **cp; 326 327 if (!spin_trylock(&mfc_unres_lock)) { 328 mod_timer(&ipmr_expire_timer, jiffies+HZ/10); 329 return; 330 } 331 332 if (atomic_read(&cache_resolve_queue_len) == 0) 333 goto out; 334 335 now = jiffies; 336 expires = 10*HZ; 337 cp = &mfc_unres_queue; 338 339 while ((c=*cp) != NULL) { 340 if (time_after(c->mfc_un.unres.expires, now)) { 341 unsigned long interval = c->mfc_un.unres.expires - now; 342 if (interval < expires) 343 expires = interval; 344 cp = &c->next; 345 continue; 346 } 347 348 *cp = c->next; 349 350 ipmr_destroy_unres(c); 351 } 352 353 if (atomic_read(&cache_resolve_queue_len)) 354 mod_timer(&ipmr_expire_timer, jiffies + expires); 355 356 out: 357 spin_unlock(&mfc_unres_lock); 358 } 359 360 /* Fill oifs list. It is called under write locked mrt_lock. */ 361 362 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) 363 { 364 int vifi; 365 366 cache->mfc_un.res.minvif = MAXVIFS; 367 cache->mfc_un.res.maxvif = 0; 368 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 369 370 for (vifi=0; vifi<maxvif; vifi++) { 371 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { 372 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 373 if (cache->mfc_un.res.minvif > vifi) 374 cache->mfc_un.res.minvif = vifi; 375 if (cache->mfc_un.res.maxvif <= vifi) 376 cache->mfc_un.res.maxvif = vifi + 1; 377 } 378 } 379 } 380 381 static int vif_add(struct vifctl *vifc, int mrtsock) 382 { 383 int vifi = vifc->vifc_vifi; 384 struct vif_device *v = &vif_table[vifi]; 385 struct net_device *dev; 386 struct in_device *in_dev; 387 388 /* Is vif busy ? 
*/ 389 if (VIF_EXISTS(vifi)) 390 return -EADDRINUSE; 391 392 switch (vifc->vifc_flags) { 393 #ifdef CONFIG_IP_PIMSM 394 case VIFF_REGISTER: 395 /* 396 * Special Purpose VIF in PIM 397 * All the packets will be sent to the daemon 398 */ 399 if (reg_vif_num >= 0) 400 return -EADDRINUSE; 401 dev = ipmr_reg_vif(); 402 if (!dev) 403 return -ENOBUFS; 404 break; 405 #endif 406 case VIFF_TUNNEL: 407 dev = ipmr_new_tunnel(vifc); 408 if (!dev) 409 return -ENOBUFS; 410 break; 411 case 0: 412 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr); 413 if (!dev) 414 return -EADDRNOTAVAIL; 415 __dev_put(dev); 416 break; 417 default: 418 return -EINVAL; 419 } 420 421 if ((in_dev = __in_dev_get(dev)) == NULL) 422 return -EADDRNOTAVAIL; 423 in_dev->cnf.mc_forwarding++; 424 dev_set_allmulti(dev, +1); 425 ip_rt_multicast_event(in_dev); 426 427 /* 428 * Fill in the VIF structures 429 */ 430 v->rate_limit=vifc->vifc_rate_limit; 431 v->local=vifc->vifc_lcl_addr.s_addr; 432 v->remote=vifc->vifc_rmt_addr.s_addr; 433 v->flags=vifc->vifc_flags; 434 if (!mrtsock) 435 v->flags |= VIFF_STATIC; 436 v->threshold=vifc->vifc_threshold; 437 v->bytes_in = 0; 438 v->bytes_out = 0; 439 v->pkt_in = 0; 440 v->pkt_out = 0; 441 v->link = dev->ifindex; 442 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 443 v->link = dev->iflink; 444 445 /* And finish update writing critical data */ 446 write_lock_bh(&mrt_lock); 447 dev_hold(dev); 448 v->dev=dev; 449 #ifdef CONFIG_IP_PIMSM 450 if (v->flags&VIFF_REGISTER) 451 reg_vif_num = vifi; 452 #endif 453 if (vifi+1 > maxvif) 454 maxvif = vifi+1; 455 write_unlock_bh(&mrt_lock); 456 return 0; 457 } 458 459 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) 460 { 461 int line=MFC_HASH(mcastgrp,origin); 462 struct mfc_cache *c; 463 464 for (c=mfc_cache_array[line]; c; c = c->next) { 465 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 466 break; 467 } 468 return c; 469 } 470 471 /* 472 * Allocate a multicast cache entry 473 */ 474 static struct mfc_cache 
*ipmr_cache_alloc(void) 475 { 476 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); 477 if(c==NULL) 478 return NULL; 479 memset(c, 0, sizeof(*c)); 480 c->mfc_un.res.minvif = MAXVIFS; 481 return c; 482 } 483 484 static struct mfc_cache *ipmr_cache_alloc_unres(void) 485 { 486 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); 487 if(c==NULL) 488 return NULL; 489 memset(c, 0, sizeof(*c)); 490 skb_queue_head_init(&c->mfc_un.unres.unresolved); 491 c->mfc_un.unres.expires = jiffies + 10*HZ; 492 return c; 493 } 494 495 /* 496 * A cache entry has gone into a resolved state from queued 497 */ 498 499 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) 500 { 501 struct sk_buff *skb; 502 503 /* 504 * Play the pending entries through our router 505 */ 506 507 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { 508 if (skb->nh.iph->version == 0) { 509 int err; 510 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 511 512 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 513 nlh->nlmsg_len = skb->tail - (u8*)nlh; 514 } else { 515 nlh->nlmsg_type = NLMSG_ERROR; 516 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 517 skb_trim(skb, nlh->nlmsg_len); 518 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; 519 } 520 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 521 } else 522 ip_mr_forward(skb, c, 0); 523 } 524 } 525 526 /* 527 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted 528 * expects the following bizarre scheme. 529 * 530 * Called under mrt_lock. 
531 */ 532 533 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) 534 { 535 struct sk_buff *skb; 536 int ihl = pkt->nh.iph->ihl<<2; 537 struct igmphdr *igmp; 538 struct igmpmsg *msg; 539 int ret; 540 541 #ifdef CONFIG_IP_PIMSM 542 if (assert == IGMPMSG_WHOLEPKT) 543 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 544 else 545 #endif 546 skb = alloc_skb(128, GFP_ATOMIC); 547 548 if(!skb) 549 return -ENOBUFS; 550 551 #ifdef CONFIG_IP_PIMSM 552 if (assert == IGMPMSG_WHOLEPKT) { 553 /* Ugly, but we have no choice with this interface. 554 Duplicate old header, fix ihl, length etc. 555 And all this only to mangle msg->im_msgtype and 556 to set msg->im_mbz to "mbz" :-) 557 */ 558 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); 559 skb->nh.raw = skb->h.raw = (u8*)msg; 560 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); 561 msg->im_msgtype = IGMPMSG_WHOLEPKT; 562 msg->im_mbz = 0; 563 msg->im_vif = reg_vif_num; 564 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; 565 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); 566 } else 567 #endif 568 { 569 570 /* 571 * Copy the IP header 572 */ 573 574 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl); 575 memcpy(skb->data,pkt->data,ihl); 576 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ 577 msg = (struct igmpmsg*)skb->nh.iph; 578 msg->im_vif = vifi; 579 skb->dst = dst_clone(pkt->dst); 580 581 /* 582 * Add our header 583 */ 584 585 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); 586 igmp->type = 587 msg->im_msgtype = assert; 588 igmp->code = 0; 589 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ 590 skb->h.raw = skb->nh.raw; 591 } 592 593 if (mroute_socket == NULL) { 594 kfree_skb(skb); 595 return -EINVAL; 596 } 597 598 /* 599 * Deliver to mrouted 600 */ 601 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { 602 if (net_ratelimit()) 603 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 604 
kfree_skb(skb); 605 } 606 607 return ret; 608 } 609 610 /* 611 * Queue a packet for resolution. It gets locked cache entry! 612 */ 613 614 static int 615 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) 616 { 617 int err; 618 struct mfc_cache *c; 619 620 spin_lock_bh(&mfc_unres_lock); 621 for (c=mfc_unres_queue; c; c=c->next) { 622 if (c->mfc_mcastgrp == skb->nh.iph->daddr && 623 c->mfc_origin == skb->nh.iph->saddr) 624 break; 625 } 626 627 if (c == NULL) { 628 /* 629 * Create a new entry if allowable 630 */ 631 632 if (atomic_read(&cache_resolve_queue_len)>=10 || 633 (c=ipmr_cache_alloc_unres())==NULL) { 634 spin_unlock_bh(&mfc_unres_lock); 635 636 kfree_skb(skb); 637 return -ENOBUFS; 638 } 639 640 /* 641 * Fill in the new cache entry 642 */ 643 c->mfc_parent=-1; 644 c->mfc_origin=skb->nh.iph->saddr; 645 c->mfc_mcastgrp=skb->nh.iph->daddr; 646 647 /* 648 * Reflect first query at mrouted. 649 */ 650 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { 651 /* If the report failed throw the cache entry 652 out - Brad Parker 653 */ 654 spin_unlock_bh(&mfc_unres_lock); 655 656 kmem_cache_free(mrt_cachep, c); 657 kfree_skb(skb); 658 return err; 659 } 660 661 atomic_inc(&cache_resolve_queue_len); 662 c->next = mfc_unres_queue; 663 mfc_unres_queue = c; 664 665 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); 666 } 667 668 /* 669 * See if we can append the packet 670 */ 671 if (c->mfc_un.unres.unresolved.qlen>3) { 672 kfree_skb(skb); 673 err = -ENOBUFS; 674 } else { 675 skb_queue_tail(&c->mfc_un.unres.unresolved,skb); 676 err = 0; 677 } 678 679 spin_unlock_bh(&mfc_unres_lock); 680 return err; 681 } 682 683 /* 684 * MFC cache manipulation by user space mroute daemon 685 */ 686 687 static int ipmr_mfc_delete(struct mfcctl *mfc) 688 { 689 int line; 690 struct mfc_cache *c, **cp; 691 692 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 693 694 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 695 if (c->mfc_origin == 
mfc->mfcc_origin.s_addr && 696 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 697 write_lock_bh(&mrt_lock); 698 *cp = c->next; 699 write_unlock_bh(&mrt_lock); 700 701 kmem_cache_free(mrt_cachep, c); 702 return 0; 703 } 704 } 705 return -ENOENT; 706 } 707 708 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) 709 { 710 int line; 711 struct mfc_cache *uc, *c, **cp; 712 713 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 714 715 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 716 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 717 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 718 break; 719 } 720 721 if (c != NULL) { 722 write_lock_bh(&mrt_lock); 723 c->mfc_parent = mfc->mfcc_parent; 724 ipmr_update_threshoulds(c, mfc->mfcc_ttls); 725 if (!mrtsock) 726 c->mfc_flags |= MFC_STATIC; 727 write_unlock_bh(&mrt_lock); 728 return 0; 729 } 730 731 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) 732 return -EINVAL; 733 734 c=ipmr_cache_alloc(); 735 if (c==NULL) 736 return -ENOMEM; 737 738 c->mfc_origin=mfc->mfcc_origin.s_addr; 739 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; 740 c->mfc_parent=mfc->mfcc_parent; 741 ipmr_update_threshoulds(c, mfc->mfcc_ttls); 742 if (!mrtsock) 743 c->mfc_flags |= MFC_STATIC; 744 745 write_lock_bh(&mrt_lock); 746 c->next = mfc_cache_array[line]; 747 mfc_cache_array[line] = c; 748 write_unlock_bh(&mrt_lock); 749 750 /* 751 * Check to see if we resolved a queued list. If so we 752 * need to send on the frames and tidy up. 
753 */ 754 spin_lock_bh(&mfc_unres_lock); 755 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 756 cp = &uc->next) { 757 if (uc->mfc_origin == c->mfc_origin && 758 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 759 *cp = uc->next; 760 if (atomic_dec_and_test(&cache_resolve_queue_len)) 761 del_timer(&ipmr_expire_timer); 762 break; 763 } 764 } 765 spin_unlock_bh(&mfc_unres_lock); 766 767 if (uc) { 768 ipmr_cache_resolve(uc, c); 769 kmem_cache_free(mrt_cachep, uc); 770 } 771 return 0; 772 } 773 774 /* 775 * Close the multicast socket, and clear the vif tables etc 776 */ 777 778 static void mroute_clean_tables(struct sock *sk) 779 { 780 int i; 781 782 /* 783 * Shut down all active vif entries 784 */ 785 for(i=0; i<maxvif; i++) { 786 if (!(vif_table[i].flags&VIFF_STATIC)) 787 vif_delete(i); 788 } 789 790 /* 791 * Wipe the cache 792 */ 793 for (i=0;i<MFC_LINES;i++) { 794 struct mfc_cache *c, **cp; 795 796 cp = &mfc_cache_array[i]; 797 while ((c = *cp) != NULL) { 798 if (c->mfc_flags&MFC_STATIC) { 799 cp = &c->next; 800 continue; 801 } 802 write_lock_bh(&mrt_lock); 803 *cp = c->next; 804 write_unlock_bh(&mrt_lock); 805 806 kmem_cache_free(mrt_cachep, c); 807 } 808 } 809 810 if (atomic_read(&cache_resolve_queue_len) != 0) { 811 struct mfc_cache *c; 812 813 spin_lock_bh(&mfc_unres_lock); 814 while (mfc_unres_queue != NULL) { 815 c = mfc_unres_queue; 816 mfc_unres_queue = c->next; 817 spin_unlock_bh(&mfc_unres_lock); 818 819 ipmr_destroy_unres(c); 820 821 spin_lock_bh(&mfc_unres_lock); 822 } 823 spin_unlock_bh(&mfc_unres_lock); 824 } 825 } 826 827 static void mrtsock_destruct(struct sock *sk) 828 { 829 rtnl_lock(); 830 if (sk == mroute_socket) { 831 ipv4_devconf.mc_forwarding--; 832 833 write_lock_bh(&mrt_lock); 834 mroute_socket=NULL; 835 write_unlock_bh(&mrt_lock); 836 837 mroute_clean_tables(sk); 838 } 839 rtnl_unlock(); 840 } 841 842 /* 843 * Socket options and virtual interface manipulation. 
The whole 844 * virtual interface system is a complete heap, but unfortunately 845 * that's how BSD mrouted happens to think. Maybe one day with a proper 846 * MOSPF/PIM router set up we can clean this up. 847 */ 848 849 int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen) 850 { 851 int ret; 852 struct vifctl vif; 853 struct mfcctl mfc; 854 855 if(optname!=MRT_INIT) 856 { 857 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN)) 858 return -EACCES; 859 } 860 861 switch(optname) 862 { 863 case MRT_INIT: 864 if (sk->sk_type != SOCK_RAW || 865 inet_sk(sk)->num != IPPROTO_IGMP) 866 return -EOPNOTSUPP; 867 if(optlen!=sizeof(int)) 868 return -ENOPROTOOPT; 869 870 rtnl_lock(); 871 if (mroute_socket) { 872 rtnl_unlock(); 873 return -EADDRINUSE; 874 } 875 876 ret = ip_ra_control(sk, 1, mrtsock_destruct); 877 if (ret == 0) { 878 write_lock_bh(&mrt_lock); 879 mroute_socket=sk; 880 write_unlock_bh(&mrt_lock); 881 882 ipv4_devconf.mc_forwarding++; 883 } 884 rtnl_unlock(); 885 return ret; 886 case MRT_DONE: 887 if (sk!=mroute_socket) 888 return -EACCES; 889 return ip_ra_control(sk, 0, NULL); 890 case MRT_ADD_VIF: 891 case MRT_DEL_VIF: 892 if(optlen!=sizeof(vif)) 893 return -EINVAL; 894 if (copy_from_user(&vif,optval,sizeof(vif))) 895 return -EFAULT; 896 if(vif.vifc_vifi >= MAXVIFS) 897 return -ENFILE; 898 rtnl_lock(); 899 if (optname==MRT_ADD_VIF) { 900 ret = vif_add(&vif, sk==mroute_socket); 901 } else { 902 ret = vif_delete(vif.vifc_vifi); 903 } 904 rtnl_unlock(); 905 return ret; 906 907 /* 908 * Manipulate the forwarding caches. These live 909 * in a sort of kernel/user symbiosis. 910 */ 911 case MRT_ADD_MFC: 912 case MRT_DEL_MFC: 913 if(optlen!=sizeof(mfc)) 914 return -EINVAL; 915 if (copy_from_user(&mfc,optval, sizeof(mfc))) 916 return -EFAULT; 917 rtnl_lock(); 918 if (optname==MRT_DEL_MFC) 919 ret = ipmr_mfc_delete(&mfc); 920 else 921 ret = ipmr_mfc_add(&mfc, sk==mroute_socket); 922 rtnl_unlock(); 923 return ret; 924 /* 925 * Control PIM assert. 
926 */ 927 case MRT_ASSERT: 928 { 929 int v; 930 if(get_user(v,(int __user *)optval)) 931 return -EFAULT; 932 mroute_do_assert=(v)?1:0; 933 return 0; 934 } 935 #ifdef CONFIG_IP_PIMSM 936 case MRT_PIM: 937 { 938 int v, ret; 939 if(get_user(v,(int __user *)optval)) 940 return -EFAULT; 941 v = (v)?1:0; 942 rtnl_lock(); 943 ret = 0; 944 if (v != mroute_do_pim) { 945 mroute_do_pim = v; 946 mroute_do_assert = v; 947 #ifdef CONFIG_IP_PIMSM_V2 948 if (mroute_do_pim) 949 ret = inet_add_protocol(&pim_protocol, 950 IPPROTO_PIM); 951 else 952 ret = inet_del_protocol(&pim_protocol, 953 IPPROTO_PIM); 954 if (ret < 0) 955 ret = -EAGAIN; 956 #endif 957 } 958 rtnl_unlock(); 959 return ret; 960 } 961 #endif 962 /* 963 * Spurious command, or MRT_VERSION which you cannot 964 * set. 965 */ 966 default: 967 return -ENOPROTOOPT; 968 } 969 } 970 971 /* 972 * Getsock opt support for the multicast routing system. 973 */ 974 975 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) 976 { 977 int olr; 978 int val; 979 980 if(optname!=MRT_VERSION && 981 #ifdef CONFIG_IP_PIMSM 982 optname!=MRT_PIM && 983 #endif 984 optname!=MRT_ASSERT) 985 return -ENOPROTOOPT; 986 987 if (get_user(olr, optlen)) 988 return -EFAULT; 989 990 olr = min_t(unsigned int, olr, sizeof(int)); 991 if (olr < 0) 992 return -EINVAL; 993 994 if(put_user(olr,optlen)) 995 return -EFAULT; 996 if(optname==MRT_VERSION) 997 val=0x0305; 998 #ifdef CONFIG_IP_PIMSM 999 else if(optname==MRT_PIM) 1000 val=mroute_do_pim; 1001 #endif 1002 else 1003 val=mroute_do_assert; 1004 if(copy_to_user(optval,&val,olr)) 1005 return -EFAULT; 1006 return 0; 1007 } 1008 1009 /* 1010 * The IP multicast ioctl support routines. 
1011 */ 1012 1013 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1014 { 1015 struct sioc_sg_req sr; 1016 struct sioc_vif_req vr; 1017 struct vif_device *vif; 1018 struct mfc_cache *c; 1019 1020 switch(cmd) 1021 { 1022 case SIOCGETVIFCNT: 1023 if (copy_from_user(&vr,arg,sizeof(vr))) 1024 return -EFAULT; 1025 if(vr.vifi>=maxvif) 1026 return -EINVAL; 1027 read_lock(&mrt_lock); 1028 vif=&vif_table[vr.vifi]; 1029 if(VIF_EXISTS(vr.vifi)) { 1030 vr.icount=vif->pkt_in; 1031 vr.ocount=vif->pkt_out; 1032 vr.ibytes=vif->bytes_in; 1033 vr.obytes=vif->bytes_out; 1034 read_unlock(&mrt_lock); 1035 1036 if (copy_to_user(arg,&vr,sizeof(vr))) 1037 return -EFAULT; 1038 return 0; 1039 } 1040 read_unlock(&mrt_lock); 1041 return -EADDRNOTAVAIL; 1042 case SIOCGETSGCNT: 1043 if (copy_from_user(&sr,arg,sizeof(sr))) 1044 return -EFAULT; 1045 1046 read_lock(&mrt_lock); 1047 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); 1048 if (c) { 1049 sr.pktcnt = c->mfc_un.res.pkt; 1050 sr.bytecnt = c->mfc_un.res.bytes; 1051 sr.wrong_if = c->mfc_un.res.wrong_if; 1052 read_unlock(&mrt_lock); 1053 1054 if (copy_to_user(arg,&sr,sizeof(sr))) 1055 return -EFAULT; 1056 return 0; 1057 } 1058 read_unlock(&mrt_lock); 1059 return -EADDRNOTAVAIL; 1060 default: 1061 return -ENOIOCTLCMD; 1062 } 1063 } 1064 1065 1066 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1067 { 1068 struct vif_device *v; 1069 int ct; 1070 if (event != NETDEV_UNREGISTER) 1071 return NOTIFY_DONE; 1072 v=&vif_table[0]; 1073 for(ct=0;ct<maxvif;ct++,v++) { 1074 if (v->dev==ptr) 1075 vif_delete(ct); 1076 } 1077 return NOTIFY_DONE; 1078 } 1079 1080 1081 static struct notifier_block ip_mr_notifier={ 1082 .notifier_call = ipmr_device_event, 1083 }; 1084 1085 /* 1086 * Encapsulate a packet by attaching a valid IPIP header to it. 1087 * This avoids tunnel drivers and other mess and gives us the speed so 1088 * important for multicast video. 
1089 */ 1090 1091 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) 1092 { 1093 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); 1094 1095 iph->version = 4; 1096 iph->tos = skb->nh.iph->tos; 1097 iph->ttl = skb->nh.iph->ttl; 1098 iph->frag_off = 0; 1099 iph->daddr = daddr; 1100 iph->saddr = saddr; 1101 iph->protocol = IPPROTO_IPIP; 1102 iph->ihl = 5; 1103 iph->tot_len = htons(skb->len); 1104 ip_select_ident(iph, skb->dst, NULL); 1105 ip_send_check(iph); 1106 1107 skb->h.ipiph = skb->nh.iph; 1108 skb->nh.iph = iph; 1109 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1110 nf_reset(skb); 1111 } 1112 1113 static inline int ipmr_forward_finish(struct sk_buff *skb) 1114 { 1115 struct ip_options * opt = &(IPCB(skb)->opt); 1116 1117 IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); 1118 1119 if (unlikely(opt->optlen)) 1120 ip_forward_options(skb); 1121 1122 return dst_output(skb); 1123 } 1124 1125 /* 1126 * Processing handlers for ipmr_forward 1127 */ 1128 1129 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1130 { 1131 struct iphdr *iph = skb->nh.iph; 1132 struct vif_device *vif = &vif_table[vifi]; 1133 struct net_device *dev; 1134 struct rtable *rt; 1135 int encap = 0; 1136 1137 if (vif->dev == NULL) 1138 goto out_free; 1139 1140 #ifdef CONFIG_IP_PIMSM 1141 if (vif->flags & VIFF_REGISTER) { 1142 vif->pkt_out++; 1143 vif->bytes_out+=skb->len; 1144 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; 1145 ((struct net_device_stats*)vif->dev->priv)->tx_packets++; 1146 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1147 kfree_skb(skb); 1148 return; 1149 } 1150 #endif 1151 1152 if (vif->flags&VIFF_TUNNEL) { 1153 struct flowi fl = { .oif = vif->link, 1154 .nl_u = { .ip4_u = 1155 { .daddr = vif->remote, 1156 .saddr = vif->local, 1157 .tos = RT_TOS(iph->tos) } }, 1158 .proto = IPPROTO_IPIP }; 1159 if (ip_route_output_key(&rt, &fl)) 1160 goto out_free; 1161 encap = sizeof(struct iphdr); 1162 } 
else { 1163 struct flowi fl = { .oif = vif->link, 1164 .nl_u = { .ip4_u = 1165 { .daddr = iph->daddr, 1166 .tos = RT_TOS(iph->tos) } }, 1167 .proto = IPPROTO_IPIP }; 1168 if (ip_route_output_key(&rt, &fl)) 1169 goto out_free; 1170 } 1171 1172 dev = rt->u.dst.dev; 1173 1174 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1175 /* Do not fragment multicasts. Alas, IPv4 does not 1176 allow to send ICMP, so that packets will disappear 1177 to blackhole. 1178 */ 1179 1180 IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); 1181 ip_rt_put(rt); 1182 goto out_free; 1183 } 1184 1185 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1186 1187 if (skb_cow(skb, encap)) { 1188 ip_rt_put(rt); 1189 goto out_free; 1190 } 1191 1192 vif->pkt_out++; 1193 vif->bytes_out+=skb->len; 1194 1195 dst_release(skb->dst); 1196 skb->dst = &rt->u.dst; 1197 iph = skb->nh.iph; 1198 ip_decrease_ttl(iph); 1199 1200 /* FIXME: forward and output firewalls used to be called here. 1201 * What do we do with netfilter? -- RR */ 1202 if (vif->flags & VIFF_TUNNEL) { 1203 ip_encap(skb, vif->local, vif->remote); 1204 /* FIXME: extra output firewall step used to be here. --RR */ 1205 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; 1206 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len; 1207 } 1208 1209 IPCB(skb)->flags |= IPSKB_FORWARDED; 1210 1211 /* 1212 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1213 * not only before forwarding, but after forwarding on all output 1214 * interfaces. It is clear, if mrouter runs a multicasting 1215 * program, it should receive packets not depending to what interface 1216 * program is joined. 1217 * If we will not make it, the program will have to join on all 1218 * interfaces. On the other hand, multihoming host (or router, but 1219 * not mrouter) cannot join to more than one interface - it will 1220 * result in receiving multiple packets. 
1221 */ 1222 NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 1223 ipmr_forward_finish); 1224 return; 1225 1226 out_free: 1227 kfree_skb(skb); 1228 return; 1229 } 1230 1231 static int ipmr_find_vif(struct net_device *dev) 1232 { 1233 int ct; 1234 for (ct=maxvif-1; ct>=0; ct--) { 1235 if (vif_table[ct].dev == dev) 1236 break; 1237 } 1238 return ct; 1239 } 1240 1241 /* "local" means that we should preserve one skb (for local delivery) */ 1242 1243 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) 1244 { 1245 int psend = -1; 1246 int vif, ct; 1247 1248 vif = cache->mfc_parent; 1249 cache->mfc_un.res.pkt++; 1250 cache->mfc_un.res.bytes += skb->len; 1251 1252 /* 1253 * Wrong interface: drop packet and (maybe) send PIM assert. 1254 */ 1255 if (vif_table[vif].dev != skb->dev) { 1256 int true_vifi; 1257 1258 if (((struct rtable*)skb->dst)->fl.iif == 0) { 1259 /* It is our own packet, looped back. 1260 Very complicated situation... 1261 1262 The best workaround until routing daemons will be 1263 fixed is not to redistribute packet, if it was 1264 send through wrong interface. It means, that 1265 multicast applications WILL NOT work for 1266 (S,G), which have default multicast route pointing 1267 to wrong oif. In any case, it is not a good 1268 idea to use multicasting applications on router. 1269 */ 1270 goto dont_forward; 1271 } 1272 1273 cache->mfc_un.res.wrong_if++; 1274 true_vifi = ipmr_find_vif(skb->dev); 1275 1276 if (true_vifi >= 0 && mroute_do_assert && 1277 /* pimsm uses asserts, when switching from RPT to SPT, 1278 so that we cannot check that packet arrived on an oif. 1279 It is bad, but otherwise we would need to move pretty 1280 large chunk of pimd to kernel. Ough... 
--ANK 1281 */ 1282 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && 1283 time_after(jiffies, 1284 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1285 cache->mfc_un.res.last_assert = jiffies; 1286 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); 1287 } 1288 goto dont_forward; 1289 } 1290 1291 vif_table[vif].pkt_in++; 1292 vif_table[vif].bytes_in+=skb->len; 1293 1294 /* 1295 * Forward the frame 1296 */ 1297 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1298 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { 1299 if (psend != -1) { 1300 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1301 if (skb2) 1302 ipmr_queue_xmit(skb2, cache, psend); 1303 } 1304 psend=ct; 1305 } 1306 } 1307 if (psend != -1) { 1308 if (local) { 1309 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1310 if (skb2) 1311 ipmr_queue_xmit(skb2, cache, psend); 1312 } else { 1313 ipmr_queue_xmit(skb, cache, psend); 1314 return 0; 1315 } 1316 } 1317 1318 dont_forward: 1319 if (!local) 1320 kfree_skb(skb); 1321 return 0; 1322 } 1323 1324 1325 /* 1326 * Multicast packets for forwarding arrive here 1327 */ 1328 1329 int ip_mr_input(struct sk_buff *skb) 1330 { 1331 struct mfc_cache *cache; 1332 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; 1333 1334 /* Packet is looped back after forward, it should not be 1335 forwarded second time, but still can be delivered locally. 1336 */ 1337 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1338 goto dont_forward; 1339 1340 if (!local) { 1341 if (IPCB(skb)->opt.router_alert) { 1342 if (ip_call_ra_chain(skb)) 1343 return 0; 1344 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ 1345 /* IGMPv1 (and broken IGMPv2 implementations sort of 1346 Cisco IOS <= 11.2(8)) do not put router alert 1347 option to IGMP packets destined to routable 1348 groups. It is very bad, because it means 1349 that we can forward NO IGMP messages. 
1350 */ 1351 read_lock(&mrt_lock); 1352 if (mroute_socket) { 1353 raw_rcv(mroute_socket, skb); 1354 read_unlock(&mrt_lock); 1355 return 0; 1356 } 1357 read_unlock(&mrt_lock); 1358 } 1359 } 1360 1361 read_lock(&mrt_lock); 1362 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); 1363 1364 /* 1365 * No usable cache entry 1366 */ 1367 if (cache==NULL) { 1368 int vif; 1369 1370 if (local) { 1371 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1372 ip_local_deliver(skb); 1373 if (skb2 == NULL) { 1374 read_unlock(&mrt_lock); 1375 return -ENOBUFS; 1376 } 1377 skb = skb2; 1378 } 1379 1380 vif = ipmr_find_vif(skb->dev); 1381 if (vif >= 0) { 1382 int err = ipmr_cache_unresolved(vif, skb); 1383 read_unlock(&mrt_lock); 1384 1385 return err; 1386 } 1387 read_unlock(&mrt_lock); 1388 kfree_skb(skb); 1389 return -ENODEV; 1390 } 1391 1392 ip_mr_forward(skb, cache, local); 1393 1394 read_unlock(&mrt_lock); 1395 1396 if (local) 1397 return ip_local_deliver(skb); 1398 1399 return 0; 1400 1401 dont_forward: 1402 if (local) 1403 return ip_local_deliver(skb); 1404 kfree_skb(skb); 1405 return 0; 1406 } 1407 1408 #ifdef CONFIG_IP_PIMSM_V1 1409 /* 1410 * Handle IGMP messages of PIMv1 1411 */ 1412 1413 int pim_rcv_v1(struct sk_buff * skb) 1414 { 1415 struct igmphdr *pim; 1416 struct iphdr *encap; 1417 struct net_device *reg_dev = NULL; 1418 1419 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 1420 goto drop; 1421 1422 pim = (struct igmphdr*)skb->h.raw; 1423 1424 if (!mroute_do_pim || 1425 skb->len < sizeof(*pim) + sizeof(*encap) || 1426 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1427 goto drop; 1428 1429 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); 1430 /* 1431 Check that: 1432 a. packet is really destinted to a multicast group 1433 b. packet is not a NULL-REGISTER 1434 c. 
packet is not truncated 1435 */ 1436 if (!MULTICAST(encap->daddr) || 1437 encap->tot_len == 0 || 1438 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1439 goto drop; 1440 1441 read_lock(&mrt_lock); 1442 if (reg_vif_num >= 0) 1443 reg_dev = vif_table[reg_vif_num].dev; 1444 if (reg_dev) 1445 dev_hold(reg_dev); 1446 read_unlock(&mrt_lock); 1447 1448 if (reg_dev == NULL) 1449 goto drop; 1450 1451 skb->mac.raw = skb->nh.raw; 1452 skb_pull(skb, (u8*)encap - skb->data); 1453 skb->nh.iph = (struct iphdr *)skb->data; 1454 skb->dev = reg_dev; 1455 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 1456 skb->protocol = htons(ETH_P_IP); 1457 skb->ip_summed = 0; 1458 skb->pkt_type = PACKET_HOST; 1459 dst_release(skb->dst); 1460 skb->dst = NULL; 1461 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1462 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1463 nf_reset(skb); 1464 netif_rx(skb); 1465 dev_put(reg_dev); 1466 return 0; 1467 drop: 1468 kfree_skb(skb); 1469 return 0; 1470 } 1471 #endif 1472 1473 #ifdef CONFIG_IP_PIMSM_V2 1474 static int pim_rcv(struct sk_buff * skb) 1475 { 1476 struct pimreghdr *pim; 1477 struct iphdr *encap; 1478 struct net_device *reg_dev = NULL; 1479 1480 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 1481 goto drop; 1482 1483 pim = (struct pimreghdr*)skb->h.raw; 1484 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1485 (pim->flags&PIM_NULL_REGISTER) || 1486 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1487 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1488 goto drop; 1489 1490 /* check if the inner packet is destined to mcast group */ 1491 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); 1492 if (!MULTICAST(encap->daddr) || 1493 encap->tot_len == 0 || 1494 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1495 goto drop; 1496 1497 read_lock(&mrt_lock); 1498 if (reg_vif_num >= 0) 1499 reg_dev = vif_table[reg_vif_num].dev; 1500 if (reg_dev) 1501 dev_hold(reg_dev); 1502 
read_unlock(&mrt_lock); 1503 1504 if (reg_dev == NULL) 1505 goto drop; 1506 1507 skb->mac.raw = skb->nh.raw; 1508 skb_pull(skb, (u8*)encap - skb->data); 1509 skb->nh.iph = (struct iphdr *)skb->data; 1510 skb->dev = reg_dev; 1511 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 1512 skb->protocol = htons(ETH_P_IP); 1513 skb->ip_summed = 0; 1514 skb->pkt_type = PACKET_HOST; 1515 dst_release(skb->dst); 1516 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1517 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1518 skb->dst = NULL; 1519 nf_reset(skb); 1520 netif_rx(skb); 1521 dev_put(reg_dev); 1522 return 0; 1523 drop: 1524 kfree_skb(skb); 1525 return 0; 1526 } 1527 #endif 1528 1529 static int 1530 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 1531 { 1532 int ct; 1533 struct rtnexthop *nhp; 1534 struct net_device *dev = vif_table[c->mfc_parent].dev; 1535 u8 *b = skb->tail; 1536 struct rtattr *mp_head; 1537 1538 if (dev) 1539 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); 1540 1541 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); 1542 1543 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1544 if (c->mfc_un.res.ttls[ct] < 255) { 1545 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1546 goto rtattr_failure; 1547 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1548 nhp->rtnh_flags = 0; 1549 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1550 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; 1551 nhp->rtnh_len = sizeof(*nhp); 1552 } 1553 } 1554 mp_head->rta_type = RTA_MULTIPATH; 1555 mp_head->rta_len = skb->tail - (u8*)mp_head; 1556 rtm->rtm_type = RTN_MULTICAST; 1557 return 1; 1558 1559 rtattr_failure: 1560 skb_trim(skb, b - skb->data); 1561 return -EMSGSIZE; 1562 } 1563 1564 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1565 { 1566 int err; 1567 struct mfc_cache *cache; 1568 struct rtable *rt = (struct rtable*)skb->dst; 1569 1570 
read_lock(&mrt_lock); 1571 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1572 1573 if (cache==NULL) { 1574 struct net_device *dev; 1575 int vif; 1576 1577 if (nowait) { 1578 read_unlock(&mrt_lock); 1579 return -EAGAIN; 1580 } 1581 1582 dev = skb->dev; 1583 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 1584 read_unlock(&mrt_lock); 1585 return -ENODEV; 1586 } 1587 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 1588 skb->nh.iph->ihl = sizeof(struct iphdr)>>2; 1589 skb->nh.iph->saddr = rt->rt_src; 1590 skb->nh.iph->daddr = rt->rt_dst; 1591 skb->nh.iph->version = 0; 1592 err = ipmr_cache_unresolved(vif, skb); 1593 read_unlock(&mrt_lock); 1594 return err; 1595 } 1596 1597 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1598 cache->mfc_flags |= MFC_NOTIFY; 1599 err = ipmr_fill_mroute(skb, cache, rtm); 1600 read_unlock(&mrt_lock); 1601 return err; 1602 } 1603 1604 #ifdef CONFIG_PROC_FS 1605 /* 1606 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 1607 */ 1608 struct ipmr_vif_iter { 1609 int ct; 1610 }; 1611 1612 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, 1613 loff_t pos) 1614 { 1615 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { 1616 if(!VIF_EXISTS(iter->ct)) 1617 continue; 1618 if (pos-- == 0) 1619 return &vif_table[iter->ct]; 1620 } 1621 return NULL; 1622 } 1623 1624 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1625 { 1626 read_lock(&mrt_lock); 1627 return *pos ? 
ipmr_vif_seq_idx(seq->private, *pos - 1) 1628 : SEQ_START_TOKEN; 1629 } 1630 1631 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1632 { 1633 struct ipmr_vif_iter *iter = seq->private; 1634 1635 ++*pos; 1636 if (v == SEQ_START_TOKEN) 1637 return ipmr_vif_seq_idx(iter, 0); 1638 1639 while (++iter->ct < maxvif) { 1640 if(!VIF_EXISTS(iter->ct)) 1641 continue; 1642 return &vif_table[iter->ct]; 1643 } 1644 return NULL; 1645 } 1646 1647 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 1648 { 1649 read_unlock(&mrt_lock); 1650 } 1651 1652 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 1653 { 1654 if (v == SEQ_START_TOKEN) { 1655 seq_puts(seq, 1656 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 1657 } else { 1658 const struct vif_device *vif = v; 1659 const char *name = vif->dev ? vif->dev->name : "none"; 1660 1661 seq_printf(seq, 1662 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 1663 vif - vif_table, 1664 name, vif->bytes_in, vif->pkt_in, 1665 vif->bytes_out, vif->pkt_out, 1666 vif->flags, vif->local, vif->remote); 1667 } 1668 return 0; 1669 } 1670 1671 static struct seq_operations ipmr_vif_seq_ops = { 1672 .start = ipmr_vif_seq_start, 1673 .next = ipmr_vif_seq_next, 1674 .stop = ipmr_vif_seq_stop, 1675 .show = ipmr_vif_seq_show, 1676 }; 1677 1678 static int ipmr_vif_open(struct inode *inode, struct file *file) 1679 { 1680 struct seq_file *seq; 1681 int rc = -ENOMEM; 1682 struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 1683 1684 if (!s) 1685 goto out; 1686 1687 rc = seq_open(file, &ipmr_vif_seq_ops); 1688 if (rc) 1689 goto out_kfree; 1690 1691 s->ct = 0; 1692 seq = file->private_data; 1693 seq->private = s; 1694 out: 1695 return rc; 1696 out_kfree: 1697 kfree(s); 1698 goto out; 1699 1700 } 1701 1702 static struct file_operations ipmr_vif_fops = { 1703 .owner = THIS_MODULE, 1704 .open = ipmr_vif_open, 1705 .read = seq_read, 1706 .llseek = seq_lseek, 1707 .release = seq_release_private, 
1708 }; 1709 1710 struct ipmr_mfc_iter { 1711 struct mfc_cache **cache; 1712 int ct; 1713 }; 1714 1715 1716 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) 1717 { 1718 struct mfc_cache *mfc; 1719 1720 it->cache = mfc_cache_array; 1721 read_lock(&mrt_lock); 1722 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 1723 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 1724 if (pos-- == 0) 1725 return mfc; 1726 read_unlock(&mrt_lock); 1727 1728 it->cache = &mfc_unres_queue; 1729 spin_lock_bh(&mfc_unres_lock); 1730 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 1731 if (pos-- == 0) 1732 return mfc; 1733 spin_unlock_bh(&mfc_unres_lock); 1734 1735 it->cache = NULL; 1736 return NULL; 1737 } 1738 1739 1740 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 1741 { 1742 struct ipmr_mfc_iter *it = seq->private; 1743 it->cache = NULL; 1744 it->ct = 0; 1745 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 1746 : SEQ_START_TOKEN; 1747 } 1748 1749 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1750 { 1751 struct mfc_cache *mfc = v; 1752 struct ipmr_mfc_iter *it = seq->private; 1753 1754 ++*pos; 1755 1756 if (v == SEQ_START_TOKEN) 1757 return ipmr_mfc_seq_idx(seq->private, 0); 1758 1759 if (mfc->next) 1760 return mfc->next; 1761 1762 if (it->cache == &mfc_unres_queue) 1763 goto end_of_list; 1764 1765 BUG_ON(it->cache != mfc_cache_array); 1766 1767 while (++it->ct < MFC_LINES) { 1768 mfc = mfc_cache_array[it->ct]; 1769 if (mfc) 1770 return mfc; 1771 } 1772 1773 /* exhausted cache_array, show unresolved */ 1774 read_unlock(&mrt_lock); 1775 it->cache = &mfc_unres_queue; 1776 it->ct = 0; 1777 1778 spin_lock_bh(&mfc_unres_lock); 1779 mfc = mfc_unres_queue; 1780 if (mfc) 1781 return mfc; 1782 1783 end_of_list: 1784 spin_unlock_bh(&mfc_unres_lock); 1785 it->cache = NULL; 1786 1787 return NULL; 1788 } 1789 1790 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 1791 { 1792 struct ipmr_mfc_iter 
*it = seq->private; 1793 1794 if (it->cache == &mfc_unres_queue) 1795 spin_unlock_bh(&mfc_unres_lock); 1796 else if (it->cache == mfc_cache_array) 1797 read_unlock(&mrt_lock); 1798 } 1799 1800 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 1801 { 1802 int n; 1803 1804 if (v == SEQ_START_TOKEN) { 1805 seq_puts(seq, 1806 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 1807 } else { 1808 const struct mfc_cache *mfc = v; 1809 const struct ipmr_mfc_iter *it = seq->private; 1810 1811 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", 1812 (unsigned long) mfc->mfc_mcastgrp, 1813 (unsigned long) mfc->mfc_origin, 1814 mfc->mfc_parent, 1815 mfc->mfc_un.res.pkt, 1816 mfc->mfc_un.res.bytes, 1817 mfc->mfc_un.res.wrong_if); 1818 1819 if (it->cache != &mfc_unres_queue) { 1820 for(n = mfc->mfc_un.res.minvif; 1821 n < mfc->mfc_un.res.maxvif; n++ ) { 1822 if(VIF_EXISTS(n) 1823 && mfc->mfc_un.res.ttls[n] < 255) 1824 seq_printf(seq, 1825 " %2d:%-3d", 1826 n, mfc->mfc_un.res.ttls[n]); 1827 } 1828 } 1829 seq_putc(seq, '\n'); 1830 } 1831 return 0; 1832 } 1833 1834 static struct seq_operations ipmr_mfc_seq_ops = { 1835 .start = ipmr_mfc_seq_start, 1836 .next = ipmr_mfc_seq_next, 1837 .stop = ipmr_mfc_seq_stop, 1838 .show = ipmr_mfc_seq_show, 1839 }; 1840 1841 static int ipmr_mfc_open(struct inode *inode, struct file *file) 1842 { 1843 struct seq_file *seq; 1844 int rc = -ENOMEM; 1845 struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 1846 1847 if (!s) 1848 goto out; 1849 1850 rc = seq_open(file, &ipmr_mfc_seq_ops); 1851 if (rc) 1852 goto out_kfree; 1853 1854 seq = file->private_data; 1855 seq->private = s; 1856 out: 1857 return rc; 1858 out_kfree: 1859 kfree(s); 1860 goto out; 1861 1862 } 1863 1864 static struct file_operations ipmr_mfc_fops = { 1865 .owner = THIS_MODULE, 1866 .open = ipmr_mfc_open, 1867 .read = seq_read, 1868 .llseek = seq_lseek, 1869 .release = seq_release_private, 1870 }; 1871 #endif 1872 1873 #ifdef CONFIG_IP_PIMSM_V2 1874 static struct net_protocol 
pim_protocol = { 1875 .handler = pim_rcv, 1876 }; 1877 #endif 1878 1879 1880 /* 1881 * Setup for IP multicast routing 1882 */ 1883 1884 void __init ip_mr_init(void) 1885 { 1886 mrt_cachep = kmem_cache_create("ip_mrt_cache", 1887 sizeof(struct mfc_cache), 1888 0, SLAB_HWCACHE_ALIGN, 1889 NULL, NULL); 1890 if (!mrt_cachep) 1891 panic("cannot allocate ip_mrt_cache"); 1892 1893 init_timer(&ipmr_expire_timer); 1894 ipmr_expire_timer.function=ipmr_expire_process; 1895 register_netdevice_notifier(&ip_mr_notifier); 1896 #ifdef CONFIG_PROC_FS 1897 proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); 1898 proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); 1899 #endif 1900 } 1901