/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
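/*
 * Added commentary (not in the original file): a rough sketch of how the
 * locks above are typically used.  The forwarding path only ever takes
 * mrt_lock for reading, while configuration paths take it for writing:
 *
 *	read_lock(&mrt_lock);			(data path: lookups only)
 *	c = ipmr_cache_find(saddr, daddr);
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);		(control path: table updates)
 *	vif_table[vifi].dev = dev;
 *	write_unlock_bh(&mrt_lock);
 *
 * Unresolved entries live on mfc_unres_queue and are always manipulated
 * under mfc_unres_lock instead.
 */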
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
		set_fs(oldfs);
	}
}

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}

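/*
 * Added commentary (not in the original file): skbs queued on an
 * unresolved entry are not always data packets.  ipmr_get_route() below
 * also queues pending RTM_GETROUTE replies here, marking them with a
 * pseudo IP header whose version field is 0; that is what the
 * ip_hdr(skb)->version == 0 test in ipmr_destroy_unres() above (and in
 * ipmr_cache_resolve()) distinguishes, so netlink requesters get an error
 * or a filled-in route instead of having their skb silently freed.
 */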
/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
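/*
 * Illustrative sketch (assumption, not part of this file): vif_add() and
 * vif_delete() above are normally driven from an mrouted-style daemon via
 * setsockopt() on its raw IGMP socket, roughly like this:
 *
 *	struct vifctl vc;
 *
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;			(hypothetical vif index)
 *	vc.vifc_threshold = 1;
 *	vc.vifc_lcl_addr.s_addr = local_addr;	(hypothetical local address)
 *	setsockopt(igmp_fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * which reaches vif_add() through ip_mroute_setsockopt() further down.
 */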
static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c==NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c==NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

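	/*
	 * Added commentary (not in the original file): the new entry is
	 * linked into mfc_cache_array under write_lock_bh() so that the
	 * forwarding path, which walks the hash chain under read_lock()
	 * only, never observes a half-initialised entry.
	 */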
	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen!=sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mroute_socket) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mroute_socket=sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk!=mroute_socket)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen!=sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif,optval,sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname==MRT_ADD_VIF) {
			ret = vif_add(&vif, sk==mroute_socket);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen!=sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc,optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname==MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		mroute_do_assert=(v)?1:0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
	int olr;
	int val;

	if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr,optlen))
		return -EFAULT;
	if (optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if (copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}
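/*
 * Illustrative sketch (assumption, not part of this file): after an
 * IGMPMSG_NOCACHE upcall the daemon typically answers by installing an
 * (S,G) entry with MRT_ADD_MFC, which ends up in ipmr_mfc_add() above:
 *
 *	struct mfcctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mfcc_origin.s_addr = src;		(hypothetical source)
 *	mc.mfcc_mcastgrp.s_addr = grp;		(hypothetical group)
 *	mc.mfcc_parent = iif_vif;		(incoming vif index)
 *	mc.mfcc_ttls[oif_vif] = 1;		(forward on this vif)
 *	setsockopt(igmp_fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */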
/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr,arg,sizeof(vr)))
			return -EFAULT;
		if (vr.vifi>=maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif=&vif_table[vr.vifi];
		if (VIF_EXISTS(vr.vifi)) {
			vr.icount=vif->pkt_in;
			vr.ocount=vif->pkt_out;
			vr.ibytes=vif->bytes_in;
			vr.obytes=vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&vr,sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr,arg,sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&sr,sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for (ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
	.notifier_call = ipmr_device_event,
};
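/*
 * Added commentary (not in the original file): the NETDEV_UNREGISTER
 * handler above calls vif_delete() with notify == 1, so vif_delete()
 * skips its own unregister_netdevice() call - the device is already
 * being torn down by the caller of the notifier chain.
 */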
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options * opt	= &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets will
		   simply disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if the mrouter runs a multicast application,
	 * it should receive packets regardless of which interface the
	 * application joined on.
	 * If we do not do this, the application would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not an mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies of each packet.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packets if they were
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicast applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to the
		       SPT, so we cannot check that the packet arrived on an
		       oif. That is unfortunate, but otherwise we would need to
		       move a pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend=ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forwarding; it should not be
	   forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations, such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				nf_reset(skb);
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct pimreghdr));
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache==NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!VIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!VIF_EXISTS(iter->ct))
			continue;
		return &vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_vif_seq_ops,
			sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

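/*
 * Added commentary (not in the original file): the MFC seq_file iterator
 * walks two containers back to back - the resolved hash table under
 * mrt_lock, then the unresolved queue under mfc_unres_lock - and records
 * which one it is in via it->cache.  ipmr_mfc_seq_stop() relies on that
 * to drop whichever lock is still held.
 */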
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++ ) {
				if (VIF_EXISTS(n)
				   && mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
			sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
reg_notif_fail:
	kmem_cache_destroy(mrt_cachep);
#ifdef CONFIG_PROC_FS
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
#endif
	return err;
}