xref: /linux/net/ipv4/ipmr.c (revision 173d6681380aa1d60dfc35ed7178bd7811ba2784)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@redhat.com>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13  *
14  *	Fixes:
15  *	Michael Chastain	:	Incorrect size of copying.
16  *	Alan Cox		:	Added the cache manager code
17  *	Alan Cox		:	Fixed the clone/copy bug and device race.
18  *	Mike McLagan		:	Routing by source
19  *	Malcolm Beattie		:	Buffer handling fixes.
20  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
21  *	SVR Anand		:	Fixed several multicast bugs and problems.
22  *	Alexey Kuznetsov	:	Status, optimisations and more.
23  *	Brad Parker		:	Better behaviour on mrouted upcall
24  *					overflow.
25  *      Carlos Picoto           :       PIMv1 Support
26  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
27  *					Relax this requirement to work with older peers.
28  *
29  */
30 
31 #include <asm/system.h>
32 #include <asm/uaccess.h>
33 #include <linux/types.h>
34 #include <linux/sched.h>
35 #include <linux/capability.h>
36 #include <linux/errno.h>
37 #include <linux/timer.h>
38 #include <linux/mm.h>
39 #include <linux/kernel.h>
40 #include <linux/fcntl.h>
41 #include <linux/stat.h>
42 #include <linux/socket.h>
43 #include <linux/in.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h>
49 #include <linux/seq_file.h>
50 #include <linux/mroute.h>
51 #include <linux/init.h>
52 #include <linux/if_ether.h>
53 #include <net/ip.h>
54 #include <net/protocol.h>
55 #include <linux/skbuff.h>
56 #include <net/route.h>
57 #include <net/sock.h>
58 #include <net/icmp.h>
59 #include <net/udp.h>
60 #include <net/raw.h>
61 #include <linux/notifier.h>
62 #include <linux/if_arp.h>
63 #include <linux/netfilter_ipv4.h>
64 #include <net/ipip.h>
65 #include <net/checksum.h>
66 
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM	1
69 #endif
70 
71 static struct sock *mroute_socket;
72 
73 
74 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
75    Note that changes are serialized via rtnl_lock.
76  */
77 
78 static DEFINE_RWLOCK(mrt_lock);
79 
80 /*
81  *	Multicast router control variables
82  */
83 
84 static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
85 static int maxvif;
86 
87 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
88 
89 static int mroute_do_assert;				/* Set in PIM assert	*/
90 static int mroute_do_pim;
91 
92 static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
93 
94 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
95 static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
96 
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to Alan's original scheme. The hash table of resolved
101    entries is changed only in process context and protected
102    by the weak lock mrt_lock. The queue of unresolved entries is
103    protected by the strong spinlock mfc_unres_lock.
104 
105    This way the data path is entirely free of exclusive locks.
106  */
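
/* Locking summary (editor's sketch, derived from the code in this file):
 *
 *	vif_table, mfc_cache_array:	read_lock(&mrt_lock) on the data path,
 *					write_lock_bh(&mrt_lock) for updates
 *					(updates happen in process context only)
 *	mfc_unres_queue:		spin_lock_bh(&mfc_unres_lock)
 *	configuration changes:		additionally serialized by rtnl_lock()
 */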
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
110 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
111 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
112 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
113 
114 #ifdef CONFIG_IP_PIMSM_V2
115 static struct net_protocol pim_protocol;
116 #endif
117 
118 static struct timer_list ipmr_expire_timer;
119 
120 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
121 
122 static
123 struct net_device *ipmr_new_tunnel(struct vifctl *v)
124 {
125 	struct net_device  *dev;
126 
127 	dev = __dev_get_by_name("tunl0");
128 
129 	if (dev) {
130 		int err;
131 		struct ifreq ifr;
132 		mm_segment_t	oldfs;
133 		struct ip_tunnel_parm p;
134 		struct in_device  *in_dev;
135 
136 		memset(&p, 0, sizeof(p));
137 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
138 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
139 		p.iph.version = 4;
140 		p.iph.ihl = 5;
141 		p.iph.protocol = IPPROTO_IPIP;
142 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
143 		ifr.ifr_ifru.ifru_data = (void*)&p;
144 
145 		oldfs = get_fs(); set_fs(KERNEL_DS);
146 		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
147 		set_fs(oldfs);
148 
149 		dev = NULL;
150 
151 		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
152 			dev->flags |= IFF_MULTICAST;
153 
154 			in_dev = __in_dev_get_rtnl(dev);
155 			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
156 				goto failure;
157 			in_dev->cnf.rp_filter = 0;
158 
159 			if (dev_open(dev))
160 				goto failure;
161 		}
162 	}
163 	return dev;
164 
165 failure:
166 	/* allow the register to be completed before unregistering. */
167 	rtnl_unlock();
168 	rtnl_lock();
169 
170 	unregister_netdevice(dev);
171 	return NULL;
172 }
173 
174 #ifdef CONFIG_IP_PIMSM
175 
176 static int reg_vif_num = -1;
177 
178 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
179 {
180 	read_lock(&mrt_lock);
181 	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
182 	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
183 	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
184 	read_unlock(&mrt_lock);
185 	kfree_skb(skb);
186 	return 0;
187 }
188 
189 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
190 {
191 	return (struct net_device_stats*)netdev_priv(dev);
192 }
193 
194 static void reg_vif_setup(struct net_device *dev)
195 {
196 	dev->type		= ARPHRD_PIMREG;
197 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
198 	dev->flags		= IFF_NOARP;
199 	dev->hard_start_xmit	= reg_vif_xmit;
200 	dev->get_stats		= reg_vif_get_stats;
201 	dev->destructor		= free_netdev;
202 }
203 
204 static struct net_device *ipmr_reg_vif(void)
205 {
206 	struct net_device *dev;
207 	struct in_device *in_dev;
208 
209 	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
210 			   reg_vif_setup);
211 
212 	if (dev == NULL)
213 		return NULL;
214 
215 	if (register_netdevice(dev)) {
216 		free_netdev(dev);
217 		return NULL;
218 	}
219 	dev->iflink = 0;
220 
221 	if ((in_dev = inetdev_init(dev)) == NULL)
222 		goto failure;
223 
224 	in_dev->cnf.rp_filter = 0;
225 
226 	if (dev_open(dev))
227 		goto failure;
228 
229 	return dev;
230 
231 failure:
232 	/* allow the register to be completed before unregistering. */
233 	rtnl_unlock();
234 	rtnl_lock();
235 
236 	unregister_netdevice(dev);
237 	return NULL;
238 }
239 #endif
240 
241 /*
242  *	Delete a VIF entry
243  */
244 
245 static int vif_delete(int vifi)
246 {
247 	struct vif_device *v;
248 	struct net_device *dev;
249 	struct in_device *in_dev;
250 
251 	if (vifi < 0 || vifi >= maxvif)
252 		return -EADDRNOTAVAIL;
253 
254 	v = &vif_table[vifi];
255 
256 	write_lock_bh(&mrt_lock);
257 	dev = v->dev;
258 	v->dev = NULL;
259 
260 	if (!dev) {
261 		write_unlock_bh(&mrt_lock);
262 		return -EADDRNOTAVAIL;
263 	}
264 
265 #ifdef CONFIG_IP_PIMSM
266 	if (vifi == reg_vif_num)
267 		reg_vif_num = -1;
268 #endif
269 
270 	if (vifi+1 == maxvif) {
271 		int tmp;
272 		for (tmp=vifi-1; tmp>=0; tmp--) {
273 			if (VIF_EXISTS(tmp))
274 				break;
275 		}
276 		maxvif = tmp+1;
277 	}
278 
279 	write_unlock_bh(&mrt_lock);
280 
281 	dev_set_allmulti(dev, -1);
282 
283 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
284 		in_dev->cnf.mc_forwarding--;
285 		ip_rt_multicast_event(in_dev);
286 	}
287 
288 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
289 		unregister_netdevice(dev);
290 
291 	dev_put(dev);
292 	return 0;
293 }
294 
295 /* Destroy an unresolved cache entry, killing queued skbs
296    and reporting an error to netlink readers.
297  */
298 
299 static void ipmr_destroy_unres(struct mfc_cache *c)
300 {
301 	struct sk_buff *skb;
302 	struct nlmsgerr *e;
303 
304 	atomic_dec(&cache_resolve_queue_len);
305 
306 	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
307 		if (skb->nh.iph->version == 0) {
308 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
309 			nlh->nlmsg_type = NLMSG_ERROR;
310 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
311 			skb_trim(skb, nlh->nlmsg_len);
312 			e = NLMSG_DATA(nlh);
313 			e->error = -ETIMEDOUT;
314 			memset(&e->msg, 0, sizeof(e->msg));
315 
316 			rtnl_unicast(skb, NETLINK_CB(skb).pid);
317 		} else
318 			kfree_skb(skb);
319 	}
320 
321 	kmem_cache_free(mrt_cachep, c);
322 }
323 
324 
325 /* Single timer process for the whole unresolved queue. */
326 
327 static void ipmr_expire_process(unsigned long dummy)
328 {
329 	unsigned long now;
330 	unsigned long expires;
331 	struct mfc_cache *c, **cp;
332 
333 	if (!spin_trylock(&mfc_unres_lock)) {
334 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
335 		return;
336 	}
337 
338 	if (atomic_read(&cache_resolve_queue_len) == 0)
339 		goto out;
340 
341 	now = jiffies;
342 	expires = 10*HZ;
343 	cp = &mfc_unres_queue;
344 
345 	while ((c=*cp) != NULL) {
346 		if (time_after(c->mfc_un.unres.expires, now)) {
347 			unsigned long interval = c->mfc_un.unres.expires - now;
348 			if (interval < expires)
349 				expires = interval;
350 			cp = &c->next;
351 			continue;
352 		}
353 
354 		*cp = c->next;
355 
356 		ipmr_destroy_unres(c);
357 	}
358 
359 	if (atomic_read(&cache_resolve_queue_len))
360 		mod_timer(&ipmr_expire_timer, jiffies + expires);
361 
362 out:
363 	spin_unlock(&mfc_unres_lock);
364 }
365 
366 /* Fill the oifs list. Called with mrt_lock write-locked. */
367 
368 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
369 {
370 	int vifi;
371 
372 	cache->mfc_un.res.minvif = MAXVIFS;
373 	cache->mfc_un.res.maxvif = 0;
374 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
375 
376 	for (vifi=0; vifi<maxvif; vifi++) {
377 		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
378 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
379 			if (cache->mfc_un.res.minvif > vifi)
380 				cache->mfc_un.res.minvif = vifi;
381 			if (cache->mfc_un.res.maxvif <= vifi)
382 				cache->mfc_un.res.maxvif = vifi + 1;
383 		}
384 	}
385 }
386 
387 static int vif_add(struct vifctl *vifc, int mrtsock)
388 {
389 	int vifi = vifc->vifc_vifi;
390 	struct vif_device *v = &vif_table[vifi];
391 	struct net_device *dev;
392 	struct in_device *in_dev;
393 
394 	/* Is vif busy ? */
395 	if (VIF_EXISTS(vifi))
396 		return -EADDRINUSE;
397 
398 	switch (vifc->vifc_flags) {
399 #ifdef CONFIG_IP_PIMSM
400 	case VIFF_REGISTER:
401 		/*
402 		 * Special Purpose VIF in PIM
403 		 * All the packets will be sent to the daemon
404 		 */
405 		if (reg_vif_num >= 0)
406 			return -EADDRINUSE;
407 		dev = ipmr_reg_vif();
408 		if (!dev)
409 			return -ENOBUFS;
410 		break;
411 #endif
412 	case VIFF_TUNNEL:
413 		dev = ipmr_new_tunnel(vifc);
414 		if (!dev)
415 			return -ENOBUFS;
416 		break;
417 	case 0:
418 		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
419 		if (!dev)
420 			return -EADDRNOTAVAIL;
421 		dev_put(dev);
422 		break;
423 	default:
424 		return -EINVAL;
425 	}
426 
427 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
428 		return -EADDRNOTAVAIL;
429 	in_dev->cnf.mc_forwarding++;
430 	dev_set_allmulti(dev, +1);
431 	ip_rt_multicast_event(in_dev);
432 
433 	/*
434 	 *	Fill in the VIF structures
435 	 */
436 	v->rate_limit=vifc->vifc_rate_limit;
437 	v->local=vifc->vifc_lcl_addr.s_addr;
438 	v->remote=vifc->vifc_rmt_addr.s_addr;
439 	v->flags=vifc->vifc_flags;
440 	if (!mrtsock)
441 		v->flags |= VIFF_STATIC;
442 	v->threshold=vifc->vifc_threshold;
443 	v->bytes_in = 0;
444 	v->bytes_out = 0;
445 	v->pkt_in = 0;
446 	v->pkt_out = 0;
447 	v->link = dev->ifindex;
448 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
449 		v->link = dev->iflink;
450 
451 	/* And finish update writing critical data */
452 	write_lock_bh(&mrt_lock);
453 	dev_hold(dev);
454 	v->dev=dev;
455 #ifdef CONFIG_IP_PIMSM
456 	if (v->flags&VIFF_REGISTER)
457 		reg_vif_num = vifi;
458 #endif
459 	if (vifi+1 > maxvif)
460 		maxvif = vifi+1;
461 	write_unlock_bh(&mrt_lock);
462 	return 0;
463 }
464 
465 static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
466 {
467 	int line=MFC_HASH(mcastgrp,origin);
468 	struct mfc_cache *c;
469 
470 	for (c=mfc_cache_array[line]; c; c = c->next) {
471 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
472 			break;
473 	}
474 	return c;
475 }
476 
477 /*
478  *	Allocate a multicast cache entry
479  */
480 static struct mfc_cache *ipmr_cache_alloc(void)
481 {
482 	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
483 	if(c==NULL)
484 		return NULL;
485 	memset(c, 0, sizeof(*c));
486 	c->mfc_un.res.minvif = MAXVIFS;
487 	return c;
488 }
489 
490 static struct mfc_cache *ipmr_cache_alloc_unres(void)
491 {
492 	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
493 	if(c==NULL)
494 		return NULL;
495 	memset(c, 0, sizeof(*c));
496 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
497 	c->mfc_un.unres.expires = jiffies + 10*HZ;
498 	return c;
499 }
500 
501 /*
502  *	A cache entry has moved from the unresolved queue to the resolved state
503  */
504 
505 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
506 {
507 	struct sk_buff *skb;
508 	struct nlmsgerr *e;
509 
510 	/*
511 	 *	Play the pending entries through our router
512 	 */
513 
514 	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
515 		if (skb->nh.iph->version == 0) {
516 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
517 
518 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
519 				nlh->nlmsg_len = skb->tail - (u8*)nlh;
520 			} else {
521 				nlh->nlmsg_type = NLMSG_ERROR;
522 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
523 				skb_trim(skb, nlh->nlmsg_len);
524 				e = NLMSG_DATA(nlh);
525 				e->error = -EMSGSIZE;
526 				memset(&e->msg, 0, sizeof(e->msg));
527 			}
528 
529 			rtnl_unicast(skb, NETLINK_CB(skb).pid);
530 		} else
531 			ip_mr_forward(skb, c, 0);
532 	}
533 }
534 
535 /*
536  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
537  *	expects the following bizarre scheme.
538  *
539  *	Called under mrt_lock.
540  */
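
/* Upcall layout as seen by the daemon (editor's sketch, based on the
 * struct igmpmsg definition in <linux/mroute.h>): the message overlays the
 * IP header, with im_msgtype in the ttl slot, im_mbz in the protocol slot
 * (always 0, which is how the daemon tells upcalls from real IGMP) and
 * im_src/im_dst in saddr/daddr. A daemon would parse it roughly as:
 *
 *	struct igmpmsg *m = (struct igmpmsg *)buf;
 *
 *	if (m->im_mbz == 0 && m->im_msgtype == IGMPMSG_NOCACHE)
 *		resolve_route(m->im_src, m->im_dst, m->im_vif);
 *
 * where buf holds a packet read from the IGMP raw socket and
 * resolve_route() is a hypothetical daemon helper.
 */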
541 
542 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
543 {
544 	struct sk_buff *skb;
545 	int ihl = pkt->nh.iph->ihl<<2;
546 	struct igmphdr *igmp;
547 	struct igmpmsg *msg;
548 	int ret;
549 
550 #ifdef CONFIG_IP_PIMSM
551 	if (assert == IGMPMSG_WHOLEPKT)
552 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
553 	else
554 #endif
555 		skb = alloc_skb(128, GFP_ATOMIC);
556 
557 	if(!skb)
558 		return -ENOBUFS;
559 
560 #ifdef CONFIG_IP_PIMSM
561 	if (assert == IGMPMSG_WHOLEPKT) {
562 		/* Ugly, but we have no choice with this interface.
563 		   Duplicate old header, fix ihl, length etc.
564 		   And all this only to mangle msg->im_msgtype and
565 		   to set msg->im_mbz to "mbz" :-)
566 		 */
567 		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
568 		skb->nh.raw = skb->h.raw = (u8*)msg;
569 		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
570 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
571 		msg->im_mbz = 0;
572  		msg->im_vif = reg_vif_num;
573 		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
574 		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
575 	} else
576 #endif
577 	{
578 
579 	/*
580 	 *	Copy the IP header
581 	 */
582 
583 	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
584 	memcpy(skb->data,pkt->data,ihl);
585 	skb->nh.iph->protocol = 0;			/* Flag to the daemon: this is an upcall, not real IGMP */
586 	msg = (struct igmpmsg*)skb->nh.iph;
587 	msg->im_vif = vifi;
588 	skb->dst = dst_clone(pkt->dst);
589 
590 	/*
591 	 *	Add our header
592 	 */
593 
594 	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
595 	igmp->type	=
596 	msg->im_msgtype = assert;
597 	igmp->code 	=	0;
598 	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
599 	skb->h.raw = skb->nh.raw;
600 	}
601 
602 	if (mroute_socket == NULL) {
603 		kfree_skb(skb);
604 		return -EINVAL;
605 	}
606 
607 	/*
608 	 *	Deliver to mrouted
609 	 */
610 	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
611 		if (net_ratelimit())
612 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
613 		kfree_skb(skb);
614 	}
615 
616 	return ret;
617 }
618 
619 /*
620  *	Queue a packet for resolution; takes mfc_unres_lock itself.
621  */
622 
623 static int
624 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
625 {
626 	int err;
627 	struct mfc_cache *c;
628 
629 	spin_lock_bh(&mfc_unres_lock);
630 	for (c=mfc_unres_queue; c; c=c->next) {
631 		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
632 		    c->mfc_origin == skb->nh.iph->saddr)
633 			break;
634 	}
635 
636 	if (c == NULL) {
637 		/*
638 		 *	Create a new entry if allowable
639 		 */
640 
641 		if (atomic_read(&cache_resolve_queue_len)>=10 ||
642 		    (c=ipmr_cache_alloc_unres())==NULL) {
643 			spin_unlock_bh(&mfc_unres_lock);
644 
645 			kfree_skb(skb);
646 			return -ENOBUFS;
647 		}
648 
649 		/*
650 		 *	Fill in the new cache entry
651 		 */
652 		c->mfc_parent=-1;
653 		c->mfc_origin=skb->nh.iph->saddr;
654 		c->mfc_mcastgrp=skb->nh.iph->daddr;
655 
656 		/*
657 		 *	Reflect first query at mrouted.
658 		 */
659 		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
660 			/* If the report failed throw the cache entry
661 			   out - Brad Parker
662 			 */
663 			spin_unlock_bh(&mfc_unres_lock);
664 
665 			kmem_cache_free(mrt_cachep, c);
666 			kfree_skb(skb);
667 			return err;
668 		}
669 
670 		atomic_inc(&cache_resolve_queue_len);
671 		c->next = mfc_unres_queue;
672 		mfc_unres_queue = c;
673 
674 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
675 	}
676 
677 	/*
678 	 *	See if we can append the packet
679 	 */
680 	if (c->mfc_un.unres.unresolved.qlen>3) {
681 		kfree_skb(skb);
682 		err = -ENOBUFS;
683 	} else {
684 		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
685 		err = 0;
686 	}
687 
688 	spin_unlock_bh(&mfc_unres_lock);
689 	return err;
690 }
691 
692 /*
693  *	MFC cache manipulation by user space mroute daemon
694  */
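
/* Userspace sketch (illustrative only, not part of this file): installing
 * an (S,G) entry via the MRT_ADD_MFC option handled below. "s" is the
 * daemon's IGMP raw socket; src, grp, in_vif and out_vif are hypothetical.
 *
 *	struct mfcctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mfcc_origin.s_addr   = src;
 *	mc.mfcc_mcastgrp.s_addr = grp;
 *	mc.mfcc_parent          = in_vif;
 *	mc.mfcc_ttls[out_vif]   = 1;	/* forward if packet ttl > 1 */
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */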
695 
696 static int ipmr_mfc_delete(struct mfcctl *mfc)
697 {
698 	int line;
699 	struct mfc_cache *c, **cp;
700 
701 	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
702 
703 	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
704 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
705 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
706 			write_lock_bh(&mrt_lock);
707 			*cp = c->next;
708 			write_unlock_bh(&mrt_lock);
709 
710 			kmem_cache_free(mrt_cachep, c);
711 			return 0;
712 		}
713 	}
714 	return -ENOENT;
715 }
716 
717 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
718 {
719 	int line;
720 	struct mfc_cache *uc, *c, **cp;
721 
722 	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
723 
724 	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
725 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
726 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
727 			break;
728 	}
729 
730 	if (c != NULL) {
731 		write_lock_bh(&mrt_lock);
732 		c->mfc_parent = mfc->mfcc_parent;
733 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
734 		if (!mrtsock)
735 			c->mfc_flags |= MFC_STATIC;
736 		write_unlock_bh(&mrt_lock);
737 		return 0;
738 	}
739 
740 	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
741 		return -EINVAL;
742 
743 	c=ipmr_cache_alloc();
744 	if (c==NULL)
745 		return -ENOMEM;
746 
747 	c->mfc_origin=mfc->mfcc_origin.s_addr;
748 	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
749 	c->mfc_parent=mfc->mfcc_parent;
750 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
751 	if (!mrtsock)
752 		c->mfc_flags |= MFC_STATIC;
753 
754 	write_lock_bh(&mrt_lock);
755 	c->next = mfc_cache_array[line];
756 	mfc_cache_array[line] = c;
757 	write_unlock_bh(&mrt_lock);
758 
759 	/*
760 	 *	Check whether this resolves a queued unresolved entry. If so,
761 	 *	we need to send on the queued frames and tidy up.
762 	 */
763 	spin_lock_bh(&mfc_unres_lock);
764 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
765 	     cp = &uc->next) {
766 		if (uc->mfc_origin == c->mfc_origin &&
767 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
768 			*cp = uc->next;
769 			if (atomic_dec_and_test(&cache_resolve_queue_len))
770 				del_timer(&ipmr_expire_timer);
771 			break;
772 		}
773 	}
774 	spin_unlock_bh(&mfc_unres_lock);
775 
776 	if (uc) {
777 		ipmr_cache_resolve(uc, c);
778 		kmem_cache_free(mrt_cachep, uc);
779 	}
780 	return 0;
781 }
782 
783 /*
784  *	Close the multicast socket, and clear the vif tables etc
785  */
786 
787 static void mroute_clean_tables(struct sock *sk)
788 {
789 	int i;
790 
791 	/*
792 	 *	Shut down all active vif entries
793 	 */
794 	for(i=0; i<maxvif; i++) {
795 		if (!(vif_table[i].flags&VIFF_STATIC))
796 			vif_delete(i);
797 	}
798 
799 	/*
800 	 *	Wipe the cache
801 	 */
802 	for (i=0;i<MFC_LINES;i++) {
803 		struct mfc_cache *c, **cp;
804 
805 		cp = &mfc_cache_array[i];
806 		while ((c = *cp) != NULL) {
807 			if (c->mfc_flags&MFC_STATIC) {
808 				cp = &c->next;
809 				continue;
810 			}
811 			write_lock_bh(&mrt_lock);
812 			*cp = c->next;
813 			write_unlock_bh(&mrt_lock);
814 
815 			kmem_cache_free(mrt_cachep, c);
816 		}
817 	}
818 
819 	if (atomic_read(&cache_resolve_queue_len) != 0) {
820 		struct mfc_cache *c;
821 
822 		spin_lock_bh(&mfc_unres_lock);
823 		while (mfc_unres_queue != NULL) {
824 			c = mfc_unres_queue;
825 			mfc_unres_queue = c->next;
826 			spin_unlock_bh(&mfc_unres_lock);
827 
828 			ipmr_destroy_unres(c);
829 
830 			spin_lock_bh(&mfc_unres_lock);
831 		}
832 		spin_unlock_bh(&mfc_unres_lock);
833 	}
834 }
835 
836 static void mrtsock_destruct(struct sock *sk)
837 {
838 	rtnl_lock();
839 	if (sk == mroute_socket) {
840 		ipv4_devconf.mc_forwarding--;
841 
842 		write_lock_bh(&mrt_lock);
843 		mroute_socket=NULL;
844 		write_unlock_bh(&mrt_lock);
845 
846 		mroute_clean_tables(sk);
847 	}
848 	rtnl_unlock();
849 }
850 
851 /*
852  *	Socket options and virtual interface manipulation. The whole
853  *	virtual interface system is a complete heap, but unfortunately
854  *	that's how BSD mrouted happens to think. Maybe one day with a proper
855  *	MOSPF/PIM router set up we can clean this up.
856  */
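
/* Minimal userspace sketch (illustrative only): how a daemon such as
 * mrouted drives the options handled below. "local_addr" is a
 * hypothetical placeholder for the interface address to bind the vif to.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc;
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;
 *	vc.vifc_threshold = 1;
 *	vc.vifc_lcl_addr.s_addr = local_addr;
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */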
857 
858 int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
859 {
860 	int ret;
861 	struct vifctl vif;
862 	struct mfcctl mfc;
863 
864 	if(optname!=MRT_INIT)
865 	{
866 		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
867 			return -EACCES;
868 	}
869 
870 	switch(optname)
871 	{
872 		case MRT_INIT:
873 			if (sk->sk_type != SOCK_RAW ||
874 			    inet_sk(sk)->num != IPPROTO_IGMP)
875 				return -EOPNOTSUPP;
876 			if(optlen!=sizeof(int))
877 				return -ENOPROTOOPT;
878 
879 			rtnl_lock();
880 			if (mroute_socket) {
881 				rtnl_unlock();
882 				return -EADDRINUSE;
883 			}
884 
885 			ret = ip_ra_control(sk, 1, mrtsock_destruct);
886 			if (ret == 0) {
887 				write_lock_bh(&mrt_lock);
888 				mroute_socket=sk;
889 				write_unlock_bh(&mrt_lock);
890 
891 				ipv4_devconf.mc_forwarding++;
892 			}
893 			rtnl_unlock();
894 			return ret;
895 		case MRT_DONE:
896 			if (sk!=mroute_socket)
897 				return -EACCES;
898 			return ip_ra_control(sk, 0, NULL);
899 		case MRT_ADD_VIF:
900 		case MRT_DEL_VIF:
901 			if(optlen!=sizeof(vif))
902 				return -EINVAL;
903 			if (copy_from_user(&vif,optval,sizeof(vif)))
904 				return -EFAULT;
905 			if(vif.vifc_vifi >= MAXVIFS)
906 				return -ENFILE;
907 			rtnl_lock();
908 			if (optname==MRT_ADD_VIF) {
909 				ret = vif_add(&vif, sk==mroute_socket);
910 			} else {
911 				ret = vif_delete(vif.vifc_vifi);
912 			}
913 			rtnl_unlock();
914 			return ret;
915 
916 		/*
917 		 *	Manipulate the forwarding caches. These live
918 		 *	in a sort of kernel/user symbiosis.
919 		 */
920 		case MRT_ADD_MFC:
921 		case MRT_DEL_MFC:
922 			if(optlen!=sizeof(mfc))
923 				return -EINVAL;
924 			if (copy_from_user(&mfc,optval, sizeof(mfc)))
925 				return -EFAULT;
926 			rtnl_lock();
927 			if (optname==MRT_DEL_MFC)
928 				ret = ipmr_mfc_delete(&mfc);
929 			else
930 				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
931 			rtnl_unlock();
932 			return ret;
933 		/*
934 		 *	Control PIM assert.
935 		 */
936 		case MRT_ASSERT:
937 		{
938 			int v;
939 			if(get_user(v,(int __user *)optval))
940 				return -EFAULT;
941 			mroute_do_assert=(v)?1:0;
942 			return 0;
943 		}
944 #ifdef CONFIG_IP_PIMSM
945 		case MRT_PIM:
946 		{
947 			int v, ret;
948 			if(get_user(v,(int __user *)optval))
949 				return -EFAULT;
950 			v = (v)?1:0;
951 			rtnl_lock();
952 			ret = 0;
953 			if (v != mroute_do_pim) {
954 				mroute_do_pim = v;
955 				mroute_do_assert = v;
956 #ifdef CONFIG_IP_PIMSM_V2
957 				if (mroute_do_pim)
958 					ret = inet_add_protocol(&pim_protocol,
959 								IPPROTO_PIM);
960 				else
961 					ret = inet_del_protocol(&pim_protocol,
962 								IPPROTO_PIM);
963 				if (ret < 0)
964 					ret = -EAGAIN;
965 #endif
966 			}
967 			rtnl_unlock();
968 			return ret;
969 		}
970 #endif
971 		/*
972 		 *	Spurious command, or MRT_VERSION which you cannot
973 		 *	set.
974 		 */
975 		default:
976 			return -ENOPROTOOPT;
977 	}
978 }
979 
980 /*
981  *	Getsock opt support for the multicast routing system.
982  */
983 
984 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
985 {
986 	int olr;
987 	int val;
988 
989 	if(optname!=MRT_VERSION &&
990 #ifdef CONFIG_IP_PIMSM
991 	   optname!=MRT_PIM &&
992 #endif
993 	   optname!=MRT_ASSERT)
994 		return -ENOPROTOOPT;
995 
996 	if (get_user(olr, optlen))
997 		return -EFAULT;
998 
999 	olr = min_t(unsigned int, olr, sizeof(int));
1000 	if (olr < 0)
1001 		return -EINVAL;
1002 
1003 	if(put_user(olr,optlen))
1004 		return -EFAULT;
1005 	if(optname==MRT_VERSION)
1006 		val=0x0305;
1007 #ifdef CONFIG_IP_PIMSM
1008 	else if(optname==MRT_PIM)
1009 		val=mroute_do_pim;
1010 #endif
1011 	else
1012 		val=mroute_do_assert;
1013 	if(copy_to_user(optval,&val,olr))
1014 		return -EFAULT;
1015 	return 0;
1016 }
1017 
1018 /*
1019  *	The IP multicast ioctl support routines.
1020  */
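
/* Userspace sketch (illustrative only): reading the per-vif counters
 * returned by the SIOCGETVIFCNT branch below, on the same socket "s"
 * used for the MRT_* options.
 *
 *	struct sioc_vif_req vr;
 *
 *	memset(&vr, 0, sizeof(vr));
 *	vr.vifi = 0;
 *	if (ioctl(s, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif 0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 */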
1021 
1022 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1023 {
1024 	struct sioc_sg_req sr;
1025 	struct sioc_vif_req vr;
1026 	struct vif_device *vif;
1027 	struct mfc_cache *c;
1028 
1029 	switch(cmd)
1030 	{
1031 		case SIOCGETVIFCNT:
1032 			if (copy_from_user(&vr,arg,sizeof(vr)))
1033 				return -EFAULT;
1034 			if(vr.vifi>=maxvif)
1035 				return -EINVAL;
1036 			read_lock(&mrt_lock);
1037 			vif=&vif_table[vr.vifi];
1038 			if(VIF_EXISTS(vr.vifi))	{
1039 				vr.icount=vif->pkt_in;
1040 				vr.ocount=vif->pkt_out;
1041 				vr.ibytes=vif->bytes_in;
1042 				vr.obytes=vif->bytes_out;
1043 				read_unlock(&mrt_lock);
1044 
1045 				if (copy_to_user(arg,&vr,sizeof(vr)))
1046 					return -EFAULT;
1047 				return 0;
1048 			}
1049 			read_unlock(&mrt_lock);
1050 			return -EADDRNOTAVAIL;
1051 		case SIOCGETSGCNT:
1052 			if (copy_from_user(&sr,arg,sizeof(sr)))
1053 				return -EFAULT;
1054 
1055 			read_lock(&mrt_lock);
1056 			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1057 			if (c) {
1058 				sr.pktcnt = c->mfc_un.res.pkt;
1059 				sr.bytecnt = c->mfc_un.res.bytes;
1060 				sr.wrong_if = c->mfc_un.res.wrong_if;
1061 				read_unlock(&mrt_lock);
1062 
1063 				if (copy_to_user(arg,&sr,sizeof(sr)))
1064 					return -EFAULT;
1065 				return 0;
1066 			}
1067 			read_unlock(&mrt_lock);
1068 			return -EADDRNOTAVAIL;
1069 		default:
1070 			return -ENOIOCTLCMD;
1071 	}
1072 }
1073 
1074 
1075 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1076 {
1077 	struct vif_device *v;
1078 	int ct;
1079 	if (event != NETDEV_UNREGISTER)
1080 		return NOTIFY_DONE;
1081 	v=&vif_table[0];
1082 	for(ct=0;ct<maxvif;ct++,v++) {
1083 		if (v->dev==ptr)
1084 			vif_delete(ct);
1085 	}
1086 	return NOTIFY_DONE;
1087 }
1088 
1089 
1090 static struct notifier_block ip_mr_notifier={
1091 	.notifier_call = ipmr_device_event,
1092 };
1093 
1094 /*
1095  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1096  *	This avoids tunnel drivers and other mess and gives us the speed so
1097  *	important for multicast video.
1098  */
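
/* Resulting packet layout (editor's sketch):
 *
 *	[ outer iphdr: protocol IPPROTO_IPIP,
 *	  saddr = vif->local, daddr = vif->remote ]
 *	[ original multicast iphdr ]
 *	[ payload ]
 */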
1099 
1100 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1101 {
1102 	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1103 
1104 	iph->version	= 	4;
1105 	iph->tos	=	skb->nh.iph->tos;
1106 	iph->ttl	=	skb->nh.iph->ttl;
1107 	iph->frag_off	=	0;
1108 	iph->daddr	=	daddr;
1109 	iph->saddr	=	saddr;
1110 	iph->protocol	=	IPPROTO_IPIP;
1111 	iph->ihl	=	5;
1112 	iph->tot_len	=	htons(skb->len);
1113 	ip_select_ident(iph, skb->dst, NULL);
1114 	ip_send_check(iph);
1115 
1116 	skb->h.ipiph = skb->nh.iph;
1117 	skb->nh.iph = iph;
1118 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1119 	nf_reset(skb);
1120 }
1121 
1122 static inline int ipmr_forward_finish(struct sk_buff *skb)
1123 {
1124 	struct ip_options * opt	= &(IPCB(skb)->opt);
1125 
1126 	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1127 
1128 	if (unlikely(opt->optlen))
1129 		ip_forward_options(skb);
1130 
1131 	return dst_output(skb);
1132 }
1133 
1134 /*
1135  *	Processing handlers for ipmr_forward
1136  */
1137 
1138 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1139 {
1140 	struct iphdr *iph = skb->nh.iph;
1141 	struct vif_device *vif = &vif_table[vifi];
1142 	struct net_device *dev;
1143 	struct rtable *rt;
1144 	int    encap = 0;
1145 
1146 	if (vif->dev == NULL)
1147 		goto out_free;
1148 
1149 #ifdef CONFIG_IP_PIMSM
1150 	if (vif->flags & VIFF_REGISTER) {
1151 		vif->pkt_out++;
1152 		vif->bytes_out+=skb->len;
1153 		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
1154 		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
1155 		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1156 		kfree_skb(skb);
1157 		return;
1158 	}
1159 #endif
1160 
1161 	if (vif->flags&VIFF_TUNNEL) {
1162 		struct flowi fl = { .oif = vif->link,
1163 				    .nl_u = { .ip4_u =
1164 					      { .daddr = vif->remote,
1165 						.saddr = vif->local,
1166 						.tos = RT_TOS(iph->tos) } },
1167 				    .proto = IPPROTO_IPIP };
1168 		if (ip_route_output_key(&rt, &fl))
1169 			goto out_free;
1170 		encap = sizeof(struct iphdr);
1171 	} else {
1172 		struct flowi fl = { .oif = vif->link,
1173 				    .nl_u = { .ip4_u =
1174 					      { .daddr = iph->daddr,
1175 						.tos = RT_TOS(iph->tos) } },
1176 				    .proto = IPPROTO_IPIP };
1177 		if (ip_route_output_key(&rt, &fl))
1178 			goto out_free;
1179 	}
1180 
1181 	dev = rt->u.dst.dev;
1182 
1183 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1184 		/* Do not fragment multicasts. Alas, IPv4 does not
1185 		   allow us to send ICMP here, so oversized packets
1186 		   disappear into a black hole.
1187 		 */
1188 
1189 		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
1190 		ip_rt_put(rt);
1191 		goto out_free;
1192 	}
1193 
1194 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1195 
1196 	if (skb_cow(skb, encap)) {
1197  		ip_rt_put(rt);
1198 		goto out_free;
1199 	}
1200 
1201 	vif->pkt_out++;
1202 	vif->bytes_out+=skb->len;
1203 
1204 	dst_release(skb->dst);
1205 	skb->dst = &rt->u.dst;
1206 	iph = skb->nh.iph;
1207 	ip_decrease_ttl(iph);
1208 
1209 	/* FIXME: forward and output firewalls used to be called here.
1210 	 * What do we do with netfilter? -- RR */
1211 	if (vif->flags & VIFF_TUNNEL) {
1212 		ip_encap(skb, vif->local, vif->remote);
1213 		/* FIXME: extra output firewall step used to be here. --RR */
1214 		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
1215 		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
1216 	}
1217 
1218 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1219 
1220 	/*
1221 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1222 	 * not only before forwarding, but also after forwarding on all output
1223 	 * interfaces. Clearly, if the mrouter runs a multicasting
1224 	 * program, it should receive packets regardless of which interface
1225 	 * the program has joined on.
1226 	 * If we did not do this, the program would have to join on all
1227 	 * interfaces. On the other hand, a multihomed host (or router, but
1228 	 * not mrouter) cannot join on more than one interface - that would
1229 	 * result in receiving multiple copies of each packet.
1230 	 */
1231 	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
1232 		ipmr_forward_finish);
1233 	return;
1234 
1235 out_free:
1236 	kfree_skb(skb);
1237 	return;
1238 }
1239 
1240 static int ipmr_find_vif(struct net_device *dev)
1241 {
1242 	int ct;
1243 	for (ct=maxvif-1; ct>=0; ct--) {
1244 		if (vif_table[ct].dev == dev)
1245 			break;
1246 	}
1247 	return ct;
1248 }
1249 
1250 /* "local" means that we should preserve one skb (for local delivery) */
1251 
1252 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1253 {
1254 	int psend = -1;
1255 	int vif, ct;
1256 
1257 	vif = cache->mfc_parent;
1258 	cache->mfc_un.res.pkt++;
1259 	cache->mfc_un.res.bytes += skb->len;
1260 
1261 	/*
1262 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1263 	 */
1264 	if (vif_table[vif].dev != skb->dev) {
1265 		int true_vifi;
1266 
1267 		if (((struct rtable*)skb->dst)->fl.iif == 0) {
1268 			/* It is our own packet, looped back.
1269 			   Very complicated situation...
1270 
1271 			   The best workaround until the routing daemons are
1272 			   fixed is not to redistribute a packet if it was
1273 			   sent through the wrong interface. It means that
1274 			   multicast applications WILL NOT work for
1275 			   (S,G) entries whose default multicast route points
1276 			   to the wrong oif. In any case, it is not a good
1277 			   idea to run multicasting applications on a router.
1278 			 */
1279 			goto dont_forward;
1280 		}
1281 
1282 		cache->mfc_un.res.wrong_if++;
1283 		true_vifi = ipmr_find_vif(skb->dev);
1284 
1285 		if (true_vifi >= 0 && mroute_do_assert &&
1286 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1287 		       so we cannot check that the packet arrived on an oif.
1288 		       It is bad, but otherwise we would need to move a pretty
1289 		       large chunk of pimd into the kernel. Ough... --ANK
1290 		     */
1291 		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1292 		    time_after(jiffies,
1293 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1294 			cache->mfc_un.res.last_assert = jiffies;
1295 			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1296 		}
1297 		goto dont_forward;
1298 	}
1299 
1300 	vif_table[vif].pkt_in++;
1301 	vif_table[vif].bytes_in+=skb->len;
1302 
1303 	/*
1304 	 *	Forward the frame
1305 	 */
1306 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1307 		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1308 			if (psend != -1) {
1309 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1310 				if (skb2)
1311 					ipmr_queue_xmit(skb2, cache, psend);
1312 			}
1313 			psend=ct;
1314 		}
1315 	}
1316 	if (psend != -1) {
1317 		if (local) {
1318 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1319 			if (skb2)
1320 				ipmr_queue_xmit(skb2, cache, psend);
1321 		} else {
1322 			ipmr_queue_xmit(skb, cache, psend);
1323 			return 0;
1324 		}
1325 	}
1326 
1327 dont_forward:
1328 	if (!local)
1329 		kfree_skb(skb);
1330 	return 0;
1331 }
1332 
1333 
1334 /*
1335  *	Multicast packets for forwarding arrive here
1336  */
1337 
1338 int ip_mr_input(struct sk_buff *skb)
1339 {
1340 	struct mfc_cache *cache;
1341 	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1342 
1343 	/* A packet looped back after forwarding should not be
1344 	   forwarded a second time, but it can still be delivered locally.
1345 	 */
1346 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1347 		goto dont_forward;
1348 
1349 	if (!local) {
1350 		    if (IPCB(skb)->opt.router_alert) {
1351 			    if (ip_call_ra_chain(skb))
1352 				    return 0;
1353 		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1354 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1355 			       Cisco IOS <= 11.2(8)) do not put the router alert
1356 			       option in IGMP packets destined to routable
1357 			       groups. It is very bad, because it means
1358 			       that we could forward NO IGMP messages at all.
1359 			     */
1360 			    read_lock(&mrt_lock);
1361 			    if (mroute_socket) {
1362 				    nf_reset(skb);
1363 				    raw_rcv(mroute_socket, skb);
1364 				    read_unlock(&mrt_lock);
1365 				    return 0;
1366 			    }
1367 			    read_unlock(&mrt_lock);
1368 		    }
1369 	}
1370 
1371 	read_lock(&mrt_lock);
1372 	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1373 
1374 	/*
1375 	 *	No usable cache entry
1376 	 */
1377 	if (cache==NULL) {
1378 		int vif;
1379 
1380 		if (local) {
1381 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1382 			ip_local_deliver(skb);
1383 			if (skb2 == NULL) {
1384 				read_unlock(&mrt_lock);
1385 				return -ENOBUFS;
1386 			}
1387 			skb = skb2;
1388 		}
1389 
1390 		vif = ipmr_find_vif(skb->dev);
1391 		if (vif >= 0) {
1392 			int err = ipmr_cache_unresolved(vif, skb);
1393 			read_unlock(&mrt_lock);
1394 
1395 			return err;
1396 		}
1397 		read_unlock(&mrt_lock);
1398 		kfree_skb(skb);
1399 		return -ENODEV;
1400 	}
1401 
1402 	ip_mr_forward(skb, cache, local);
1403 
1404 	read_unlock(&mrt_lock);
1405 
1406 	if (local)
1407 		return ip_local_deliver(skb);
1408 
1409 	return 0;
1410 
1411 dont_forward:
1412 	if (local)
1413 		return ip_local_deliver(skb);
1414 	kfree_skb(skb);
1415 	return 0;
1416 }
1417 
1418 #ifdef CONFIG_IP_PIMSM_V1
1419 /*
1420  * Handle IGMP messages of PIMv1
1421  */
1422 
1423 int pim_rcv_v1(struct sk_buff * skb)
1424 {
1425 	struct igmphdr *pim;
1426 	struct iphdr   *encap;
1427 	struct net_device  *reg_dev = NULL;
1428 
1429 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1430 		goto drop;
1431 
1432 	pim = (struct igmphdr*)skb->h.raw;
1433 
1434         if (!mroute_do_pim ||
1435 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
1436 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1437 		goto drop;
1438 
1439 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1440 	/*
1441 	   Check that:
1442 	   a. packet is really destined to a multicast group
1443 	   b. packet is not a NULL-REGISTER
1444 	   c. packet is not truncated
1445 	 */
1446 	if (!MULTICAST(encap->daddr) ||
1447 	    encap->tot_len == 0 ||
1448 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1449 		goto drop;
1450 
1451 	read_lock(&mrt_lock);
1452 	if (reg_vif_num >= 0)
1453 		reg_dev = vif_table[reg_vif_num].dev;
1454 	if (reg_dev)
1455 		dev_hold(reg_dev);
1456 	read_unlock(&mrt_lock);
1457 
1458 	if (reg_dev == NULL)
1459 		goto drop;
1460 
1461 	skb->mac.raw = skb->nh.raw;
1462 	skb_pull(skb, (u8*)encap - skb->data);
1463 	skb->nh.iph = (struct iphdr *)skb->data;
1464 	skb->dev = reg_dev;
1465 	skb->protocol = htons(ETH_P_IP);
1466 	skb->ip_summed = 0;
1467 	skb->pkt_type = PACKET_HOST;
1468 	dst_release(skb->dst);
1469 	skb->dst = NULL;
1470 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1471 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1472 	nf_reset(skb);
1473 	netif_rx(skb);
1474 	dev_put(reg_dev);
1475 	return 0;
1476  drop:
1477 	kfree_skb(skb);
1478 	return 0;
1479 }
1480 #endif
1481 
1482 #ifdef CONFIG_IP_PIMSM_V2
1483 static int pim_rcv(struct sk_buff * skb)
1484 {
1485 	struct pimreghdr *pim;
1486 	struct iphdr   *encap;
1487 	struct net_device  *reg_dev = NULL;
1488 
1489 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1490 		goto drop;
1491 
1492 	pim = (struct pimreghdr*)skb->h.raw;
1493         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1494 	    (pim->flags&PIM_NULL_REGISTER) ||
1495 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1496 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1497 		goto drop;
1498 
1499 	/* check if the inner packet is destined to mcast group */
1500 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1501 	if (!MULTICAST(encap->daddr) ||
1502 	    encap->tot_len == 0 ||
1503 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1504 		goto drop;
1505 
1506 	read_lock(&mrt_lock);
1507 	if (reg_vif_num >= 0)
1508 		reg_dev = vif_table[reg_vif_num].dev;
1509 	if (reg_dev)
1510 		dev_hold(reg_dev);
1511 	read_unlock(&mrt_lock);
1512 
1513 	if (reg_dev == NULL)
1514 		goto drop;
1515 
1516 	skb->mac.raw = skb->nh.raw;
1517 	skb_pull(skb, (u8*)encap - skb->data);
1518 	skb->nh.iph = (struct iphdr *)skb->data;
1519 	skb->dev = reg_dev;
1520 	skb->protocol = htons(ETH_P_IP);
1521 	skb->ip_summed = 0;
1522 	skb->pkt_type = PACKET_HOST;
1523 	dst_release(skb->dst);
1524 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1525 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1526 	skb->dst = NULL;
1527 	nf_reset(skb);
1528 	netif_rx(skb);
1529 	dev_put(reg_dev);
1530 	return 0;
1531  drop:
1532 	kfree_skb(skb);
1533 	return 0;
1534 }
1535 #endif
1536 
1537 static int
1538 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1539 {
1540 	int ct;
1541 	struct rtnexthop *nhp;
1542 	struct net_device *dev = vif_table[c->mfc_parent].dev;
1543 	u8 *b = skb->tail;
1544 	struct rtattr *mp_head;
1545 
1546 	if (dev)
1547 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1548 
1549 	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1550 
1551 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1552 		if (c->mfc_un.res.ttls[ct] < 255) {
1553 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1554 				goto rtattr_failure;
1555 			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1556 			nhp->rtnh_flags = 0;
1557 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1558 			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1559 			nhp->rtnh_len = sizeof(*nhp);
1560 		}
1561 	}
1562 	mp_head->rta_type = RTA_MULTIPATH;
1563 	mp_head->rta_len = skb->tail - (u8*)mp_head;
1564 	rtm->rtm_type = RTN_MULTICAST;
1565 	return 1;
1566 
1567 rtattr_failure:
1568 	skb_trim(skb, b - skb->data);
1569 	return -EMSGSIZE;
1570 }
1571 
1572 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1573 {
1574 	int err;
1575 	struct mfc_cache *cache;
1576 	struct rtable *rt = (struct rtable*)skb->dst;
1577 
1578 	read_lock(&mrt_lock);
1579 	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1580 
1581 	if (cache==NULL) {
1582 		struct sk_buff *skb2;
1583 		struct net_device *dev;
1584 		int vif;
1585 
1586 		if (nowait) {
1587 			read_unlock(&mrt_lock);
1588 			return -EAGAIN;
1589 		}
1590 
1591 		dev = skb->dev;
1592 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1593 			read_unlock(&mrt_lock);
1594 			return -ENODEV;
1595 		}
1596 		skb2 = skb_clone(skb, GFP_ATOMIC);
1597 		if (!skb2) {
1598 			read_unlock(&mrt_lock);
1599 			return -ENOMEM;
1600 		}
1601 
1602 		skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
1603 		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
1604 		skb2->nh.iph->saddr = rt->rt_src;
1605 		skb2->nh.iph->daddr = rt->rt_dst;
1606 		skb2->nh.iph->version = 0;
1607 		err = ipmr_cache_unresolved(vif, skb2);
1608 		read_unlock(&mrt_lock);
1609 		return err;
1610 	}
1611 
1612 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1613 		cache->mfc_flags |= MFC_NOTIFY;
1614 	err = ipmr_fill_mroute(skb, cache, rtm);
1615 	read_unlock(&mrt_lock);
1616 	return err;
1617 }
1618 
1619 #ifdef CONFIG_PROC_FS
1620 /*
1621  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1622  */
1623 struct ipmr_vif_iter {
1624 	int ct;
1625 };
1626 
1627 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1628 					   loff_t pos)
1629 {
1630 	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1631 		if(!VIF_EXISTS(iter->ct))
1632 			continue;
1633 		if (pos-- == 0)
1634 			return &vif_table[iter->ct];
1635 	}
1636 	return NULL;
1637 }
1638 
1639 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1640 {
1641 	read_lock(&mrt_lock);
1642 	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1643 		: SEQ_START_TOKEN;
1644 }
1645 
1646 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1647 {
1648 	struct ipmr_vif_iter *iter = seq->private;
1649 
1650 	++*pos;
1651 	if (v == SEQ_START_TOKEN)
1652 		return ipmr_vif_seq_idx(iter, 0);
1653 
1654 	while (++iter->ct < maxvif) {
1655 		if(!VIF_EXISTS(iter->ct))
1656 			continue;
1657 		return &vif_table[iter->ct];
1658 	}
1659 	return NULL;
1660 }
1661 
1662 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1663 {
1664 	read_unlock(&mrt_lock);
1665 }
1666 
1667 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1668 {
1669 	if (v == SEQ_START_TOKEN) {
1670 		seq_puts(seq,
1671 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1672 	} else {
1673 		const struct vif_device *vif = v;
1674 		const char *name =  vif->dev ? vif->dev->name : "none";
1675 
1676 		seq_printf(seq,
1677 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1678 			   vif - vif_table,
1679 			   name, vif->bytes_in, vif->pkt_in,
1680 			   vif->bytes_out, vif->pkt_out,
1681 			   vif->flags, vif->local, vif->remote);
1682 	}
1683 	return 0;
1684 }
1685 
1686 static struct seq_operations ipmr_vif_seq_ops = {
1687 	.start = ipmr_vif_seq_start,
1688 	.next  = ipmr_vif_seq_next,
1689 	.stop  = ipmr_vif_seq_stop,
1690 	.show  = ipmr_vif_seq_show,
1691 };
1692 
1693 static int ipmr_vif_open(struct inode *inode, struct file *file)
1694 {
1695 	struct seq_file *seq;
1696 	int rc = -ENOMEM;
1697 	struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1698 
1699 	if (!s)
1700 		goto out;
1701 
1702 	rc = seq_open(file, &ipmr_vif_seq_ops);
1703 	if (rc)
1704 		goto out_kfree;
1705 
1706 	s->ct = 0;
1707 	seq = file->private_data;
1708 	seq->private = s;
1709 out:
1710 	return rc;
1711 out_kfree:
1712 	kfree(s);
1713 	goto out;
1714 
1715 }
1716 
1717 static struct file_operations ipmr_vif_fops = {
1718 	.owner	 = THIS_MODULE,
1719 	.open    = ipmr_vif_open,
1720 	.read    = seq_read,
1721 	.llseek  = seq_lseek,
1722 	.release = seq_release_private,
1723 };
1724 
1725 struct ipmr_mfc_iter {
1726 	struct mfc_cache **cache;
1727 	int ct;
1728 };
1729 
1730 
1731 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1732 {
1733 	struct mfc_cache *mfc;
1734 
1735 	it->cache = mfc_cache_array;
1736 	read_lock(&mrt_lock);
1737 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1738 		for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1739 			if (pos-- == 0)
1740 				return mfc;
1741 	read_unlock(&mrt_lock);
1742 
1743 	it->cache = &mfc_unres_queue;
1744 	spin_lock_bh(&mfc_unres_lock);
1745 	for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1746 		if (pos-- == 0)
1747 			return mfc;
1748 	spin_unlock_bh(&mfc_unres_lock);
1749 
1750 	it->cache = NULL;
1751 	return NULL;
1752 }
1753 
1754 
1755 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1756 {
1757 	struct ipmr_mfc_iter *it = seq->private;
1758 	it->cache = NULL;
1759 	it->ct = 0;
1760 	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1761 		: SEQ_START_TOKEN;
1762 }
1763 
1764 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1765 {
1766 	struct mfc_cache *mfc = v;
1767 	struct ipmr_mfc_iter *it = seq->private;
1768 
1769 	++*pos;
1770 
1771 	if (v == SEQ_START_TOKEN)
1772 		return ipmr_mfc_seq_idx(seq->private, 0);
1773 
1774 	if (mfc->next)
1775 		return mfc->next;
1776 
1777 	if (it->cache == &mfc_unres_queue)
1778 		goto end_of_list;
1779 
1780 	BUG_ON(it->cache != mfc_cache_array);
1781 
1782 	while (++it->ct < MFC_LINES) {
1783 		mfc = mfc_cache_array[it->ct];
1784 		if (mfc)
1785 			return mfc;
1786 	}
1787 
1788 	/* exhausted cache_array, show unresolved */
1789 	read_unlock(&mrt_lock);
1790 	it->cache = &mfc_unres_queue;
1791 	it->ct = 0;
1792 
1793 	spin_lock_bh(&mfc_unres_lock);
1794 	mfc = mfc_unres_queue;
1795 	if (mfc)
1796 		return mfc;
1797 
1798  end_of_list:
1799 	spin_unlock_bh(&mfc_unres_lock);
1800 	it->cache = NULL;
1801 
1802 	return NULL;
1803 }
1804 
1805 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1806 {
1807 	struct ipmr_mfc_iter *it = seq->private;
1808 
1809 	if (it->cache == &mfc_unres_queue)
1810 		spin_unlock_bh(&mfc_unres_lock);
1811 	else if (it->cache == mfc_cache_array)
1812 		read_unlock(&mrt_lock);
1813 }
1814 
1815 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1816 {
1817 	int n;
1818 
1819 	if (v == SEQ_START_TOKEN) {
1820 		seq_puts(seq,
1821 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1822 	} else {
1823 		const struct mfc_cache *mfc = v;
1824 		const struct ipmr_mfc_iter *it = seq->private;
1825 
1826 		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1827 			   (unsigned long) mfc->mfc_mcastgrp,
1828 			   (unsigned long) mfc->mfc_origin,
1829 			   mfc->mfc_parent,
1830 			   mfc->mfc_un.res.pkt,
1831 			   mfc->mfc_un.res.bytes,
1832 			   mfc->mfc_un.res.wrong_if);
1833 
1834 		if (it->cache != &mfc_unres_queue) {
1835 			for(n = mfc->mfc_un.res.minvif;
1836 			    n < mfc->mfc_un.res.maxvif; n++ ) {
1837 				if(VIF_EXISTS(n)
1838 				   && mfc->mfc_un.res.ttls[n] < 255)
1839 				seq_printf(seq,
1840 					   " %2d:%-3d",
1841 					   n, mfc->mfc_un.res.ttls[n]);
1842 			}
1843 		}
1844 		seq_putc(seq, '\n');
1845 	}
1846 	return 0;
1847 }
1848 
1849 static struct seq_operations ipmr_mfc_seq_ops = {
1850 	.start = ipmr_mfc_seq_start,
1851 	.next  = ipmr_mfc_seq_next,
1852 	.stop  = ipmr_mfc_seq_stop,
1853 	.show  = ipmr_mfc_seq_show,
1854 };
1855 
1856 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1857 {
1858 	struct seq_file *seq;
1859 	int rc = -ENOMEM;
1860 	struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1861 
1862 	if (!s)
1863 		goto out;
1864 
1865 	rc = seq_open(file, &ipmr_mfc_seq_ops);
1866 	if (rc)
1867 		goto out_kfree;
1868 
1869 	seq = file->private_data;
1870 	seq->private = s;
1871 out:
1872 	return rc;
1873 out_kfree:
1874 	kfree(s);
1875 	goto out;
1876 
1877 }
1878 
1879 static struct file_operations ipmr_mfc_fops = {
1880 	.owner	 = THIS_MODULE,
1881 	.open    = ipmr_mfc_open,
1882 	.read    = seq_read,
1883 	.llseek  = seq_lseek,
1884 	.release = seq_release_private,
1885 };
1886 #endif
1887 
1888 #ifdef CONFIG_IP_PIMSM_V2
1889 static struct net_protocol pim_protocol = {
1890 	.handler	=	pim_rcv,
1891 };
1892 #endif
1893 
1894 
1895 /*
1896  *	Setup for IP multicast routing
1897  */
1898 
1899 void __init ip_mr_init(void)
1900 {
1901 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
1902 				       sizeof(struct mfc_cache),
1903 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1904 				       NULL, NULL);
1905 	init_timer(&ipmr_expire_timer);
1906 	ipmr_expire_timer.function=ipmr_expire_process;
1907 	register_netdevice_notifier(&ip_mr_notifier);
1908 #ifdef CONFIG_PROC_FS
1909 	proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1910 	proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1911 #endif
1912 }
1913