xref: /linux/net/ipv4/ipmr.c (revision a115bc070b1fc57ab23f3972401425927b5b465c)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Fixes:
13  *	Michael Chastain	:	Incorrect size of copying.
14  *	Alan Cox		:	Added the cache manager code
15  *	Alan Cox		:	Fixed the clone/copy bug and device race.
16  *	Mike McLagan		:	Routing by source
17  *	Malcolm Beattie		:	Buffer handling fixes.
18  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19  *	SVR Anand		:	Fixed several multicast bugs and problems.
20  *	Alexey Kuznetsov	:	Status, optimisations and more.
21  *	Brad Parker		:	Better behaviour on mrouted upcall
22  *					overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25  *					Relax this requirement to work with older peers.
26  *
27  */
28 
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65 
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM	1
68 #endif
69 
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71    Note that changes are also serialized via rtnl_lock.
72  */
73 
74 static DEFINE_RWLOCK(mrt_lock);
75 
76 /*
77  *	Multicast router control variables
78  */
79 
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81 
82 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
83 
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86 
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected by
89    the weak lock mrt_lock. The queue of unresolved entries is
90    protected by the strong spinlock mfc_unres_lock.
91 
92    This way, the data path is entirely free of exclusive locks.
93  */
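/* For reference, the locking patterns used below (a summary of this
 * file's own conventions, not a general API):
 *   fast path (readers):    read_lock(&mrt_lock); ... read_unlock(&mrt_lock);
 *   updates (process ctx):  write_lock_bh(&mrt_lock); ... write_unlock_bh(&mrt_lock);
 *   unresolved queue:       spin_lock_bh(&mfc_unres_lock); ... spin_unlock_bh(&mfc_unres_lock);
 */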
94 
95 static struct kmem_cache *mrt_cachep __read_mostly;
96 
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 			     struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101 
102 static struct timer_list ipmr_expire_timer;
103 
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105 
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108 	struct net *net = dev_net(dev);
109 
110 	dev_close(dev);
111 
112 	dev = __dev_get_by_name(net, "tunl0");
113 	if (dev) {
114 		const struct net_device_ops *ops = dev->netdev_ops;
115 		struct ifreq ifr;
116 		struct ip_tunnel_parm p;
117 
118 		memset(&p, 0, sizeof(p));
119 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
120 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
121 		p.iph.version = 4;
122 		p.iph.ihl = 5;
123 		p.iph.protocol = IPPROTO_IPIP;
124 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126 
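		/* ndo_do_ioctl() expects a userspace pointer in ifr_data, so
		   temporarily lift the address-space limit to let it read our
		   on-stack parameter block. */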
127 		if (ops->ndo_do_ioctl) {
128 			mm_segment_t oldfs = get_fs();
129 
130 			set_fs(KERNEL_DS);
131 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132 			set_fs(oldfs);
133 		}
134 	}
135 }
136 
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140 	struct net_device  *dev;
141 
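	/* "tunl0" is the IPIP driver's template device; the SIOCADDTUNNEL
	   ioctl issued through it below creates the real dvmrpN tunnel. */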
142 	dev = __dev_get_by_name(net, "tunl0");
143 
144 	if (dev) {
145 		const struct net_device_ops *ops = dev->netdev_ops;
146 		int err;
147 		struct ifreq ifr;
148 		struct ip_tunnel_parm p;
149 		struct in_device  *in_dev;
150 
151 		memset(&p, 0, sizeof(p));
152 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
153 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
154 		p.iph.version = 4;
155 		p.iph.ihl = 5;
156 		p.iph.protocol = IPPROTO_IPIP;
157 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159 
160 		if (ops->ndo_do_ioctl) {
161 			mm_segment_t oldfs = get_fs();
162 
163 			set_fs(KERNEL_DS);
164 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165 			set_fs(oldfs);
166 		} else
167 			err = -EOPNOTSUPP;
168 
169 		dev = NULL;
170 
171 		if (err == 0 &&
172 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
173 			dev->flags |= IFF_MULTICAST;
174 
175 			in_dev = __in_dev_get_rtnl(dev);
176 			if (in_dev == NULL)
177 				goto failure;
178 
179 			ipv4_devconf_setall(in_dev);
180 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181 
182 			if (dev_open(dev))
183 				goto failure;
184 			dev_hold(dev);
185 		}
186 	}
187 	return dev;
188 
189 failure:
190 	/* allow the registration to complete before unregistering. */
191 	rtnl_unlock();
192 	rtnl_lock();
193 
194 	unregister_netdevice(dev);
195 	return NULL;
196 }
197 
198 #ifdef CONFIG_IP_PIMSM
199 
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202 	struct net *net = dev_net(dev);
203 
204 	read_lock(&mrt_lock);
205 	dev->stats.tx_bytes += skb->len;
206 	dev->stats.tx_packets++;
207 	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208 			  IGMPMSG_WHOLEPKT);
209 	read_unlock(&mrt_lock);
210 	kfree_skb(skb);
211 	return NETDEV_TX_OK;
212 }
213 
214 static const struct net_device_ops reg_vif_netdev_ops = {
215 	.ndo_start_xmit	= reg_vif_xmit,
216 };
217 
218 static void reg_vif_setup(struct net_device *dev)
219 {
220 	dev->type		= ARPHRD_PIMREG;
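	/* reserve space for the outer IP header plus the 8-byte PIM register header */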
221 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222 	dev->flags		= IFF_NOARP;
223 	dev->netdev_ops		= &reg_vif_netdev_ops;
224 	dev->destructor		= free_netdev;
225 	dev->features		|= NETIF_F_NETNS_LOCAL;
226 }
227 
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230 	struct net_device *dev;
231 	struct in_device *in_dev;
232 
233 	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234 
235 	if (dev == NULL)
236 		return NULL;
237 
238 	dev_net_set(dev, net);
239 
240 	if (register_netdevice(dev)) {
241 		free_netdev(dev);
242 		return NULL;
243 	}
244 	dev->iflink = 0;
245 
246 	rcu_read_lock();
247 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248 		rcu_read_unlock();
249 		goto failure;
250 	}
251 
252 	ipv4_devconf_setall(in_dev);
253 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254 	rcu_read_unlock();
255 
256 	if (dev_open(dev))
257 		goto failure;
258 
259 	dev_hold(dev);
260 
261 	return dev;
262 
263 failure:
264 	/* allow the registration to complete before unregistering. */
265 	rtnl_unlock();
266 	rtnl_lock();
267 
268 	unregister_netdevice(dev);
269 	return NULL;
270 }
271 #endif
272 
273 /*
274  *	Delete a VIF entry
275  *	@notify: set to 1 if the caller is a notifier_call
276  */
277 
278 static int vif_delete(struct net *net, int vifi, int notify,
279 		      struct list_head *head)
280 {
281 	struct vif_device *v;
282 	struct net_device *dev;
283 	struct in_device *in_dev;
284 
285 	if (vifi < 0 || vifi >= net->ipv4.maxvif)
286 		return -EADDRNOTAVAIL;
287 
288 	v = &net->ipv4.vif_table[vifi];
289 
290 	write_lock_bh(&mrt_lock);
291 	dev = v->dev;
292 	v->dev = NULL;
293 
294 	if (!dev) {
295 		write_unlock_bh(&mrt_lock);
296 		return -EADDRNOTAVAIL;
297 	}
298 
299 #ifdef CONFIG_IP_PIMSM
300 	if (vifi == net->ipv4.mroute_reg_vif_num)
301 		net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303 
304 	if (vifi+1 == net->ipv4.maxvif) {
305 		int tmp;
306 		for (tmp=vifi-1; tmp>=0; tmp--) {
307 			if (VIF_EXISTS(net, tmp))
308 				break;
309 		}
310 		net->ipv4.maxvif = tmp+1;
311 	}
312 
313 	write_unlock_bh(&mrt_lock);
314 
315 	dev_set_allmulti(dev, -1);
316 
317 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319 		ip_rt_multicast_event(in_dev);
320 	}
321 
322 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323 		unregister_netdevice_queue(dev, head);
324 
325 	dev_put(dev);
326 	return 0;
327 }
328 
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331 	release_net(mfc_net(c));
332 	kmem_cache_free(mrt_cachep, c);
333 }
334 
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting an error to netlink readers.
337  */
338 
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341 	struct sk_buff *skb;
342 	struct nlmsgerr *e;
343 	struct net *net = mfc_net(c);
344 
345 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
346 
347 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348 		if (ip_hdr(skb)->version == 0) {
349 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350 			nlh->nlmsg_type = NLMSG_ERROR;
351 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352 			skb_trim(skb, nlh->nlmsg_len);
353 			e = NLMSG_DATA(nlh);
354 			e->error = -ETIMEDOUT;
355 			memset(&e->msg, 0, sizeof(e->msg));
356 
357 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358 		} else
359 			kfree_skb(skb);
360 	}
361 
362 	ipmr_cache_free(c);
363 }
364 
365 
366 /* Single timer process for all the unresolved queue. */
367 
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370 	unsigned long now;
371 	unsigned long expires;
372 	struct mfc_cache *c, **cp;
373 
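	/* the queue lock is held elsewhere; re-arm the timer and retry soon
	   rather than spinning in timer (softirq) context */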
374 	if (!spin_trylock(&mfc_unres_lock)) {
375 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376 		return;
377 	}
378 
379 	if (mfc_unres_queue == NULL)
380 		goto out;
381 
382 	now = jiffies;
383 	expires = 10*HZ;
384 	cp = &mfc_unres_queue;
385 
386 	while ((c=*cp) != NULL) {
387 		if (time_after(c->mfc_un.unres.expires, now)) {
388 			unsigned long interval = c->mfc_un.unres.expires - now;
389 			if (interval < expires)
390 				expires = interval;
391 			cp = &c->next;
392 			continue;
393 		}
394 
395 		*cp = c->next;
396 
397 		ipmr_destroy_unres(c);
398 	}
399 
400 	if (mfc_unres_queue != NULL)
401 		mod_timer(&ipmr_expire_timer, jiffies + expires);
402 
403 out:
404 	spin_unlock(&mfc_unres_lock);
405 }
406 
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408 
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411 	int vifi;
412 	struct net *net = mfc_net(cache);
413 
414 	cache->mfc_un.res.minvif = MAXVIFS;
415 	cache->mfc_un.res.maxvif = 0;
416 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
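	/* a ttls[] value of 0 or 255 excludes the vif; anything in between is
	   the TTL threshold a packet must exceed to be forwarded there */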
417 
418 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419 		if (VIF_EXISTS(net, vifi) &&
420 		    ttls[vifi] && ttls[vifi] < 255) {
421 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 			if (cache->mfc_un.res.minvif > vifi)
423 				cache->mfc_un.res.minvif = vifi;
424 			if (cache->mfc_un.res.maxvif <= vifi)
425 				cache->mfc_un.res.maxvif = vifi + 1;
426 		}
427 	}
428 }
429 
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432 	int vifi = vifc->vifc_vifi;
433 	struct vif_device *v = &net->ipv4.vif_table[vifi];
434 	struct net_device *dev;
435 	struct in_device *in_dev;
436 	int err;
437 
438 	/* Is the vif busy? */
439 	if (VIF_EXISTS(net, vifi))
440 		return -EADDRINUSE;
441 
442 	switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444 	case VIFF_REGISTER:
445 		/*
446 		 * Special Purpose VIF in PIM
447 		 * All the packets will be sent to the daemon
448 		 */
449 		if (net->ipv4.mroute_reg_vif_num >= 0)
450 			return -EADDRINUSE;
451 		dev = ipmr_reg_vif(net);
452 		if (!dev)
453 			return -ENOBUFS;
454 		err = dev_set_allmulti(dev, 1);
455 		if (err) {
456 			unregister_netdevice(dev);
457 			dev_put(dev);
458 			return err;
459 		}
460 		break;
461 #endif
462 	case VIFF_TUNNEL:
463 		dev = ipmr_new_tunnel(net, vifc);
464 		if (!dev)
465 			return -ENOBUFS;
466 		err = dev_set_allmulti(dev, 1);
467 		if (err) {
468 			ipmr_del_tunnel(dev, vifc);
469 			dev_put(dev);
470 			return err;
471 		}
472 		break;
473 
474 	case VIFF_USE_IFINDEX:
475 	case 0:
476 		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
477 			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
478 			if (dev && dev->ip_ptr == NULL) {
479 				dev_put(dev);
480 				return -EADDRNOTAVAIL;
481 			}
482 		} else
483 			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
484 
485 		if (!dev)
486 			return -EADDRNOTAVAIL;
487 		err = dev_set_allmulti(dev, 1);
488 		if (err) {
489 			dev_put(dev);
490 			return err;
491 		}
492 		break;
493 	default:
494 		return -EINVAL;
495 	}
496 
497 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
498 		dev_put(dev);
499 		return -EADDRNOTAVAIL;
500 	}
501 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
502 	ip_rt_multicast_event(in_dev);
503 
504 	/*
505 	 *	Fill in the VIF structures
506 	 */
507 	v->rate_limit = vifc->vifc_rate_limit;
508 	v->local = vifc->vifc_lcl_addr.s_addr;
509 	v->remote = vifc->vifc_rmt_addr.s_addr;
510 	v->flags = vifc->vifc_flags;
511 	if (!mrtsock)
512 		v->flags |= VIFF_STATIC;
513 	v->threshold = vifc->vifc_threshold;
514 	v->bytes_in = 0;
515 	v->bytes_out = 0;
516 	v->pkt_in = 0;
517 	v->pkt_out = 0;
518 	v->link = dev->ifindex;
519 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
520 		v->link = dev->iflink;
521 
522 	/* And finish update writing critical data */
523 	write_lock_bh(&mrt_lock);
524 	v->dev = dev;
525 #ifdef CONFIG_IP_PIMSM
526 	if (v->flags&VIFF_REGISTER)
527 		net->ipv4.mroute_reg_vif_num = vifi;
528 #endif
529 	if (vifi+1 > net->ipv4.maxvif)
530 		net->ipv4.maxvif = vifi+1;
531 	write_unlock_bh(&mrt_lock);
532 	return 0;
533 }
534 
535 static struct mfc_cache *ipmr_cache_find(struct net *net,
536 					 __be32 origin,
537 					 __be32 mcastgrp)
538 {
539 	int line = MFC_HASH(mcastgrp, origin);
540 	struct mfc_cache *c;
541 
542 	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
543 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
544 			break;
545 	}
546 	return c;
547 }
548 
549 /*
550  *	Allocate a multicast cache entry
551  */
552 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
553 {
554 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
555 	if (c == NULL)
556 		return NULL;
557 	c->mfc_un.res.minvif = MAXVIFS;
558 	mfc_net_set(c, net);
559 	return c;
560 }
561 
562 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
563 {
564 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
565 	if (c == NULL)
566 		return NULL;
567 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
568 	c->mfc_un.unres.expires = jiffies + 10*HZ;
569 	mfc_net_set(c, net);
570 	return c;
571 }
572 
573 /*
574  *	A cache entry has gone into a resolved state from queued
575  */
576 
577 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
578 {
579 	struct sk_buff *skb;
580 	struct nlmsgerr *e;
581 
582 	/*
583 	 *	Play the pending entries through our router
584 	 */
585 
586 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
587 		if (ip_hdr(skb)->version == 0) {
588 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
589 
590 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
591 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
592 						  (u8 *)nlh);
593 			} else {
594 				nlh->nlmsg_type = NLMSG_ERROR;
595 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
596 				skb_trim(skb, nlh->nlmsg_len);
597 				e = NLMSG_DATA(nlh);
598 				e->error = -EMSGSIZE;
599 				memset(&e->msg, 0, sizeof(e->msg));
600 			}
601 
602 			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
603 		} else
604 			ip_mr_forward(skb, c, 0);
605 	}
606 }
607 
608 /*
609  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
610  *	expects the following bizarre scheme.
611  *
612  *	Called under mrt_lock.
613  */
614 
615 static int ipmr_cache_report(struct net *net,
616 			     struct sk_buff *pkt, vifi_t vifi, int assert)
617 {
618 	struct sk_buff *skb;
619 	const int ihl = ip_hdrlen(pkt);
620 	struct igmphdr *igmp;
621 	struct igmpmsg *msg;
622 	int ret;
623 
624 #ifdef CONFIG_IP_PIMSM
625 	if (assert == IGMPMSG_WHOLEPKT)
626 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
627 	else
628 #endif
629 		skb = alloc_skb(128, GFP_ATOMIC);
630 
631 	if (!skb)
632 		return -ENOBUFS;
633 
634 #ifdef CONFIG_IP_PIMSM
635 	if (assert == IGMPMSG_WHOLEPKT) {
636 		/* Ugly, but we have no choice with this interface.
637 		   Duplicate the old header and fix ihl, length, etc.
638 		   And all this only to mangle msg->im_msgtype and
639 		   to set msg->im_mbz to "mbz" :-)
640 		 */
641 		skb_push(skb, sizeof(struct iphdr));
642 		skb_reset_network_header(skb);
643 		skb_reset_transport_header(skb);
644 		msg = (struct igmpmsg *)skb_network_header(skb);
645 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
646 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
647 		msg->im_mbz = 0;
648 		msg->im_vif = net->ipv4.mroute_reg_vif_num;
649 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
650 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
651 					     sizeof(struct iphdr));
652 	} else
653 #endif
654 	{
655 
656 	/*
657 	 *	Copy the IP header
658 	 */
659 
660 	skb->network_header = skb->tail;
661 	skb_put(skb, ihl);
662 	skb_copy_to_linear_data(skb, pkt->data, ihl);
663 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
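	/* struct igmpmsg is laid out to overlay the IP header: im_mbz aliases
	   the protocol field zeroed above, and im_src/im_dst alias saddr/daddr,
	   so mrouted reads the flow straight out of this copied header. */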
664 	msg = (struct igmpmsg *)skb_network_header(skb);
665 	msg->im_vif = vifi;
666 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
667 
668 	/*
669 	 *	Add our header
670 	 */
671 
672 	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
673 	igmp->type	=
674 	msg->im_msgtype = assert;
675 	igmp->code 	=	0;
676 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
677 	skb->transport_header = skb->network_header;
678 	}
679 
680 	if (net->ipv4.mroute_sk == NULL) {
681 		kfree_skb(skb);
682 		return -EINVAL;
683 	}
684 
685 	/*
686 	 *	Deliver to mrouted
687 	 */
688 	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
689 	if (ret < 0) {
690 		if (net_ratelimit())
691 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
692 		kfree_skb(skb);
693 	}
694 
695 	return ret;
696 }
697 
698 /*
699  *	Queue a packet for resolution. The cache entry is created or found under mfc_unres_lock.
700  */
701 
702 static int
703 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
704 {
705 	int err;
706 	struct mfc_cache *c;
707 	const struct iphdr *iph = ip_hdr(skb);
708 
709 	spin_lock_bh(&mfc_unres_lock);
710 	for (c=mfc_unres_queue; c; c=c->next) {
711 		if (net_eq(mfc_net(c), net) &&
712 		    c->mfc_mcastgrp == iph->daddr &&
713 		    c->mfc_origin == iph->saddr)
714 			break;
715 	}
716 
717 	if (c == NULL) {
718 		/*
719 		 *	Create a new entry if allowable
720 		 */
721 
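		/* Bound kernel memory: at most 10 unresolved entries per net,
		   and (below) only a few queued skbs per entry. */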
722 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
723 		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
724 			spin_unlock_bh(&mfc_unres_lock);
725 
726 			kfree_skb(skb);
727 			return -ENOBUFS;
728 		}
729 
730 		/*
731 		 *	Fill in the new cache entry
732 		 */
733 		c->mfc_parent	= -1;
734 		c->mfc_origin	= iph->saddr;
735 		c->mfc_mcastgrp	= iph->daddr;
736 
737 		/*
738 		 *	Reflect first query at mrouted.
739 		 */
740 		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
741 		if (err < 0) {
742 			/* If the report failed throw the cache entry
743 			   out - Brad Parker
744 			 */
745 			spin_unlock_bh(&mfc_unres_lock);
746 
747 			ipmr_cache_free(c);
748 			kfree_skb(skb);
749 			return err;
750 		}
751 
752 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
753 		c->next = mfc_unres_queue;
754 		mfc_unres_queue = c;
755 
756 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
757 	}
758 
759 	/*
760 	 *	See if we can append the packet
761 	 */
762 	if (c->mfc_un.unres.unresolved.qlen>3) {
763 		kfree_skb(skb);
764 		err = -ENOBUFS;
765 	} else {
766 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
767 		err = 0;
768 	}
769 
770 	spin_unlock_bh(&mfc_unres_lock);
771 	return err;
772 }
773 
774 /*
775  *	MFC cache manipulation by user space mroute daemon
776  */
777 
778 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
779 {
780 	int line;
781 	struct mfc_cache *c, **cp;
782 
783 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
784 
785 	for (cp = &net->ipv4.mfc_cache_array[line];
786 	     (c = *cp) != NULL; cp = &c->next) {
787 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
788 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
789 			write_lock_bh(&mrt_lock);
790 			*cp = c->next;
791 			write_unlock_bh(&mrt_lock);
792 
793 			ipmr_cache_free(c);
794 			return 0;
795 		}
796 	}
797 	return -ENOENT;
798 }
799 
800 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
801 {
802 	int line;
803 	struct mfc_cache *uc, *c, **cp;
804 
805 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
806 
807 	for (cp = &net->ipv4.mfc_cache_array[line];
808 	     (c = *cp) != NULL; cp = &c->next) {
809 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
810 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
811 			break;
812 	}
813 
814 	if (c != NULL) {
815 		write_lock_bh(&mrt_lock);
816 		c->mfc_parent = mfc->mfcc_parent;
817 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
818 		if (!mrtsock)
819 			c->mfc_flags |= MFC_STATIC;
820 		write_unlock_bh(&mrt_lock);
821 		return 0;
822 	}
823 
824 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
825 		return -EINVAL;
826 
827 	c = ipmr_cache_alloc(net);
828 	if (c == NULL)
829 		return -ENOMEM;
830 
831 	c->mfc_origin = mfc->mfcc_origin.s_addr;
832 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
833 	c->mfc_parent = mfc->mfcc_parent;
834 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
835 	if (!mrtsock)
836 		c->mfc_flags |= MFC_STATIC;
837 
838 	write_lock_bh(&mrt_lock);
839 	c->next = net->ipv4.mfc_cache_array[line];
840 	net->ipv4.mfc_cache_array[line] = c;
841 	write_unlock_bh(&mrt_lock);
842 
843 	/*
844 	 *	Check whether this entry resolves a queued unresolved entry.
845 	 *	If so, replay the pending frames and tidy up.
846 	 */
847 	spin_lock_bh(&mfc_unres_lock);
848 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
849 	     cp = &uc->next) {
850 		if (net_eq(mfc_net(uc), net) &&
851 		    uc->mfc_origin == c->mfc_origin &&
852 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
853 			*cp = uc->next;
854 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
855 			break;
856 		}
857 	}
858 	if (mfc_unres_queue == NULL)
859 		del_timer(&ipmr_expire_timer);
860 	spin_unlock_bh(&mfc_unres_lock);
861 
862 	if (uc) {
863 		ipmr_cache_resolve(uc, c);
864 		ipmr_cache_free(uc);
865 	}
866 	return 0;
867 }
868 
869 /*
870  *	Close the multicast socket, and clear the vif tables etc
871  */
872 
873 static void mroute_clean_tables(struct net *net)
874 {
875 	int i;
876 	LIST_HEAD(list);
877 
878 	/*
879 	 *	Shut down all active vif entries
880 	 */
881 	for (i = 0; i < net->ipv4.maxvif; i++) {
882 		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
883 			vif_delete(net, i, 0, &list);
884 	}
885 	unregister_netdevice_many(&list);
886 
887 	/*
888 	 *	Wipe the cache
889 	 */
890 	for (i=0; i<MFC_LINES; i++) {
891 		struct mfc_cache *c, **cp;
892 
893 		cp = &net->ipv4.mfc_cache_array[i];
894 		while ((c = *cp) != NULL) {
895 			if (c->mfc_flags&MFC_STATIC) {
896 				cp = &c->next;
897 				continue;
898 			}
899 			write_lock_bh(&mrt_lock);
900 			*cp = c->next;
901 			write_unlock_bh(&mrt_lock);
902 
903 			ipmr_cache_free(c);
904 		}
905 	}
906 
907 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
908 		struct mfc_cache *c, **cp;
909 
910 		spin_lock_bh(&mfc_unres_lock);
911 		cp = &mfc_unres_queue;
912 		while ((c = *cp) != NULL) {
913 			if (!net_eq(mfc_net(c), net)) {
914 				cp = &c->next;
915 				continue;
916 			}
917 			*cp = c->next;
918 
919 			ipmr_destroy_unres(c);
920 		}
921 		spin_unlock_bh(&mfc_unres_lock);
922 	}
923 }
924 
925 static void mrtsock_destruct(struct sock *sk)
926 {
927 	struct net *net = sock_net(sk);
928 
929 	rtnl_lock();
930 	if (sk == net->ipv4.mroute_sk) {
931 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
932 
933 		write_lock_bh(&mrt_lock);
934 		net->ipv4.mroute_sk = NULL;
935 		write_unlock_bh(&mrt_lock);
936 
937 		mroute_clean_tables(net);
938 	}
939 	rtnl_unlock();
940 }
941 
942 /*
943  *	Socket options and virtual interface manipulation. The whole
944  *	virtual interface system is a complete heap, but unfortunately
945  *	that's how BSD mrouted happens to think. Maybe one day with a proper
946  *	MOSPF/PIM router set up we can clean this up.
947  */
948 
949 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
950 {
951 	int ret;
952 	struct vifctl vif;
953 	struct mfcctl mfc;
954 	struct net *net = sock_net(sk);
955 
956 	if (optname != MRT_INIT) {
957 		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
958 			return -EACCES;
959 	}
960 
961 	switch (optname) {
962 	case MRT_INIT:
963 		if (sk->sk_type != SOCK_RAW ||
964 		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
965 			return -EOPNOTSUPP;
966 		if (optlen != sizeof(int))
967 			return -ENOPROTOOPT;
968 
969 		rtnl_lock();
970 		if (net->ipv4.mroute_sk) {
971 			rtnl_unlock();
972 			return -EADDRINUSE;
973 		}
974 
975 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
976 		if (ret == 0) {
977 			write_lock_bh(&mrt_lock);
978 			net->ipv4.mroute_sk = sk;
979 			write_unlock_bh(&mrt_lock);
980 
981 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
982 		}
983 		rtnl_unlock();
984 		return ret;
985 	case MRT_DONE:
986 		if (sk != net->ipv4.mroute_sk)
987 			return -EACCES;
988 		return ip_ra_control(sk, 0, NULL);
989 	case MRT_ADD_VIF:
990 	case MRT_DEL_VIF:
991 		if (optlen != sizeof(vif))
992 			return -EINVAL;
993 		if (copy_from_user(&vif, optval, sizeof(vif)))
994 			return -EFAULT;
995 		if (vif.vifc_vifi >= MAXVIFS)
996 			return -ENFILE;
997 		rtnl_lock();
998 		if (optname == MRT_ADD_VIF) {
999 			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1000 		} else {
1001 			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1002 		}
1003 		rtnl_unlock();
1004 		return ret;
1005 
1006 		/*
1007 		 *	Manipulate the forwarding caches. These live
1008 		 *	in a sort of kernel/user symbiosis.
1009 		 */
1010 	case MRT_ADD_MFC:
1011 	case MRT_DEL_MFC:
1012 		if (optlen != sizeof(mfc))
1013 			return -EINVAL;
1014 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1015 			return -EFAULT;
1016 		rtnl_lock();
1017 		if (optname == MRT_DEL_MFC)
1018 			ret = ipmr_mfc_delete(net, &mfc);
1019 		else
1020 			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1021 		rtnl_unlock();
1022 		return ret;
1023 		/*
1024 		 *	Control PIM assert.
1025 		 */
1026 	case MRT_ASSERT:
1027 	{
1028 		int v;
1029 		if (get_user(v,(int __user *)optval))
1030 			return -EFAULT;
1031 		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1032 		return 0;
1033 	}
1034 #ifdef CONFIG_IP_PIMSM
1035 	case MRT_PIM:
1036 	{
1037 		int v;
1038 
1039 		if (get_user(v,(int __user *)optval))
1040 			return -EFAULT;
1041 		v = (v) ? 1 : 0;
1042 
1043 		rtnl_lock();
1044 		ret = 0;
1045 		if (v != net->ipv4.mroute_do_pim) {
1046 			net->ipv4.mroute_do_pim = v;
1047 			net->ipv4.mroute_do_assert = v;
1048 		}
1049 		rtnl_unlock();
1050 		return ret;
1051 	}
1052 #endif
1053 	/*
1054 	 *	Spurious command, or MRT_VERSION which you cannot
1055 	 *	set.
1056 	 */
1057 	default:
1058 		return -ENOPROTOOPT;
1059 	}
1060 }
1061 
1062 /*
1063  *	Getsock opt support for the multicast routing system.
1064  */
1065 
1066 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1067 {
1068 	int olr;
1069 	int val;
1070 	struct net *net = sock_net(sk);
1071 
1072 	if (optname != MRT_VERSION &&
1073 #ifdef CONFIG_IP_PIMSM
1074 	   optname!=MRT_PIM &&
1075 #endif
1076 	   optname!=MRT_ASSERT)
1077 		return -ENOPROTOOPT;
1078 
1079 	if (get_user(olr, optlen))
1080 		return -EFAULT;
1081 
1082 	olr = min_t(unsigned int, olr, sizeof(int));
1083 	if (olr < 0)
1084 		return -EINVAL;
1085 
1086 	if (put_user(olr, optlen))
1087 		return -EFAULT;
1088 	if (optname == MRT_VERSION)
1089 		val = 0x0305;
1090 #ifdef CONFIG_IP_PIMSM
1091 	else if (optname == MRT_PIM)
1092 		val = net->ipv4.mroute_do_pim;
1093 #endif
1094 	else
1095 		val = net->ipv4.mroute_do_assert;
1096 	if (copy_to_user(optval, &val, olr))
1097 		return -EFAULT;
1098 	return 0;
1099 }
1100 
1101 /*
1102  *	The IP multicast ioctl support routines.
1103  */
1104 
1105 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1106 {
1107 	struct sioc_sg_req sr;
1108 	struct sioc_vif_req vr;
1109 	struct vif_device *vif;
1110 	struct mfc_cache *c;
1111 	struct net *net = sock_net(sk);
1112 
1113 	switch (cmd) {
1114 	case SIOCGETVIFCNT:
1115 		if (copy_from_user(&vr, arg, sizeof(vr)))
1116 			return -EFAULT;
1117 		if (vr.vifi >= net->ipv4.maxvif)
1118 			return -EINVAL;
1119 		read_lock(&mrt_lock);
1120 		vif = &net->ipv4.vif_table[vr.vifi];
1121 		if (VIF_EXISTS(net, vr.vifi)) {
1122 			vr.icount = vif->pkt_in;
1123 			vr.ocount = vif->pkt_out;
1124 			vr.ibytes = vif->bytes_in;
1125 			vr.obytes = vif->bytes_out;
1126 			read_unlock(&mrt_lock);
1127 
1128 			if (copy_to_user(arg, &vr, sizeof(vr)))
1129 				return -EFAULT;
1130 			return 0;
1131 		}
1132 		read_unlock(&mrt_lock);
1133 		return -EADDRNOTAVAIL;
1134 	case SIOCGETSGCNT:
1135 		if (copy_from_user(&sr, arg, sizeof(sr)))
1136 			return -EFAULT;
1137 
1138 		read_lock(&mrt_lock);
1139 		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1140 		if (c) {
1141 			sr.pktcnt = c->mfc_un.res.pkt;
1142 			sr.bytecnt = c->mfc_un.res.bytes;
1143 			sr.wrong_if = c->mfc_un.res.wrong_if;
1144 			read_unlock(&mrt_lock);
1145 
1146 			if (copy_to_user(arg, &sr, sizeof(sr)))
1147 				return -EFAULT;
1148 			return 0;
1149 		}
1150 		read_unlock(&mrt_lock);
1151 		return -EADDRNOTAVAIL;
1152 	default:
1153 		return -ENOIOCTLCMD;
1154 	}
1155 }
1156 
1157 
1158 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1159 {
1160 	struct net_device *dev = ptr;
1161 	struct net *net = dev_net(dev);
1162 	struct vif_device *v;
1163 	int ct;
1164 	LIST_HEAD(list);
1165 
1166 	if (event != NETDEV_UNREGISTER)
1167 		return NOTIFY_DONE;
1168 	v = &net->ipv4.vif_table[0];
1169 	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1170 		if (v->dev == dev)
1171 			vif_delete(net, ct, 1, &list);
1172 	}
1173 	unregister_netdevice_many(&list);
1174 	return NOTIFY_DONE;
1175 }
1176 
1177 
1178 static struct notifier_block ip_mr_notifier = {
1179 	.notifier_call = ipmr_device_event,
1180 };
1181 
1182 /*
1183  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1184  *	This avoids tunnel drivers and other mess and gives us the speed so
1185  *	important for multicast video.
1186  */
1187 
1188 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1189 {
1190 	struct iphdr *iph;
1191 	struct iphdr *old_iph = ip_hdr(skb);
1192 
1193 	skb_push(skb, sizeof(struct iphdr));
1194 	skb->transport_header = skb->network_header;
1195 	skb_reset_network_header(skb);
1196 	iph = ip_hdr(skb);
1197 
1198 	iph->version	= 	4;
1199 	iph->tos	=	old_iph->tos;
1200 	iph->ttl	=	old_iph->ttl;
1201 	iph->frag_off	=	0;
1202 	iph->daddr	=	daddr;
1203 	iph->saddr	=	saddr;
1204 	iph->protocol	=	IPPROTO_IPIP;
1205 	iph->ihl	=	5;
1206 	iph->tot_len	=	htons(skb->len);
1207 	ip_select_ident(iph, skb_dst(skb), NULL);
1208 	ip_send_check(iph);
1209 
1210 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1211 	nf_reset(skb);
1212 }
1213 
1214 static inline int ipmr_forward_finish(struct sk_buff *skb)
1215 {
1216 	struct ip_options * opt	= &(IPCB(skb)->opt);
1217 
1218 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1219 
1220 	if (unlikely(opt->optlen))
1221 		ip_forward_options(skb);
1222 
1223 	return dst_output(skb);
1224 }
1225 
1226 /*
1227  *	Processing handlers for ipmr_forward
1228  */
1229 
1230 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1231 {
1232 	struct net *net = mfc_net(c);
1233 	const struct iphdr *iph = ip_hdr(skb);
1234 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
1235 	struct net_device *dev;
1236 	struct rtable *rt;
1237 	int    encap = 0;
1238 
1239 	if (vif->dev == NULL)
1240 		goto out_free;
1241 
1242 #ifdef CONFIG_IP_PIMSM
1243 	if (vif->flags & VIFF_REGISTER) {
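		/* Register vifs never transmit on the wire: account the
		   packet, then pass the whole frame up to the daemon. */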
1244 		vif->pkt_out++;
1245 		vif->bytes_out += skb->len;
1246 		vif->dev->stats.tx_bytes += skb->len;
1247 		vif->dev->stats.tx_packets++;
1248 		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1249 		goto out_free;
1250 	}
1251 #endif
1252 
1253 	if (vif->flags&VIFF_TUNNEL) {
1254 		struct flowi fl = { .oif = vif->link,
1255 				    .nl_u = { .ip4_u =
1256 					      { .daddr = vif->remote,
1257 						.saddr = vif->local,
1258 						.tos = RT_TOS(iph->tos) } },
1259 				    .proto = IPPROTO_IPIP };
1260 		if (ip_route_output_key(net, &rt, &fl))
1261 			goto out_free;
1262 		encap = sizeof(struct iphdr);
1263 	} else {
1264 		struct flowi fl = { .oif = vif->link,
1265 				    .nl_u = { .ip4_u =
1266 					      { .daddr = iph->daddr,
1267 						.tos = RT_TOS(iph->tos) } },
1268 				    .proto = IPPROTO_IPIP };
1269 		if (ip_route_output_key(net, &rt, &fl))
1270 			goto out_free;
1271 	}
1272 
1273 	dev = rt->u.dst.dev;
1274 
1275 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1276 		/* Do not fragment multicasts. Alas, IPv4 gives us no way
1277 		   to send ICMP here, so oversized packets simply vanish
1278 		   into a black hole.
1279 		 */
1280 
1281 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1282 		ip_rt_put(rt);
1283 		goto out_free;
1284 	}
1285 
1286 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1287 
1288 	if (skb_cow(skb, encap)) {
1289 		ip_rt_put(rt);
1290 		goto out_free;
1291 	}
1292 
1293 	vif->pkt_out++;
1294 	vif->bytes_out += skb->len;
1295 
1296 	skb_dst_drop(skb);
1297 	skb_dst_set(skb, &rt->u.dst);
1298 	ip_decrease_ttl(ip_hdr(skb));
1299 
1300 	/* FIXME: forward and output firewalls used to be called here.
1301 	 * What do we do with netfilter? -- RR */
1302 	if (vif->flags & VIFF_TUNNEL) {
1303 		ip_encap(skb, vif->local, vif->remote);
1304 		/* FIXME: extra output firewall step used to be here. --RR */
1305 		vif->dev->stats.tx_packets++;
1306 		vif->dev->stats.tx_bytes += skb->len;
1307 	}
1308 
1309 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1310 
1311 	/*
1312 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
1313 	 * locally not only before forwarding, but also after forwarding on
1314 	 * all output interfaces. Clearly, if the mrouter runs a multicast
1315 	 * program, that program should receive packets regardless of which
1316 	 * interface it joined on.
1317 	 * Otherwise, the program would have to join on all
1318 	 * interfaces. On the other hand, a multihomed host (or a router,
1319 	 * but not an mrouter) cannot join on more than one interface - it
1320 	 * would result in receiving duplicate packets.
1321 	 */
1322 	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1323 		ipmr_forward_finish);
1324 	return;
1325 
1326 out_free:
1327 	kfree_skb(skb);
1328 	return;
1329 }
1330 
1331 static int ipmr_find_vif(struct net_device *dev)
1332 {
1333 	struct net *net = dev_net(dev);
1334 	int ct;
1335 	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1336 		if (net->ipv4.vif_table[ct].dev == dev)
1337 			break;
1338 	}
1339 	return ct;
1340 }
1341 
1342 /* "local" means that we should preserve one skb (for local delivery) */
1343 
1344 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1345 {
1346 	int psend = -1;
1347 	int vif, ct;
1348 	struct net *net = mfc_net(cache);
1349 
1350 	vif = cache->mfc_parent;
1351 	cache->mfc_un.res.pkt++;
1352 	cache->mfc_un.res.bytes += skb->len;
1353 
1354 	/*
1355 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1356 	 */
1357 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
1358 		int true_vifi;
1359 
1360 		if (skb_rtable(skb)->fl.iif == 0) {
1361 			/* It is our own packet, looped back.
1362 			   A very complicated situation...
1363 
1364 			   The best workaround, until the routing daemons are
1365 			   fixed, is not to redistribute a packet if it was
1366 			   sent through the wrong interface. It means that
1367 			   multicast applications WILL NOT work for (S,G)
1368 			   entries whose default multicast route points to
1369 			   the wrong oif. In any case, it is not a good
1370 			   idea to run multicast applications on a router.
1371 			 */
1372 			goto dont_forward;
1373 		}
1374 
1375 		cache->mfc_un.res.wrong_if++;
1376 		true_vifi = ipmr_find_vif(skb->dev);
1377 
1378 		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1379 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1380 		       so we cannot check that the packet arrived on an oif.
1381 		       That is bad, but otherwise we would need to move a
1382 		       pretty large chunk of pimd into the kernel. Ough... --ANK
1383 		     */
1384 		    (net->ipv4.mroute_do_pim ||
1385 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1386 		    time_after(jiffies,
1387 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1388 			cache->mfc_un.res.last_assert = jiffies;
1389 			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1390 		}
1391 		goto dont_forward;
1392 	}
1393 
1394 	net->ipv4.vif_table[vif].pkt_in++;
1395 	net->ipv4.vif_table[vif].bytes_in += skb->len;
1396 
1397 	/*
1398 	 *	Forward the frame
1399 	 */
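	/* Send a clone to every matched oif except the last; the final
	   transmit consumes the original skb (unless a local copy must be
	   preserved). */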
1400 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1401 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1402 			if (psend != -1) {
1403 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1404 				if (skb2)
1405 					ipmr_queue_xmit(skb2, cache, psend);
1406 			}
1407 			psend = ct;
1408 		}
1409 	}
1410 	if (psend != -1) {
1411 		if (local) {
1412 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1413 			if (skb2)
1414 				ipmr_queue_xmit(skb2, cache, psend);
1415 		} else {
1416 			ipmr_queue_xmit(skb, cache, psend);
1417 			return 0;
1418 		}
1419 	}
1420 
1421 dont_forward:
1422 	if (!local)
1423 		kfree_skb(skb);
1424 	return 0;
1425 }
1426 
1427 
1428 /*
1429  *	Multicast packets for forwarding arrive here
1430  */
1431 
1432 int ip_mr_input(struct sk_buff *skb)
1433 {
1434 	struct mfc_cache *cache;
1435 	struct net *net = dev_net(skb->dev);
1436 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1437 
1438 	/* A packet looped back after forwarding must not be
1439 	   forwarded a second time, but it can still be delivered locally.
1440 	 */
1441 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1442 		goto dont_forward;
1443 
1444 	if (!local) {
1445 		    if (IPCB(skb)->opt.router_alert) {
1446 			    if (ip_call_ra_chain(skb))
1447 				    return 0;
1448 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1449 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1450 			       Cisco IOS <= 11.2(8)) do not put the router alert
1451 			       option into IGMP packets destined for routable
1452 			       groups. That is very bad, because it means
1453 			       that otherwise we could forward NO IGMP messages.
1454 			     */
1455 			    read_lock(&mrt_lock);
1456 			    if (net->ipv4.mroute_sk) {
1457 				    nf_reset(skb);
1458 				    raw_rcv(net->ipv4.mroute_sk, skb);
1459 				    read_unlock(&mrt_lock);
1460 				    return 0;
1461 			    }
1462 			    read_unlock(&mrt_lock);
1463 		    }
1464 	}
1465 
1466 	read_lock(&mrt_lock);
1467 	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1468 
1469 	/*
1470 	 *	No usable cache entry
1471 	 */
1472 	if (cache == NULL) {
1473 		int vif;
1474 
1475 		if (local) {
1476 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1477 			ip_local_deliver(skb);
1478 			if (skb2 == NULL) {
1479 				read_unlock(&mrt_lock);
1480 				return -ENOBUFS;
1481 			}
1482 			skb = skb2;
1483 		}
1484 
1485 		vif = ipmr_find_vif(skb->dev);
1486 		if (vif >= 0) {
1487 			int err = ipmr_cache_unresolved(net, vif, skb);
1488 			read_unlock(&mrt_lock);
1489 
1490 			return err;
1491 		}
1492 		read_unlock(&mrt_lock);
1493 		kfree_skb(skb);
1494 		return -ENODEV;
1495 	}
1496 
1497 	ip_mr_forward(skb, cache, local);
1498 
1499 	read_unlock(&mrt_lock);
1500 
1501 	if (local)
1502 		return ip_local_deliver(skb);
1503 
1504 	return 0;
1505 
1506 dont_forward:
1507 	if (local)
1508 		return ip_local_deliver(skb);
1509 	kfree_skb(skb);
1510 	return 0;
1511 }
1512 
1513 #ifdef CONFIG_IP_PIMSM
1514 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1515 {
1516 	struct net_device *reg_dev = NULL;
1517 	struct iphdr *encap;
1518 	struct net *net = dev_net(skb->dev);
1519 
1520 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1521 	/*
1522 	   Check that:
1523 	   a. the packet is really destined for a multicast group
1524 	   b. the packet is not a NULL-REGISTER
1525 	   c. the packet is not truncated
1526 	 */
1527 	if (!ipv4_is_multicast(encap->daddr) ||
1528 	    encap->tot_len == 0 ||
1529 	    ntohs(encap->tot_len) + pimlen > skb->len)
1530 		return 1;
1531 
1532 	read_lock(&mrt_lock);
1533 	if (net->ipv4.mroute_reg_vif_num >= 0)
1534 		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1535 	if (reg_dev)
1536 		dev_hold(reg_dev);
1537 	read_unlock(&mrt_lock);
1538 
1539 	if (reg_dev == NULL)
1540 		return 1;
1541 
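	/* Decapsulate: strip the outer IP and PIM headers, then re-inject the
	   inner multicast packet as if it had arrived on the pimreg device. */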
1542 	skb->mac_header = skb->network_header;
1543 	skb_pull(skb, (u8*)encap - skb->data);
1544 	skb_reset_network_header(skb);
1545 	skb->dev = reg_dev;
1546 	skb->protocol = htons(ETH_P_IP);
1547 	skb->ip_summed = CHECKSUM_NONE;
1548 	skb->pkt_type = PACKET_HOST;
1549 	skb_dst_drop(skb);
1550 	reg_dev->stats.rx_bytes += skb->len;
1551 	reg_dev->stats.rx_packets++;
1552 	nf_reset(skb);
1553 	netif_rx(skb);
1554 	dev_put(reg_dev);
1555 
1556 	return 0;
1557 }
1558 #endif
1559 
1560 #ifdef CONFIG_IP_PIMSM_V1
1561 /*
1562  * Handle IGMP messages of PIMv1
1563  */
1564 
1565 int pim_rcv_v1(struct sk_buff * skb)
1566 {
1567 	struct igmphdr *pim;
1568 	struct net *net = dev_net(skb->dev);
1569 
1570 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1571 		goto drop;
1572 
1573 	pim = igmp_hdr(skb);
1574 
1575 	if (!net->ipv4.mroute_do_pim ||
1576 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1577 		goto drop;
1578 
1579 	if (__pim_rcv(skb, sizeof(*pim))) {
1580 drop:
1581 		kfree_skb(skb);
1582 	}
1583 	return 0;
1584 }
1585 #endif
1586 
1587 #ifdef CONFIG_IP_PIMSM_V2
1588 static int pim_rcv(struct sk_buff * skb)
1589 {
1590 	struct pimreghdr *pim;
1591 
1592 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1593 		goto drop;
1594 
1595 	pim = (struct pimreghdr *)skb_transport_header(skb);
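	/* Accept a checksum over the PIM header alone (as the spec requires)
	   or over the whole packet, for interoperability with older peers. */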
1596 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1597 	    (pim->flags&PIM_NULL_REGISTER) ||
1598 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1599 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1600 		goto drop;
1601 
1602 	if (__pim_rcv(skb, sizeof(*pim))) {
1603 drop:
1604 		kfree_skb(skb);
1605 	}
1606 	return 0;
1607 }
1608 #endif
1609 
1610 static int
1611 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1612 {
1613 	int ct;
1614 	struct rtnexthop *nhp;
1615 	struct net *net = mfc_net(c);
1616 	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1617 	u8 *b = skb_tail_pointer(skb);
1618 	struct rtattr *mp_head;
1619 
1620 	if (dev)
1621 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1622 
1623 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1624 
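	/* Emit one RTA_MULTIPATH nexthop per active oif; rtnh_hops carries
	   the vif's TTL threshold. */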
1625 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1626 		if (c->mfc_un.res.ttls[ct] < 255) {
1627 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1628 				goto rtattr_failure;
1629 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1630 			nhp->rtnh_flags = 0;
1631 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1632 			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1633 			nhp->rtnh_len = sizeof(*nhp);
1634 		}
1635 	}
1636 	mp_head->rta_type = RTA_MULTIPATH;
1637 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1638 	rtm->rtm_type = RTN_MULTICAST;
1639 	return 1;
1640 
1641 rtattr_failure:
1642 	nlmsg_trim(skb, b);
1643 	return -EMSGSIZE;
1644 }
1645 
1646 int ipmr_get_route(struct net *net,
1647 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1648 {
1649 	int err;
1650 	struct mfc_cache *cache;
1651 	struct rtable *rt = skb_rtable(skb);
1652 
1653 	read_lock(&mrt_lock);
1654 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1655 
1656 	if (cache == NULL) {
1657 		struct sk_buff *skb2;
1658 		struct iphdr *iph;
1659 		struct net_device *dev;
1660 		int vif;
1661 
1662 		if (nowait) {
1663 			read_unlock(&mrt_lock);
1664 			return -EAGAIN;
1665 		}
1666 
1667 		dev = skb->dev;
1668 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1669 			read_unlock(&mrt_lock);
1670 			return -ENODEV;
1671 		}
1672 		skb2 = skb_clone(skb, GFP_ATOMIC);
1673 		if (!skb2) {
1674 			read_unlock(&mrt_lock);
1675 			return -ENOMEM;
1676 		}
1677 
1678 		skb_push(skb2, sizeof(struct iphdr));
1679 		skb_reset_network_header(skb2);
1680 		iph = ip_hdr(skb2);
1681 		iph->ihl = sizeof(struct iphdr) >> 2;
1682 		iph->saddr = rt->rt_src;
1683 		iph->daddr = rt->rt_dst;
1684 		iph->version = 0;
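		/* version 0 marks this skb as a parked netlink request; see
		   the matching checks in ipmr_cache_resolve() and
		   ipmr_destroy_unres(). */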
1685 		err = ipmr_cache_unresolved(net, vif, skb2);
1686 		read_unlock(&mrt_lock);
1687 		return err;
1688 	}
1689 
1690 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1691 		cache->mfc_flags |= MFC_NOTIFY;
1692 	err = ipmr_fill_mroute(skb, cache, rtm);
1693 	read_unlock(&mrt_lock);
1694 	return err;
1695 }
1696 
1697 #ifdef CONFIG_PROC_FS
1698 /*
1699  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1700  */
1701 struct ipmr_vif_iter {
1702 	struct seq_net_private p;
1703 	int ct;
1704 };
1705 
1706 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1707 					   struct ipmr_vif_iter *iter,
1708 					   loff_t pos)
1709 {
1710 	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1711 		if (!VIF_EXISTS(net, iter->ct))
1712 			continue;
1713 		if (pos-- == 0)
1714 			return &net->ipv4.vif_table[iter->ct];
1715 	}
1716 	return NULL;
1717 }
1718 
1719 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1720 	__acquires(mrt_lock)
1721 {
1722 	struct net *net = seq_file_net(seq);
1723 
1724 	read_lock(&mrt_lock);
1725 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1726 		: SEQ_START_TOKEN;
1727 }
1728 
1729 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1730 {
1731 	struct ipmr_vif_iter *iter = seq->private;
1732 	struct net *net = seq_file_net(seq);
1733 
1734 	++*pos;
1735 	if (v == SEQ_START_TOKEN)
1736 		return ipmr_vif_seq_idx(net, iter, 0);
1737 
1738 	while (++iter->ct < net->ipv4.maxvif) {
1739 		if (!VIF_EXISTS(net, iter->ct))
1740 			continue;
1741 		return &net->ipv4.vif_table[iter->ct];
1742 	}
1743 	return NULL;
1744 }
1745 
1746 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1747 	__releases(mrt_lock)
1748 {
1749 	read_unlock(&mrt_lock);
1750 }
1751 
1752 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1753 {
1754 	struct net *net = seq_file_net(seq);
1755 
1756 	if (v == SEQ_START_TOKEN) {
1757 		seq_puts(seq,
1758 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1759 	} else {
1760 		const struct vif_device *vif = v;
1761 		const char *name =  vif->dev ? vif->dev->name : "none";
1762 
1763 		seq_printf(seq,
1764 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1765 			   vif - net->ipv4.vif_table,
1766 			   name, vif->bytes_in, vif->pkt_in,
1767 			   vif->bytes_out, vif->pkt_out,
1768 			   vif->flags, vif->local, vif->remote);
1769 	}
1770 	return 0;
1771 }
1772 
1773 static const struct seq_operations ipmr_vif_seq_ops = {
1774 	.start = ipmr_vif_seq_start,
1775 	.next  = ipmr_vif_seq_next,
1776 	.stop  = ipmr_vif_seq_stop,
1777 	.show  = ipmr_vif_seq_show,
1778 };
1779 
1780 static int ipmr_vif_open(struct inode *inode, struct file *file)
1781 {
1782 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1783 			    sizeof(struct ipmr_vif_iter));
1784 }
1785 
1786 static const struct file_operations ipmr_vif_fops = {
1787 	.owner	 = THIS_MODULE,
1788 	.open    = ipmr_vif_open,
1789 	.read    = seq_read,
1790 	.llseek  = seq_lseek,
1791 	.release = seq_release_net,
1792 };
1793 
1794 struct ipmr_mfc_iter {
1795 	struct seq_net_private p;
1796 	struct mfc_cache **cache;
1797 	int ct;
1798 };
1799 
1800 
1801 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1802 					  struct ipmr_mfc_iter *it, loff_t pos)
1803 {
1804 	struct mfc_cache *mfc;
1805 
1806 	it->cache = net->ipv4.mfc_cache_array;
1807 	read_lock(&mrt_lock);
1808 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1809 		for (mfc = net->ipv4.mfc_cache_array[it->ct];
1810 		     mfc; mfc = mfc->next)
1811 			if (pos-- == 0)
1812 				return mfc;
1813 	read_unlock(&mrt_lock);
1814 
1815 	it->cache = &mfc_unres_queue;
1816 	spin_lock_bh(&mfc_unres_lock);
1817 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1818 		if (net_eq(mfc_net(mfc), net) &&
1819 		    pos-- == 0)
1820 			return mfc;
1821 	spin_unlock_bh(&mfc_unres_lock);
1822 
1823 	it->cache = NULL;
1824 	return NULL;
1825 }
1826 
1827 
1828 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1829 {
1830 	struct ipmr_mfc_iter *it = seq->private;
1831 	struct net *net = seq_file_net(seq);
1832 
1833 	it->cache = NULL;
1834 	it->ct = 0;
1835 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1836 		: SEQ_START_TOKEN;
1837 }
1838 
1839 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1840 {
1841 	struct mfc_cache *mfc = v;
1842 	struct ipmr_mfc_iter *it = seq->private;
1843 	struct net *net = seq_file_net(seq);
1844 
1845 	++*pos;
1846 
1847 	if (v == SEQ_START_TOKEN)
1848 		return ipmr_mfc_seq_idx(net, seq->private, 0);
1849 
1850 	if (mfc->next)
1851 		return mfc->next;
1852 
1853 	if (it->cache == &mfc_unres_queue)
1854 		goto end_of_list;
1855 
1856 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1857 
1858 	while (++it->ct < MFC_LINES) {
1859 		mfc = net->ipv4.mfc_cache_array[it->ct];
1860 		if (mfc)
1861 			return mfc;
1862 	}
1863 
1864 	/* exhausted cache_array, show unresolved */
1865 	read_unlock(&mrt_lock);
1866 	it->cache = &mfc_unres_queue;
1867 	it->ct = 0;
1868 
1869 	spin_lock_bh(&mfc_unres_lock);
1870 	mfc = mfc_unres_queue;
1871 	while (mfc && !net_eq(mfc_net(mfc), net))
1872 		mfc = mfc->next;
1873 	if (mfc)
1874 		return mfc;
1875 
1876  end_of_list:
1877 	spin_unlock_bh(&mfc_unres_lock);
1878 	it->cache = NULL;
1879 
1880 	return NULL;
1881 }
1882 
1883 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1884 {
1885 	struct ipmr_mfc_iter *it = seq->private;
1886 	struct net *net = seq_file_net(seq);
1887 
1888 	if (it->cache == &mfc_unres_queue)
1889 		spin_unlock_bh(&mfc_unres_lock);
1890 	else if (it->cache == net->ipv4.mfc_cache_array)
1891 		read_unlock(&mrt_lock);
1892 }
1893 
1894 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1895 {
1896 	int n;
1897 	struct net *net = seq_file_net(seq);
1898 
1899 	if (v == SEQ_START_TOKEN) {
1900 		seq_puts(seq,
1901 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1902 	} else {
1903 		const struct mfc_cache *mfc = v;
1904 		const struct ipmr_mfc_iter *it = seq->private;
1905 
1906 		seq_printf(seq, "%08lX %08lX %-3hd",
1907 			   (unsigned long) mfc->mfc_mcastgrp,
1908 			   (unsigned long) mfc->mfc_origin,
1909 			   mfc->mfc_parent);
1910 
1911 		if (it->cache != &mfc_unres_queue) {
1912 			seq_printf(seq, " %8lu %8lu %8lu",
1913 				   mfc->mfc_un.res.pkt,
1914 				   mfc->mfc_un.res.bytes,
1915 				   mfc->mfc_un.res.wrong_if);
1916 			for (n = mfc->mfc_un.res.minvif;
1917 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1918 				if (VIF_EXISTS(net, n) &&
1919 				    mfc->mfc_un.res.ttls[n] < 255)
1920 					seq_printf(seq,
1921 					   " %2d:%-3d",
1922 					   n, mfc->mfc_un.res.ttls[n]);
1923 			}
1924 		} else {
1925 			/* unresolved mfc_caches don't contain
1926 			 * pkt, bytes and wrong_if values
1927 			 */
1928 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1929 		}
1930 		seq_putc(seq, '\n');
1931 	}
1932 	return 0;
1933 }
1934 
1935 static const struct seq_operations ipmr_mfc_seq_ops = {
1936 	.start = ipmr_mfc_seq_start,
1937 	.next  = ipmr_mfc_seq_next,
1938 	.stop  = ipmr_mfc_seq_stop,
1939 	.show  = ipmr_mfc_seq_show,
1940 };
1941 
1942 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1943 {
1944 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1945 			    sizeof(struct ipmr_mfc_iter));
1946 }
1947 
1948 static const struct file_operations ipmr_mfc_fops = {
1949 	.owner	 = THIS_MODULE,
1950 	.open    = ipmr_mfc_open,
1951 	.read    = seq_read,
1952 	.llseek  = seq_lseek,
1953 	.release = seq_release_net,
1954 };
1955 #endif
1956 
1957 #ifdef CONFIG_IP_PIMSM_V2
1958 static const struct net_protocol pim_protocol = {
1959 	.handler	=	pim_rcv,
1960 	.netns_ok	=	1,
1961 };
1962 #endif
1963 
1964 
1965 /*
1966  *	Setup for IP multicast routing
1967  */
1968 static int __net_init ipmr_net_init(struct net *net)
1969 {
1970 	int err = 0;
1971 
1972 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1973 				      GFP_KERNEL);
1974 	if (!net->ipv4.vif_table) {
1975 		err = -ENOMEM;
1976 		goto fail;
1977 	}
1978 
1979 	/* Forwarding cache */
1980 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1981 					    sizeof(struct mfc_cache *),
1982 					    GFP_KERNEL);
1983 	if (!net->ipv4.mfc_cache_array) {
1984 		err = -ENOMEM;
1985 		goto fail_mfc_cache;
1986 	}
1987 
1988 #ifdef CONFIG_IP_PIMSM
1989 	net->ipv4.mroute_reg_vif_num = -1;
1990 #endif
1991 
1992 #ifdef CONFIG_PROC_FS
1993 	err = -ENOMEM;
1994 	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1995 		goto proc_vif_fail;
1996 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1997 		goto proc_cache_fail;
1998 #endif
1999 	return 0;
2000 
2001 #ifdef CONFIG_PROC_FS
2002 proc_cache_fail:
2003 	proc_net_remove(net, "ip_mr_vif");
2004 proc_vif_fail:
2005 	kfree(net->ipv4.mfc_cache_array);
2006 #endif
2007 fail_mfc_cache:
2008 	kfree(net->ipv4.vif_table);
2009 fail:
2010 	return err;
2011 }
2012 
2013 static void __net_exit ipmr_net_exit(struct net *net)
2014 {
2015 #ifdef CONFIG_PROC_FS
2016 	proc_net_remove(net, "ip_mr_cache");
2017 	proc_net_remove(net, "ip_mr_vif");
2018 #endif
2019 	kfree(net->ipv4.mfc_cache_array);
2020 	kfree(net->ipv4.vif_table);
2021 }
2022 
2023 static struct pernet_operations ipmr_net_ops = {
2024 	.init = ipmr_net_init,
2025 	.exit = ipmr_net_exit,
2026 };
2027 
2028 int __init ip_mr_init(void)
2029 {
2030 	int err;
2031 
2032 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2033 				       sizeof(struct mfc_cache),
2034 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2035 				       NULL);
2036 	if (!mrt_cachep)
2037 		return -ENOMEM;
2038 
2039 	err = register_pernet_subsys(&ipmr_net_ops);
2040 	if (err)
2041 		goto reg_pernet_fail;
2042 
2043 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2044 	err = register_netdevice_notifier(&ip_mr_notifier);
2045 	if (err)
2046 		goto reg_notif_fail;
2047 #ifdef CONFIG_IP_PIMSM_V2
2048 	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2049 		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2050 		err = -EAGAIN;
2051 		goto add_proto_fail;
2052 	}
2053 #endif
2054 	return 0;
2055 
2056 #ifdef CONFIG_IP_PIMSM_V2
2057 add_proto_fail:
2058 	unregister_netdevice_notifier(&ip_mr_notifier);
2059 #endif
2060 reg_notif_fail:
2061 	del_timer(&ipmr_expire_timer);
2062 	unregister_pernet_subsys(&ipmr_net_ops);
2063 reg_pernet_fail:
2064 	kmem_cache_destroy(mrt_cachep);
2065 	return err;
2066 }
2067