xref: /linux/net/ipv4/ipmr.c (revision c145211d1f9e2ef19e7b4c2b943f68366daa97af)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Fixes:
13  *	Michael Chastain	:	Incorrect size of copying.
14  *	Alan Cox		:	Added the cache manager code
15  *	Alan Cox		:	Fixed the clone/copy bug and device race.
16  *	Mike McLagan		:	Routing by source
17  *	Malcolm Beattie		:	Buffer handling fixes.
18  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19  *	SVR Anand		:	Fixed several multicast bugs and problems.
20  *	Alexey Kuznetsov	:	Status, optimisations and more.
21  *	Brad Parker		:	Better behaviour on mrouted upcall
22  *					overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25  *					Relax this requirement to work with older peers.
26  *
27  */
28 
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM	1
69 #endif
70 
71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
72    Note that the changes are semaphored via rtnl_lock.
73  */
74 
75 static DEFINE_RWLOCK(mrt_lock);
76 
77 /*
78  *	Multicast router control variables
79  */
80 
81 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
82 
83 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
84 
85 /* Special spinlock for queue of unresolved entries */
86 static DEFINE_SPINLOCK(mfc_unres_lock);
87 
88 /* We return to original Alan's scheme. Hash table of resolved
89    entries is changed only in process context and protected
90    with weak lock mrt_lock. Queue of unresolved entries is protected
91    with strong spinlock mfc_unres_lock.
92 
93    In this case data path is free of exclusive locks at all.
94  */
95 
96 static struct kmem_cache *mrt_cachep __read_mostly;
97 
98 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
99 static int ipmr_cache_report(struct net *net,
100 			     struct sk_buff *pkt, vifi_t vifi, int assert);
101 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
102 
103 static struct timer_list ipmr_expire_timer;
104 
105 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
106 
107 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
108 {
109 	struct net *net = dev_net(dev);
110 
111 	dev_close(dev);
112 
113 	dev = __dev_get_by_name(net, "tunl0");
114 	if (dev) {
115 		const struct net_device_ops *ops = dev->netdev_ops;
116 		struct ifreq ifr;
117 		struct ip_tunnel_parm p;
118 
119 		memset(&p, 0, sizeof(p));
120 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
121 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
122 		p.iph.version = 4;
123 		p.iph.ihl = 5;
124 		p.iph.protocol = IPPROTO_IPIP;
125 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
126 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
127 
128 		if (ops->ndo_do_ioctl) {
129 			mm_segment_t oldfs = get_fs();
130 
131 			set_fs(KERNEL_DS);
132 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
133 			set_fs(oldfs);
134 		}
135 	}
136 }
137 
138 static
139 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
140 {
141 	struct net_device  *dev;
142 
143 	dev = __dev_get_by_name(net, "tunl0");
144 
145 	if (dev) {
146 		const struct net_device_ops *ops = dev->netdev_ops;
147 		int err;
148 		struct ifreq ifr;
149 		struct ip_tunnel_parm p;
150 		struct in_device  *in_dev;
151 
152 		memset(&p, 0, sizeof(p));
153 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
154 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
155 		p.iph.version = 4;
156 		p.iph.ihl = 5;
157 		p.iph.protocol = IPPROTO_IPIP;
158 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
159 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
160 
161 		if (ops->ndo_do_ioctl) {
162 			mm_segment_t oldfs = get_fs();
163 
164 			set_fs(KERNEL_DS);
165 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
166 			set_fs(oldfs);
167 		} else
168 			err = -EOPNOTSUPP;
169 
170 		dev = NULL;
171 
172 		if (err == 0 &&
173 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
174 			dev->flags |= IFF_MULTICAST;
175 
176 			in_dev = __in_dev_get_rtnl(dev);
177 			if (in_dev == NULL)
178 				goto failure;
179 
180 			ipv4_devconf_setall(in_dev);
181 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
182 
183 			if (dev_open(dev))
184 				goto failure;
185 			dev_hold(dev);
186 		}
187 	}
188 	return dev;
189 
190 failure:
191 	/* allow the register to be completed before unregistering. */
192 	rtnl_unlock();
193 	rtnl_lock();
194 
195 	unregister_netdevice(dev);
196 	return NULL;
197 }
198 
199 #ifdef CONFIG_IP_PIMSM
200 
201 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
202 {
203 	struct net *net = dev_net(dev);
204 
205 	read_lock(&mrt_lock);
206 	dev->stats.tx_bytes += skb->len;
207 	dev->stats.tx_packets++;
208 	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
209 			  IGMPMSG_WHOLEPKT);
210 	read_unlock(&mrt_lock);
211 	kfree_skb(skb);
212 	return NETDEV_TX_OK;
213 }
214 
215 static const struct net_device_ops reg_vif_netdev_ops = {
216 	.ndo_start_xmit	= reg_vif_xmit,
217 };
218 
219 static void reg_vif_setup(struct net_device *dev)
220 {
221 	dev->type		= ARPHRD_PIMREG;
222 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
223 	dev->flags		= IFF_NOARP;
224 	dev->netdev_ops		= &reg_vif_netdev_ops,
225 	dev->destructor		= free_netdev;
226 	dev->features		|= NETIF_F_NETNS_LOCAL;
227 }
228 
229 static struct net_device *ipmr_reg_vif(struct net *net)
230 {
231 	struct net_device *dev;
232 	struct in_device *in_dev;
233 
234 	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
235 
236 	if (dev == NULL)
237 		return NULL;
238 
239 	dev_net_set(dev, net);
240 
241 	if (register_netdevice(dev)) {
242 		free_netdev(dev);
243 		return NULL;
244 	}
245 	dev->iflink = 0;
246 
247 	rcu_read_lock();
248 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249 		rcu_read_unlock();
250 		goto failure;
251 	}
252 
253 	ipv4_devconf_setall(in_dev);
254 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255 	rcu_read_unlock();
256 
257 	if (dev_open(dev))
258 		goto failure;
259 
260 	dev_hold(dev);
261 
262 	return dev;
263 
264 failure:
265 	/* allow the register to be completed before unregistering. */
266 	rtnl_unlock();
267 	rtnl_lock();
268 
269 	unregister_netdevice(dev);
270 	return NULL;
271 }
272 #endif
273 
274 /*
275  *	Delete a VIF entry
276  *	@notify: Set to 1, if the caller is a notifier_call
277  */
278 
279 static int vif_delete(struct net *net, int vifi, int notify,
280 		      struct list_head *head)
281 {
282 	struct vif_device *v;
283 	struct net_device *dev;
284 	struct in_device *in_dev;
285 
286 	if (vifi < 0 || vifi >= net->ipv4.maxvif)
287 		return -EADDRNOTAVAIL;
288 
289 	v = &net->ipv4.vif_table[vifi];
290 
291 	write_lock_bh(&mrt_lock);
292 	dev = v->dev;
293 	v->dev = NULL;
294 
295 	if (!dev) {
296 		write_unlock_bh(&mrt_lock);
297 		return -EADDRNOTAVAIL;
298 	}
299 
300 #ifdef CONFIG_IP_PIMSM
301 	if (vifi == net->ipv4.mroute_reg_vif_num)
302 		net->ipv4.mroute_reg_vif_num = -1;
303 #endif
304 
305 	if (vifi+1 == net->ipv4.maxvif) {
306 		int tmp;
307 		for (tmp=vifi-1; tmp>=0; tmp--) {
308 			if (VIF_EXISTS(net, tmp))
309 				break;
310 		}
311 		net->ipv4.maxvif = tmp+1;
312 	}
313 
314 	write_unlock_bh(&mrt_lock);
315 
316 	dev_set_allmulti(dev, -1);
317 
318 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
319 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
320 		ip_rt_multicast_event(in_dev);
321 	}
322 
323 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
324 		unregister_netdevice_queue(dev, head);
325 
326 	dev_put(dev);
327 	return 0;
328 }
329 
330 static inline void ipmr_cache_free(struct mfc_cache *c)
331 {
332 	release_net(mfc_net(c));
333 	kmem_cache_free(mrt_cachep, c);
334 }
335 
336 /* Destroy an unresolved cache entry, killing queued skbs
337    and reporting error to netlink readers.
338  */
339 
340 static void ipmr_destroy_unres(struct mfc_cache *c)
341 {
342 	struct sk_buff *skb;
343 	struct nlmsgerr *e;
344 	struct net *net = mfc_net(c);
345 
346 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
347 
348 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
349 		if (ip_hdr(skb)->version == 0) {
350 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
351 			nlh->nlmsg_type = NLMSG_ERROR;
352 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
353 			skb_trim(skb, nlh->nlmsg_len);
354 			e = NLMSG_DATA(nlh);
355 			e->error = -ETIMEDOUT;
356 			memset(&e->msg, 0, sizeof(e->msg));
357 
358 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
359 		} else
360 			kfree_skb(skb);
361 	}
362 
363 	ipmr_cache_free(c);
364 }
365 
366 
367 /* Single timer process for all the unresolved queue. */
368 
369 static void ipmr_expire_process(unsigned long dummy)
370 {
371 	unsigned long now;
372 	unsigned long expires;
373 	struct mfc_cache *c, **cp;
374 
375 	if (!spin_trylock(&mfc_unres_lock)) {
376 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
377 		return;
378 	}
379 
380 	if (mfc_unres_queue == NULL)
381 		goto out;
382 
383 	now = jiffies;
384 	expires = 10*HZ;
385 	cp = &mfc_unres_queue;
386 
387 	while ((c=*cp) != NULL) {
388 		if (time_after(c->mfc_un.unres.expires, now)) {
389 			unsigned long interval = c->mfc_un.unres.expires - now;
390 			if (interval < expires)
391 				expires = interval;
392 			cp = &c->next;
393 			continue;
394 		}
395 
396 		*cp = c->next;
397 
398 		ipmr_destroy_unres(c);
399 	}
400 
401 	if (mfc_unres_queue != NULL)
402 		mod_timer(&ipmr_expire_timer, jiffies + expires);
403 
404 out:
405 	spin_unlock(&mfc_unres_lock);
406 }
407 
408 /* Fill oifs list. It is called under write locked mrt_lock. */
409 
410 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
411 {
412 	int vifi;
413 	struct net *net = mfc_net(cache);
414 
415 	cache->mfc_un.res.minvif = MAXVIFS;
416 	cache->mfc_un.res.maxvif = 0;
417 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
418 
419 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
420 		if (VIF_EXISTS(net, vifi) &&
421 		    ttls[vifi] && ttls[vifi] < 255) {
422 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
423 			if (cache->mfc_un.res.minvif > vifi)
424 				cache->mfc_un.res.minvif = vifi;
425 			if (cache->mfc_un.res.maxvif <= vifi)
426 				cache->mfc_un.res.maxvif = vifi + 1;
427 		}
428 	}
429 }
430 
431 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
432 {
433 	int vifi = vifc->vifc_vifi;
434 	struct vif_device *v = &net->ipv4.vif_table[vifi];
435 	struct net_device *dev;
436 	struct in_device *in_dev;
437 	int err;
438 
439 	/* Is vif busy ? */
440 	if (VIF_EXISTS(net, vifi))
441 		return -EADDRINUSE;
442 
443 	switch (vifc->vifc_flags) {
444 #ifdef CONFIG_IP_PIMSM
445 	case VIFF_REGISTER:
446 		/*
447 		 * Special Purpose VIF in PIM
448 		 * All the packets will be sent to the daemon
449 		 */
450 		if (net->ipv4.mroute_reg_vif_num >= 0)
451 			return -EADDRINUSE;
452 		dev = ipmr_reg_vif(net);
453 		if (!dev)
454 			return -ENOBUFS;
455 		err = dev_set_allmulti(dev, 1);
456 		if (err) {
457 			unregister_netdevice(dev);
458 			dev_put(dev);
459 			return err;
460 		}
461 		break;
462 #endif
463 	case VIFF_TUNNEL:
464 		dev = ipmr_new_tunnel(net, vifc);
465 		if (!dev)
466 			return -ENOBUFS;
467 		err = dev_set_allmulti(dev, 1);
468 		if (err) {
469 			ipmr_del_tunnel(dev, vifc);
470 			dev_put(dev);
471 			return err;
472 		}
473 		break;
474 
475 	case VIFF_USE_IFINDEX:
476 	case 0:
477 		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
478 			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
479 			if (dev && dev->ip_ptr == NULL) {
480 				dev_put(dev);
481 				return -EADDRNOTAVAIL;
482 			}
483 		} else
484 			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
485 
486 		if (!dev)
487 			return -EADDRNOTAVAIL;
488 		err = dev_set_allmulti(dev, 1);
489 		if (err) {
490 			dev_put(dev);
491 			return err;
492 		}
493 		break;
494 	default:
495 		return -EINVAL;
496 	}
497 
498 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
499 		dev_put(dev);
500 		return -EADDRNOTAVAIL;
501 	}
502 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
503 	ip_rt_multicast_event(in_dev);
504 
505 	/*
506 	 *	Fill in the VIF structures
507 	 */
508 	v->rate_limit = vifc->vifc_rate_limit;
509 	v->local = vifc->vifc_lcl_addr.s_addr;
510 	v->remote = vifc->vifc_rmt_addr.s_addr;
511 	v->flags = vifc->vifc_flags;
512 	if (!mrtsock)
513 		v->flags |= VIFF_STATIC;
514 	v->threshold = vifc->vifc_threshold;
515 	v->bytes_in = 0;
516 	v->bytes_out = 0;
517 	v->pkt_in = 0;
518 	v->pkt_out = 0;
519 	v->link = dev->ifindex;
520 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
521 		v->link = dev->iflink;
522 
523 	/* And finish update writing critical data */
524 	write_lock_bh(&mrt_lock);
525 	v->dev = dev;
526 #ifdef CONFIG_IP_PIMSM
527 	if (v->flags&VIFF_REGISTER)
528 		net->ipv4.mroute_reg_vif_num = vifi;
529 #endif
530 	if (vifi+1 > net->ipv4.maxvif)
531 		net->ipv4.maxvif = vifi+1;
532 	write_unlock_bh(&mrt_lock);
533 	return 0;
534 }
535 
536 static struct mfc_cache *ipmr_cache_find(struct net *net,
537 					 __be32 origin,
538 					 __be32 mcastgrp)
539 {
540 	int line = MFC_HASH(mcastgrp, origin);
541 	struct mfc_cache *c;
542 
543 	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
544 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
545 			break;
546 	}
547 	return c;
548 }
549 
550 /*
551  *	Allocate a multicast cache entry
552  */
553 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
554 {
555 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
556 	if (c == NULL)
557 		return NULL;
558 	c->mfc_un.res.minvif = MAXVIFS;
559 	mfc_net_set(c, net);
560 	return c;
561 }
562 
563 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
564 {
565 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
566 	if (c == NULL)
567 		return NULL;
568 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
569 	c->mfc_un.unres.expires = jiffies + 10*HZ;
570 	mfc_net_set(c, net);
571 	return c;
572 }
573 
574 /*
575  *	A cache entry has gone into a resolved state from queued
576  */
577 
578 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
579 {
580 	struct sk_buff *skb;
581 	struct nlmsgerr *e;
582 
583 	/*
584 	 *	Play the pending entries through our router
585 	 */
586 
587 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
588 		if (ip_hdr(skb)->version == 0) {
589 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
590 
591 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
592 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
593 						  (u8 *)nlh);
594 			} else {
595 				nlh->nlmsg_type = NLMSG_ERROR;
596 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
597 				skb_trim(skb, nlh->nlmsg_len);
598 				e = NLMSG_DATA(nlh);
599 				e->error = -EMSGSIZE;
600 				memset(&e->msg, 0, sizeof(e->msg));
601 			}
602 
603 			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
604 		} else
605 			ip_mr_forward(skb, c, 0);
606 	}
607 }
608 
609 /*
610  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
611  *	expects the following bizarre scheme.
612  *
613  *	Called under mrt_lock.
614  */
615 
616 static int ipmr_cache_report(struct net *net,
617 			     struct sk_buff *pkt, vifi_t vifi, int assert)
618 {
619 	struct sk_buff *skb;
620 	const int ihl = ip_hdrlen(pkt);
621 	struct igmphdr *igmp;
622 	struct igmpmsg *msg;
623 	int ret;
624 
625 #ifdef CONFIG_IP_PIMSM
626 	if (assert == IGMPMSG_WHOLEPKT)
627 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
628 	else
629 #endif
630 		skb = alloc_skb(128, GFP_ATOMIC);
631 
632 	if (!skb)
633 		return -ENOBUFS;
634 
635 #ifdef CONFIG_IP_PIMSM
636 	if (assert == IGMPMSG_WHOLEPKT) {
637 		/* Ugly, but we have no choice with this interface.
638 		   Duplicate old header, fix ihl, length etc.
639 		   And all this only to mangle msg->im_msgtype and
640 		   to set msg->im_mbz to "mbz" :-)
641 		 */
642 		skb_push(skb, sizeof(struct iphdr));
643 		skb_reset_network_header(skb);
644 		skb_reset_transport_header(skb);
645 		msg = (struct igmpmsg *)skb_network_header(skb);
646 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
647 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
648 		msg->im_mbz = 0;
649 		msg->im_vif = net->ipv4.mroute_reg_vif_num;
650 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
651 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
652 					     sizeof(struct iphdr));
653 	} else
654 #endif
655 	{
656 
657 	/*
658 	 *	Copy the IP header
659 	 */
660 
661 	skb->network_header = skb->tail;
662 	skb_put(skb, ihl);
663 	skb_copy_to_linear_data(skb, pkt->data, ihl);
664 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
665 	msg = (struct igmpmsg *)skb_network_header(skb);
666 	msg->im_vif = vifi;
667 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
668 
669 	/*
670 	 *	Add our header
671 	 */
672 
673 	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
674 	igmp->type	=
675 	msg->im_msgtype = assert;
676 	igmp->code 	=	0;
677 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
678 	skb->transport_header = skb->network_header;
679 	}
680 
681 	if (net->ipv4.mroute_sk == NULL) {
682 		kfree_skb(skb);
683 		return -EINVAL;
684 	}
685 
686 	/*
687 	 *	Deliver to mrouted
688 	 */
689 	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
690 	if (ret < 0) {
691 		if (net_ratelimit())
692 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
693 		kfree_skb(skb);
694 	}
695 
696 	return ret;
697 }
698 
699 /*
700  *	Queue a packet for resolution. It gets locked cache entry!
701  */
702 
703 static int
704 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
705 {
706 	int err;
707 	struct mfc_cache *c;
708 	const struct iphdr *iph = ip_hdr(skb);
709 
710 	spin_lock_bh(&mfc_unres_lock);
711 	for (c=mfc_unres_queue; c; c=c->next) {
712 		if (net_eq(mfc_net(c), net) &&
713 		    c->mfc_mcastgrp == iph->daddr &&
714 		    c->mfc_origin == iph->saddr)
715 			break;
716 	}
717 
718 	if (c == NULL) {
719 		/*
720 		 *	Create a new entry if allowable
721 		 */
722 
723 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
724 		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
725 			spin_unlock_bh(&mfc_unres_lock);
726 
727 			kfree_skb(skb);
728 			return -ENOBUFS;
729 		}
730 
731 		/*
732 		 *	Fill in the new cache entry
733 		 */
734 		c->mfc_parent	= -1;
735 		c->mfc_origin	= iph->saddr;
736 		c->mfc_mcastgrp	= iph->daddr;
737 
738 		/*
739 		 *	Reflect first query at mrouted.
740 		 */
741 		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
742 		if (err < 0) {
743 			/* If the report failed throw the cache entry
744 			   out - Brad Parker
745 			 */
746 			spin_unlock_bh(&mfc_unres_lock);
747 
748 			ipmr_cache_free(c);
749 			kfree_skb(skb);
750 			return err;
751 		}
752 
753 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
754 		c->next = mfc_unres_queue;
755 		mfc_unres_queue = c;
756 
757 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1)
758 			mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
759 	}
760 
761 	/*
762 	 *	See if we can append the packet
763 	 */
764 	if (c->mfc_un.unres.unresolved.qlen>3) {
765 		kfree_skb(skb);
766 		err = -ENOBUFS;
767 	} else {
768 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
769 		err = 0;
770 	}
771 
772 	spin_unlock_bh(&mfc_unres_lock);
773 	return err;
774 }
775 
776 /*
777  *	MFC cache manipulation by user space mroute daemon
778  */
779 
780 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
781 {
782 	int line;
783 	struct mfc_cache *c, **cp;
784 
785 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
786 
787 	for (cp = &net->ipv4.mfc_cache_array[line];
788 	     (c = *cp) != NULL; cp = &c->next) {
789 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
790 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
791 			write_lock_bh(&mrt_lock);
792 			*cp = c->next;
793 			write_unlock_bh(&mrt_lock);
794 
795 			ipmr_cache_free(c);
796 			return 0;
797 		}
798 	}
799 	return -ENOENT;
800 }
801 
802 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
803 {
804 	int line;
805 	struct mfc_cache *uc, *c, **cp;
806 
807 	if (mfc->mfcc_parent >= MAXVIFS)
808 		return -ENFILE;
809 
810 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
811 
812 	for (cp = &net->ipv4.mfc_cache_array[line];
813 	     (c = *cp) != NULL; cp = &c->next) {
814 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
815 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
816 			break;
817 	}
818 
819 	if (c != NULL) {
820 		write_lock_bh(&mrt_lock);
821 		c->mfc_parent = mfc->mfcc_parent;
822 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
823 		if (!mrtsock)
824 			c->mfc_flags |= MFC_STATIC;
825 		write_unlock_bh(&mrt_lock);
826 		return 0;
827 	}
828 
829 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
830 		return -EINVAL;
831 
832 	c = ipmr_cache_alloc(net);
833 	if (c == NULL)
834 		return -ENOMEM;
835 
836 	c->mfc_origin = mfc->mfcc_origin.s_addr;
837 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
838 	c->mfc_parent = mfc->mfcc_parent;
839 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
840 	if (!mrtsock)
841 		c->mfc_flags |= MFC_STATIC;
842 
843 	write_lock_bh(&mrt_lock);
844 	c->next = net->ipv4.mfc_cache_array[line];
845 	net->ipv4.mfc_cache_array[line] = c;
846 	write_unlock_bh(&mrt_lock);
847 
848 	/*
849 	 *	Check to see if we resolved a queued list. If so we
850 	 *	need to send on the frames and tidy up.
851 	 */
852 	spin_lock_bh(&mfc_unres_lock);
853 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
854 	     cp = &uc->next) {
855 		if (net_eq(mfc_net(uc), net) &&
856 		    uc->mfc_origin == c->mfc_origin &&
857 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
858 			*cp = uc->next;
859 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
860 			break;
861 		}
862 	}
863 	if (mfc_unres_queue == NULL)
864 		del_timer(&ipmr_expire_timer);
865 	spin_unlock_bh(&mfc_unres_lock);
866 
867 	if (uc) {
868 		ipmr_cache_resolve(uc, c);
869 		ipmr_cache_free(uc);
870 	}
871 	return 0;
872 }
873 
874 /*
875  *	Close the multicast socket, and clear the vif tables etc
876  */
877 
878 static void mroute_clean_tables(struct net *net)
879 {
880 	int i;
881 	LIST_HEAD(list);
882 
883 	/*
884 	 *	Shut down all active vif entries
885 	 */
886 	for (i = 0; i < net->ipv4.maxvif; i++) {
887 		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
888 			vif_delete(net, i, 0, &list);
889 	}
890 	unregister_netdevice_many(&list);
891 
892 	/*
893 	 *	Wipe the cache
894 	 */
895 	for (i=0; i<MFC_LINES; i++) {
896 		struct mfc_cache *c, **cp;
897 
898 		cp = &net->ipv4.mfc_cache_array[i];
899 		while ((c = *cp) != NULL) {
900 			if (c->mfc_flags&MFC_STATIC) {
901 				cp = &c->next;
902 				continue;
903 			}
904 			write_lock_bh(&mrt_lock);
905 			*cp = c->next;
906 			write_unlock_bh(&mrt_lock);
907 
908 			ipmr_cache_free(c);
909 		}
910 	}
911 
912 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
913 		struct mfc_cache *c, **cp;
914 
915 		spin_lock_bh(&mfc_unres_lock);
916 		cp = &mfc_unres_queue;
917 		while ((c = *cp) != NULL) {
918 			if (!net_eq(mfc_net(c), net)) {
919 				cp = &c->next;
920 				continue;
921 			}
922 			*cp = c->next;
923 
924 			ipmr_destroy_unres(c);
925 		}
926 		spin_unlock_bh(&mfc_unres_lock);
927 	}
928 }
929 
930 static void mrtsock_destruct(struct sock *sk)
931 {
932 	struct net *net = sock_net(sk);
933 
934 	rtnl_lock();
935 	if (sk == net->ipv4.mroute_sk) {
936 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
937 
938 		write_lock_bh(&mrt_lock);
939 		net->ipv4.mroute_sk = NULL;
940 		write_unlock_bh(&mrt_lock);
941 
942 		mroute_clean_tables(net);
943 	}
944 	rtnl_unlock();
945 }
946 
947 /*
948  *	Socket options and virtual interface manipulation. The whole
949  *	virtual interface system is a complete heap, but unfortunately
950  *	that's how BSD mrouted happens to think. Maybe one day with a proper
951  *	MOSPF/PIM router set up we can clean this up.
952  */
953 
954 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
955 {
956 	int ret;
957 	struct vifctl vif;
958 	struct mfcctl mfc;
959 	struct net *net = sock_net(sk);
960 
961 	if (optname != MRT_INIT) {
962 		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
963 			return -EACCES;
964 	}
965 
966 	switch (optname) {
967 	case MRT_INIT:
968 		if (sk->sk_type != SOCK_RAW ||
969 		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
970 			return -EOPNOTSUPP;
971 		if (optlen != sizeof(int))
972 			return -ENOPROTOOPT;
973 
974 		rtnl_lock();
975 		if (net->ipv4.mroute_sk) {
976 			rtnl_unlock();
977 			return -EADDRINUSE;
978 		}
979 
980 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
981 		if (ret == 0) {
982 			write_lock_bh(&mrt_lock);
983 			net->ipv4.mroute_sk = sk;
984 			write_unlock_bh(&mrt_lock);
985 
986 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
987 		}
988 		rtnl_unlock();
989 		return ret;
990 	case MRT_DONE:
991 		if (sk != net->ipv4.mroute_sk)
992 			return -EACCES;
993 		return ip_ra_control(sk, 0, NULL);
994 	case MRT_ADD_VIF:
995 	case MRT_DEL_VIF:
996 		if (optlen != sizeof(vif))
997 			return -EINVAL;
998 		if (copy_from_user(&vif, optval, sizeof(vif)))
999 			return -EFAULT;
1000 		if (vif.vifc_vifi >= MAXVIFS)
1001 			return -ENFILE;
1002 		rtnl_lock();
1003 		if (optname == MRT_ADD_VIF) {
1004 			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1005 		} else {
1006 			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1007 		}
1008 		rtnl_unlock();
1009 		return ret;
1010 
1011 		/*
1012 		 *	Manipulate the forwarding caches. These live
1013 		 *	in a sort of kernel/user symbiosis.
1014 		 */
1015 	case MRT_ADD_MFC:
1016 	case MRT_DEL_MFC:
1017 		if (optlen != sizeof(mfc))
1018 			return -EINVAL;
1019 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1020 			return -EFAULT;
1021 		rtnl_lock();
1022 		if (optname == MRT_DEL_MFC)
1023 			ret = ipmr_mfc_delete(net, &mfc);
1024 		else
1025 			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1026 		rtnl_unlock();
1027 		return ret;
1028 		/*
1029 		 *	Control PIM assert.
1030 		 */
1031 	case MRT_ASSERT:
1032 	{
1033 		int v;
1034 		if (get_user(v,(int __user *)optval))
1035 			return -EFAULT;
1036 		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1037 		return 0;
1038 	}
1039 #ifdef CONFIG_IP_PIMSM
1040 	case MRT_PIM:
1041 	{
1042 		int v;
1043 
1044 		if (get_user(v,(int __user *)optval))
1045 			return -EFAULT;
1046 		v = (v) ? 1 : 0;
1047 
1048 		rtnl_lock();
1049 		ret = 0;
1050 		if (v != net->ipv4.mroute_do_pim) {
1051 			net->ipv4.mroute_do_pim = v;
1052 			net->ipv4.mroute_do_assert = v;
1053 		}
1054 		rtnl_unlock();
1055 		return ret;
1056 	}
1057 #endif
1058 	/*
1059 	 *	Spurious command, or MRT_VERSION which you cannot
1060 	 *	set.
1061 	 */
1062 	default:
1063 		return -ENOPROTOOPT;
1064 	}
1065 }
1066 
1067 /*
1068  *	Getsock opt support for the multicast routing system.
1069  */
1070 
1071 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1072 {
1073 	int olr;
1074 	int val;
1075 	struct net *net = sock_net(sk);
1076 
1077 	if (optname != MRT_VERSION &&
1078 #ifdef CONFIG_IP_PIMSM
1079 	   optname!=MRT_PIM &&
1080 #endif
1081 	   optname!=MRT_ASSERT)
1082 		return -ENOPROTOOPT;
1083 
1084 	if (get_user(olr, optlen))
1085 		return -EFAULT;
1086 
1087 	olr = min_t(unsigned int, olr, sizeof(int));
1088 	if (olr < 0)
1089 		return -EINVAL;
1090 
1091 	if (put_user(olr, optlen))
1092 		return -EFAULT;
1093 	if (optname == MRT_VERSION)
1094 		val = 0x0305;
1095 #ifdef CONFIG_IP_PIMSM
1096 	else if (optname == MRT_PIM)
1097 		val = net->ipv4.mroute_do_pim;
1098 #endif
1099 	else
1100 		val = net->ipv4.mroute_do_assert;
1101 	if (copy_to_user(optval, &val, olr))
1102 		return -EFAULT;
1103 	return 0;
1104 }
1105 
1106 /*
1107  *	The IP multicast ioctl support routines.
1108  */
1109 
1110 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1111 {
1112 	struct sioc_sg_req sr;
1113 	struct sioc_vif_req vr;
1114 	struct vif_device *vif;
1115 	struct mfc_cache *c;
1116 	struct net *net = sock_net(sk);
1117 
1118 	switch (cmd) {
1119 	case SIOCGETVIFCNT:
1120 		if (copy_from_user(&vr, arg, sizeof(vr)))
1121 			return -EFAULT;
1122 		if (vr.vifi >= net->ipv4.maxvif)
1123 			return -EINVAL;
1124 		read_lock(&mrt_lock);
1125 		vif = &net->ipv4.vif_table[vr.vifi];
1126 		if (VIF_EXISTS(net, vr.vifi)) {
1127 			vr.icount = vif->pkt_in;
1128 			vr.ocount = vif->pkt_out;
1129 			vr.ibytes = vif->bytes_in;
1130 			vr.obytes = vif->bytes_out;
1131 			read_unlock(&mrt_lock);
1132 
1133 			if (copy_to_user(arg, &vr, sizeof(vr)))
1134 				return -EFAULT;
1135 			return 0;
1136 		}
1137 		read_unlock(&mrt_lock);
1138 		return -EADDRNOTAVAIL;
1139 	case SIOCGETSGCNT:
1140 		if (copy_from_user(&sr, arg, sizeof(sr)))
1141 			return -EFAULT;
1142 
1143 		read_lock(&mrt_lock);
1144 		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1145 		if (c) {
1146 			sr.pktcnt = c->mfc_un.res.pkt;
1147 			sr.bytecnt = c->mfc_un.res.bytes;
1148 			sr.wrong_if = c->mfc_un.res.wrong_if;
1149 			read_unlock(&mrt_lock);
1150 
1151 			if (copy_to_user(arg, &sr, sizeof(sr)))
1152 				return -EFAULT;
1153 			return 0;
1154 		}
1155 		read_unlock(&mrt_lock);
1156 		return -EADDRNOTAVAIL;
1157 	default:
1158 		return -ENOIOCTLCMD;
1159 	}
1160 }
1161 
1162 
1163 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1164 {
1165 	struct net_device *dev = ptr;
1166 	struct net *net = dev_net(dev);
1167 	struct vif_device *v;
1168 	int ct;
1169 	LIST_HEAD(list);
1170 
1171 	if (event != NETDEV_UNREGISTER)
1172 		return NOTIFY_DONE;
1173 	v = &net->ipv4.vif_table[0];
1174 	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1175 		if (v->dev == dev)
1176 			vif_delete(net, ct, 1, &list);
1177 	}
1178 	unregister_netdevice_many(&list);
1179 	return NOTIFY_DONE;
1180 }
1181 
1182 
1183 static struct notifier_block ip_mr_notifier = {
1184 	.notifier_call = ipmr_device_event,
1185 };
1186 
1187 /*
1188  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1189  *	This avoids tunnel drivers and other mess and gives us the speed so
1190  *	important for multicast video.
1191  */
1192 
1193 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1194 {
1195 	struct iphdr *iph;
1196 	struct iphdr *old_iph = ip_hdr(skb);
1197 
1198 	skb_push(skb, sizeof(struct iphdr));
1199 	skb->transport_header = skb->network_header;
1200 	skb_reset_network_header(skb);
1201 	iph = ip_hdr(skb);
1202 
1203 	iph->version	= 	4;
1204 	iph->tos	=	old_iph->tos;
1205 	iph->ttl	=	old_iph->ttl;
1206 	iph->frag_off	=	0;
1207 	iph->daddr	=	daddr;
1208 	iph->saddr	=	saddr;
1209 	iph->protocol	=	IPPROTO_IPIP;
1210 	iph->ihl	=	5;
1211 	iph->tot_len	=	htons(skb->len);
1212 	ip_select_ident(iph, skb_dst(skb), NULL);
1213 	ip_send_check(iph);
1214 
1215 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1216 	nf_reset(skb);
1217 }
1218 
1219 static inline int ipmr_forward_finish(struct sk_buff *skb)
1220 {
1221 	struct ip_options * opt	= &(IPCB(skb)->opt);
1222 
1223 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1224 
1225 	if (unlikely(opt->optlen))
1226 		ip_forward_options(skb);
1227 
1228 	return dst_output(skb);
1229 }
1230 
1231 /*
1232  *	Processing handlers for ipmr_forward
1233  */
1234 
1235 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1236 {
1237 	struct net *net = mfc_net(c);
1238 	const struct iphdr *iph = ip_hdr(skb);
1239 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
1240 	struct net_device *dev;
1241 	struct rtable *rt;
1242 	int    encap = 0;
1243 
1244 	if (vif->dev == NULL)
1245 		goto out_free;
1246 
1247 #ifdef CONFIG_IP_PIMSM
1248 	if (vif->flags & VIFF_REGISTER) {
1249 		vif->pkt_out++;
1250 		vif->bytes_out += skb->len;
1251 		vif->dev->stats.tx_bytes += skb->len;
1252 		vif->dev->stats.tx_packets++;
1253 		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1254 		goto out_free;
1255 	}
1256 #endif
1257 
1258 	if (vif->flags&VIFF_TUNNEL) {
1259 		struct flowi fl = { .oif = vif->link,
1260 				    .nl_u = { .ip4_u =
1261 					      { .daddr = vif->remote,
1262 						.saddr = vif->local,
1263 						.tos = RT_TOS(iph->tos) } },
1264 				    .proto = IPPROTO_IPIP };
1265 		if (ip_route_output_key(net, &rt, &fl))
1266 			goto out_free;
1267 		encap = sizeof(struct iphdr);
1268 	} else {
1269 		struct flowi fl = { .oif = vif->link,
1270 				    .nl_u = { .ip4_u =
1271 					      { .daddr = iph->daddr,
1272 						.tos = RT_TOS(iph->tos) } },
1273 				    .proto = IPPROTO_IPIP };
1274 		if (ip_route_output_key(net, &rt, &fl))
1275 			goto out_free;
1276 	}
1277 
1278 	dev = rt->u.dst.dev;
1279 
1280 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1281 		/* Do not fragment multicasts. Alas, IPv4 does not
1282 		   allow to send ICMP, so that packets will disappear
1283 		   to blackhole.
1284 		 */
1285 
1286 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1287 		ip_rt_put(rt);
1288 		goto out_free;
1289 	}
1290 
1291 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1292 
1293 	if (skb_cow(skb, encap)) {
1294 		ip_rt_put(rt);
1295 		goto out_free;
1296 	}
1297 
1298 	vif->pkt_out++;
1299 	vif->bytes_out += skb->len;
1300 
1301 	skb_dst_drop(skb);
1302 	skb_dst_set(skb, &rt->u.dst);
1303 	ip_decrease_ttl(ip_hdr(skb));
1304 
1305 	/* FIXME: forward and output firewalls used to be called here.
1306 	 * What do we do with netfilter? -- RR */
1307 	if (vif->flags & VIFF_TUNNEL) {
1308 		ip_encap(skb, vif->local, vif->remote);
1309 		/* FIXME: extra output firewall step used to be here. --RR */
1310 		vif->dev->stats.tx_packets++;
1311 		vif->dev->stats.tx_bytes += skb->len;
1312 	}
1313 
1314 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1315 
1316 	/*
1317 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1318 	 * not only before forwarding, but after forwarding on all output
1319 	 * interfaces. It is clear, if mrouter runs a multicasting
1320 	 * program, it should receive packets not depending to what interface
1321 	 * program is joined.
1322 	 * If we will not make it, the program will have to join on all
1323 	 * interfaces. On the other hand, multihoming host (or router, but
1324 	 * not mrouter) cannot join to more than one interface - it will
1325 	 * result in receiving multiple packets.
1326 	 */
1327 	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1328 		ipmr_forward_finish);
1329 	return;
1330 
1331 out_free:
1332 	kfree_skb(skb);
1333 	return;
1334 }
1335 
1336 static int ipmr_find_vif(struct net_device *dev)
1337 {
1338 	struct net *net = dev_net(dev);
1339 	int ct;
1340 	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1341 		if (net->ipv4.vif_table[ct].dev == dev)
1342 			break;
1343 	}
1344 	return ct;
1345 }
1346 
1347 /* "local" means that we should preserve one skb (for local delivery) */
1348 
1349 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1350 {
1351 	int psend = -1;
1352 	int vif, ct;
1353 	struct net *net = mfc_net(cache);
1354 
1355 	vif = cache->mfc_parent;
1356 	cache->mfc_un.res.pkt++;
1357 	cache->mfc_un.res.bytes += skb->len;
1358 
1359 	/*
1360 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1361 	 */
1362 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
1363 		int true_vifi;
1364 
1365 		if (skb_rtable(skb)->fl.iif == 0) {
1366 			/* It is our own packet, looped back.
1367 			   Very complicated situation...
1368 
1369 			   The best workaround until routing daemons will be
1370 			   fixed is not to redistribute packet, if it was
1371 			   send through wrong interface. It means, that
1372 			   multicast applications WILL NOT work for
1373 			   (S,G), which have default multicast route pointing
1374 			   to wrong oif. In any case, it is not a good
1375 			   idea to use multicasting applications on router.
1376 			 */
1377 			goto dont_forward;
1378 		}
1379 
1380 		cache->mfc_un.res.wrong_if++;
1381 		true_vifi = ipmr_find_vif(skb->dev);
1382 
1383 		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1384 		    /* pimsm uses asserts, when switching from RPT to SPT,
1385 		       so that we cannot check that packet arrived on an oif.
1386 		       It is bad, but otherwise we would need to move pretty
1387 		       large chunk of pimd to kernel. Ough... --ANK
1388 		     */
1389 		    (net->ipv4.mroute_do_pim ||
1390 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1391 		    time_after(jiffies,
1392 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1393 			cache->mfc_un.res.last_assert = jiffies;
1394 			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1395 		}
1396 		goto dont_forward;
1397 	}
1398 
1399 	net->ipv4.vif_table[vif].pkt_in++;
1400 	net->ipv4.vif_table[vif].bytes_in += skb->len;
1401 
1402 	/*
1403 	 *	Forward the frame
1404 	 */
1405 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1406 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1407 			if (psend != -1) {
1408 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1409 				if (skb2)
1410 					ipmr_queue_xmit(skb2, cache, psend);
1411 			}
1412 			psend = ct;
1413 		}
1414 	}
1415 	if (psend != -1) {
1416 		if (local) {
1417 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1418 			if (skb2)
1419 				ipmr_queue_xmit(skb2, cache, psend);
1420 		} else {
1421 			ipmr_queue_xmit(skb, cache, psend);
1422 			return 0;
1423 		}
1424 	}
1425 
1426 dont_forward:
1427 	if (!local)
1428 		kfree_skb(skb);
1429 	return 0;
1430 }
1431 
1432 
1433 /*
1434  *	Multicast packets for forwarding arrive here
1435  */
1436 
1437 int ip_mr_input(struct sk_buff *skb)
1438 {
1439 	struct mfc_cache *cache;
1440 	struct net *net = dev_net(skb->dev);
1441 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1442 
1443 	/* Packet is looped back after forward, it should not be
1444 	   forwarded second time, but still can be delivered locally.
1445 	 */
1446 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1447 		goto dont_forward;
1448 
1449 	if (!local) {
1450 		    if (IPCB(skb)->opt.router_alert) {
1451 			    if (ip_call_ra_chain(skb))
1452 				    return 0;
1453 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1454 			    /* IGMPv1 (and broken IGMPv2 implementations sort of
1455 			       Cisco IOS <= 11.2(8)) do not put router alert
1456 			       option to IGMP packets destined to routable
1457 			       groups. It is very bad, because it means
1458 			       that we can forward NO IGMP messages.
1459 			     */
1460 			    read_lock(&mrt_lock);
1461 			    if (net->ipv4.mroute_sk) {
1462 				    nf_reset(skb);
1463 				    raw_rcv(net->ipv4.mroute_sk, skb);
1464 				    read_unlock(&mrt_lock);
1465 				    return 0;
1466 			    }
1467 			    read_unlock(&mrt_lock);
1468 		    }
1469 	}
1470 
1471 	read_lock(&mrt_lock);
1472 	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1473 
1474 	/*
1475 	 *	No usable cache entry
1476 	 */
1477 	if (cache == NULL) {
1478 		int vif;
1479 
1480 		if (local) {
1481 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1482 			ip_local_deliver(skb);
1483 			if (skb2 == NULL) {
1484 				read_unlock(&mrt_lock);
1485 				return -ENOBUFS;
1486 			}
1487 			skb = skb2;
1488 		}
1489 
1490 		vif = ipmr_find_vif(skb->dev);
1491 		if (vif >= 0) {
1492 			int err = ipmr_cache_unresolved(net, vif, skb);
1493 			read_unlock(&mrt_lock);
1494 
1495 			return err;
1496 		}
1497 		read_unlock(&mrt_lock);
1498 		kfree_skb(skb);
1499 		return -ENODEV;
1500 	}
1501 
1502 	ip_mr_forward(skb, cache, local);
1503 
1504 	read_unlock(&mrt_lock);
1505 
1506 	if (local)
1507 		return ip_local_deliver(skb);
1508 
1509 	return 0;
1510 
1511 dont_forward:
1512 	if (local)
1513 		return ip_local_deliver(skb);
1514 	kfree_skb(skb);
1515 	return 0;
1516 }
1517 
1518 #ifdef CONFIG_IP_PIMSM
1519 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1520 {
1521 	struct net_device *reg_dev = NULL;
1522 	struct iphdr *encap;
1523 	struct net *net = dev_net(skb->dev);
1524 
1525 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1526 	/*
1527 	   Check that:
1528 	   a. packet is really destinted to a multicast group
1529 	   b. packet is not a NULL-REGISTER
1530 	   c. packet is not truncated
1531 	 */
1532 	if (!ipv4_is_multicast(encap->daddr) ||
1533 	    encap->tot_len == 0 ||
1534 	    ntohs(encap->tot_len) + pimlen > skb->len)
1535 		return 1;
1536 
1537 	read_lock(&mrt_lock);
1538 	if (net->ipv4.mroute_reg_vif_num >= 0)
1539 		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1540 	if (reg_dev)
1541 		dev_hold(reg_dev);
1542 	read_unlock(&mrt_lock);
1543 
1544 	if (reg_dev == NULL)
1545 		return 1;
1546 
1547 	skb->mac_header = skb->network_header;
1548 	skb_pull(skb, (u8*)encap - skb->data);
1549 	skb_reset_network_header(skb);
1550 	skb->dev = reg_dev;
1551 	skb->protocol = htons(ETH_P_IP);
1552 	skb->ip_summed = 0;
1553 	skb->pkt_type = PACKET_HOST;
1554 	skb_dst_drop(skb);
1555 	reg_dev->stats.rx_bytes += skb->len;
1556 	reg_dev->stats.rx_packets++;
1557 	nf_reset(skb);
1558 	netif_rx(skb);
1559 	dev_put(reg_dev);
1560 
1561 	return 0;
1562 }
1563 #endif
1564 
1565 #ifdef CONFIG_IP_PIMSM_V1
1566 /*
1567  * Handle IGMP messages of PIMv1
1568  */
1569 
1570 int pim_rcv_v1(struct sk_buff * skb)
1571 {
1572 	struct igmphdr *pim;
1573 	struct net *net = dev_net(skb->dev);
1574 
1575 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1576 		goto drop;
1577 
1578 	pim = igmp_hdr(skb);
1579 
1580 	if (!net->ipv4.mroute_do_pim ||
1581 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1582 		goto drop;
1583 
1584 	if (__pim_rcv(skb, sizeof(*pim))) {
1585 drop:
1586 		kfree_skb(skb);
1587 	}
1588 	return 0;
1589 }
1590 #endif
1591 
1592 #ifdef CONFIG_IP_PIMSM_V2
1593 static int pim_rcv(struct sk_buff * skb)
1594 {
1595 	struct pimreghdr *pim;
1596 
1597 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1598 		goto drop;
1599 
1600 	pim = (struct pimreghdr *)skb_transport_header(skb);
1601 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1602 	    (pim->flags&PIM_NULL_REGISTER) ||
1603 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1604 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1605 		goto drop;
1606 
1607 	if (__pim_rcv(skb, sizeof(*pim))) {
1608 drop:
1609 		kfree_skb(skb);
1610 	}
1611 	return 0;
1612 }
1613 #endif
1614 
1615 static int
1616 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1617 {
1618 	int ct;
1619 	struct rtnexthop *nhp;
1620 	struct net *net = mfc_net(c);
1621 	u8 *b = skb_tail_pointer(skb);
1622 	struct rtattr *mp_head;
1623 
1624 	/* If cache is unresolved, don't try to parse IIF and OIF */
1625 	if (c->mfc_parent > MAXVIFS)
1626 		return -ENOENT;
1627 
1628 	if (VIF_EXISTS(net, c->mfc_parent))
1629 		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1630 
1631 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1632 
1633 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1634 		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1635 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1636 				goto rtattr_failure;
1637 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1638 			nhp->rtnh_flags = 0;
1639 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1640 			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1641 			nhp->rtnh_len = sizeof(*nhp);
1642 		}
1643 	}
1644 	mp_head->rta_type = RTA_MULTIPATH;
1645 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1646 	rtm->rtm_type = RTN_MULTICAST;
1647 	return 1;
1648 
1649 rtattr_failure:
1650 	nlmsg_trim(skb, b);
1651 	return -EMSGSIZE;
1652 }
1653 
1654 int ipmr_get_route(struct net *net,
1655 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1656 {
1657 	int err;
1658 	struct mfc_cache *cache;
1659 	struct rtable *rt = skb_rtable(skb);
1660 
1661 	read_lock(&mrt_lock);
1662 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1663 
1664 	if (cache == NULL) {
1665 		struct sk_buff *skb2;
1666 		struct iphdr *iph;
1667 		struct net_device *dev;
1668 		int vif;
1669 
1670 		if (nowait) {
1671 			read_unlock(&mrt_lock);
1672 			return -EAGAIN;
1673 		}
1674 
1675 		dev = skb->dev;
1676 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1677 			read_unlock(&mrt_lock);
1678 			return -ENODEV;
1679 		}
1680 		skb2 = skb_clone(skb, GFP_ATOMIC);
1681 		if (!skb2) {
1682 			read_unlock(&mrt_lock);
1683 			return -ENOMEM;
1684 		}
1685 
1686 		skb_push(skb2, sizeof(struct iphdr));
1687 		skb_reset_network_header(skb2);
1688 		iph = ip_hdr(skb2);
1689 		iph->ihl = sizeof(struct iphdr) >> 2;
1690 		iph->saddr = rt->rt_src;
1691 		iph->daddr = rt->rt_dst;
1692 		iph->version = 0;
1693 		err = ipmr_cache_unresolved(net, vif, skb2);
1694 		read_unlock(&mrt_lock);
1695 		return err;
1696 	}
1697 
1698 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1699 		cache->mfc_flags |= MFC_NOTIFY;
1700 	err = ipmr_fill_mroute(skb, cache, rtm);
1701 	read_unlock(&mrt_lock);
1702 	return err;
1703 }
1704 
1705 #ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing: ip_mr_cache and ip_mr_vif
 */
1709 struct ipmr_vif_iter {
1710 	struct seq_net_private p;
1711 	int ct;
1712 };
1713 
1714 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1715 					   struct ipmr_vif_iter *iter,
1716 					   loff_t pos)
1717 {
1718 	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1719 		if (!VIF_EXISTS(net, iter->ct))
1720 			continue;
1721 		if (pos-- == 0)
1722 			return &net->ipv4.vif_table[iter->ct];
1723 	}
1724 	return NULL;
1725 }
1726 
1727 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1728 	__acquires(mrt_lock)
1729 {
1730 	struct net *net = seq_file_net(seq);
1731 
1732 	read_lock(&mrt_lock);
1733 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1734 		: SEQ_START_TOKEN;
1735 }
1736 
1737 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1738 {
1739 	struct ipmr_vif_iter *iter = seq->private;
1740 	struct net *net = seq_file_net(seq);
1741 
1742 	++*pos;
1743 	if (v == SEQ_START_TOKEN)
1744 		return ipmr_vif_seq_idx(net, iter, 0);
1745 
1746 	while (++iter->ct < net->ipv4.maxvif) {
1747 		if (!VIF_EXISTS(net, iter->ct))
1748 			continue;
1749 		return &net->ipv4.vif_table[iter->ct];
1750 	}
1751 	return NULL;
1752 }
1753 
1754 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1755 	__releases(mrt_lock)
1756 {
1757 	read_unlock(&mrt_lock);
1758 }
1759 
1760 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1761 {
1762 	struct net *net = seq_file_net(seq);
1763 
1764 	if (v == SEQ_START_TOKEN) {
1765 		seq_puts(seq,
1766 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1767 	} else {
1768 		const struct vif_device *vif = v;
1769 		const char *name =  vif->dev ? vif->dev->name : "none";
1770 
1771 		seq_printf(seq,
1772 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1773 			   vif - net->ipv4.vif_table,
1774 			   name, vif->bytes_in, vif->pkt_in,
1775 			   vif->bytes_out, vif->pkt_out,
1776 			   vif->flags, vif->local, vif->remote);
1777 	}
1778 	return 0;
1779 }
1780 
1781 static const struct seq_operations ipmr_vif_seq_ops = {
1782 	.start = ipmr_vif_seq_start,
1783 	.next  = ipmr_vif_seq_next,
1784 	.stop  = ipmr_vif_seq_stop,
1785 	.show  = ipmr_vif_seq_show,
1786 };
1787 
1788 static int ipmr_vif_open(struct inode *inode, struct file *file)
1789 {
1790 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1791 			    sizeof(struct ipmr_vif_iter));
1792 }
1793 
1794 static const struct file_operations ipmr_vif_fops = {
1795 	.owner	 = THIS_MODULE,
1796 	.open    = ipmr_vif_open,
1797 	.read    = seq_read,
1798 	.llseek  = seq_lseek,
1799 	.release = seq_release_net,
1800 };
1801 
1802 struct ipmr_mfc_iter {
1803 	struct seq_net_private p;
1804 	struct mfc_cache **cache;
1805 	int ct;
1806 };
1807 
1808 
1809 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1810 					  struct ipmr_mfc_iter *it, loff_t pos)
1811 {
1812 	struct mfc_cache *mfc;
1813 
1814 	it->cache = net->ipv4.mfc_cache_array;
1815 	read_lock(&mrt_lock);
1816 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1817 		for (mfc = net->ipv4.mfc_cache_array[it->ct];
1818 		     mfc; mfc = mfc->next)
1819 			if (pos-- == 0)
1820 				return mfc;
1821 	read_unlock(&mrt_lock);
1822 
1823 	it->cache = &mfc_unres_queue;
1824 	spin_lock_bh(&mfc_unres_lock);
1825 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1826 		if (net_eq(mfc_net(mfc), net) &&
1827 		    pos-- == 0)
1828 			return mfc;
1829 	spin_unlock_bh(&mfc_unres_lock);
1830 
1831 	it->cache = NULL;
1832 	return NULL;
1833 }
1834 
1835 
1836 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1837 {
1838 	struct ipmr_mfc_iter *it = seq->private;
1839 	struct net *net = seq_file_net(seq);
1840 
1841 	it->cache = NULL;
1842 	it->ct = 0;
1843 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1844 		: SEQ_START_TOKEN;
1845 }
1846 
1847 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1848 {
1849 	struct mfc_cache *mfc = v;
1850 	struct ipmr_mfc_iter *it = seq->private;
1851 	struct net *net = seq_file_net(seq);
1852 
1853 	++*pos;
1854 
1855 	if (v == SEQ_START_TOKEN)
1856 		return ipmr_mfc_seq_idx(net, seq->private, 0);
1857 
1858 	if (mfc->next)
1859 		return mfc->next;
1860 
1861 	if (it->cache == &mfc_unres_queue)
1862 		goto end_of_list;
1863 
1864 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1865 
1866 	while (++it->ct < MFC_LINES) {
1867 		mfc = net->ipv4.mfc_cache_array[it->ct];
1868 		if (mfc)
1869 			return mfc;
1870 	}
1871 
1872 	/* exhausted cache_array, show unresolved */
1873 	read_unlock(&mrt_lock);
1874 	it->cache = &mfc_unres_queue;
1875 	it->ct = 0;
1876 
1877 	spin_lock_bh(&mfc_unres_lock);
1878 	mfc = mfc_unres_queue;
1879 	while (mfc && !net_eq(mfc_net(mfc), net))
1880 		mfc = mfc->next;
1881 	if (mfc)
1882 		return mfc;
1883 
1884  end_of_list:
1885 	spin_unlock_bh(&mfc_unres_lock);
1886 	it->cache = NULL;
1887 
1888 	return NULL;
1889 }
1890 
1891 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1892 {
1893 	struct ipmr_mfc_iter *it = seq->private;
1894 	struct net *net = seq_file_net(seq);
1895 
1896 	if (it->cache == &mfc_unres_queue)
1897 		spin_unlock_bh(&mfc_unres_lock);
1898 	else if (it->cache == net->ipv4.mfc_cache_array)
1899 		read_unlock(&mrt_lock);
1900 }
1901 
1902 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1903 {
1904 	int n;
1905 	struct net *net = seq_file_net(seq);
1906 
1907 	if (v == SEQ_START_TOKEN) {
1908 		seq_puts(seq,
1909 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1910 	} else {
1911 		const struct mfc_cache *mfc = v;
1912 		const struct ipmr_mfc_iter *it = seq->private;
1913 
1914 		seq_printf(seq, "%08lX %08lX %-3hd",
1915 			   (unsigned long) mfc->mfc_mcastgrp,
1916 			   (unsigned long) mfc->mfc_origin,
1917 			   mfc->mfc_parent);
1918 
1919 		if (it->cache != &mfc_unres_queue) {
1920 			seq_printf(seq, " %8lu %8lu %8lu",
1921 				   mfc->mfc_un.res.pkt,
1922 				   mfc->mfc_un.res.bytes,
1923 				   mfc->mfc_un.res.wrong_if);
1924 			for (n = mfc->mfc_un.res.minvif;
1925 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1926 				if (VIF_EXISTS(net, n) &&
1927 				    mfc->mfc_un.res.ttls[n] < 255)
1928 					seq_printf(seq,
1929 					   " %2d:%-3d",
1930 					   n, mfc->mfc_un.res.ttls[n]);
1931 			}
1932 		} else {
1933 			/* unresolved mfc_caches don't contain
1934 			 * pkt, bytes and wrong_if values
1935 			 */
1936 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1937 		}
1938 		seq_putc(seq, '\n');
1939 	}
1940 	return 0;
1941 }
1942 
1943 static const struct seq_operations ipmr_mfc_seq_ops = {
1944 	.start = ipmr_mfc_seq_start,
1945 	.next  = ipmr_mfc_seq_next,
1946 	.stop  = ipmr_mfc_seq_stop,
1947 	.show  = ipmr_mfc_seq_show,
1948 };
1949 
1950 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1951 {
1952 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1953 			    sizeof(struct ipmr_mfc_iter));
1954 }
1955 
1956 static const struct file_operations ipmr_mfc_fops = {
1957 	.owner	 = THIS_MODULE,
1958 	.open    = ipmr_mfc_open,
1959 	.read    = seq_read,
1960 	.llseek  = seq_lseek,
1961 	.release = seq_release_net,
1962 };
1963 #endif
1964 
1965 #ifdef CONFIG_IP_PIMSM_V2
1966 static const struct net_protocol pim_protocol = {
1967 	.handler	=	pim_rcv,
1968 	.netns_ok	=	1,
1969 };
1970 #endif
1971 
1972 
1973 /*
1974  *	Setup for IP multicast routing
1975  */
1976 static int __net_init ipmr_net_init(struct net *net)
1977 {
1978 	int err = 0;
1979 
1980 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1981 				      GFP_KERNEL);
1982 	if (!net->ipv4.vif_table) {
1983 		err = -ENOMEM;
1984 		goto fail;
1985 	}
1986 
1987 	/* Forwarding cache */
1988 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1989 					    sizeof(struct mfc_cache *),
1990 					    GFP_KERNEL);
1991 	if (!net->ipv4.mfc_cache_array) {
1992 		err = -ENOMEM;
1993 		goto fail_mfc_cache;
1994 	}
1995 
1996 #ifdef CONFIG_IP_PIMSM
1997 	net->ipv4.mroute_reg_vif_num = -1;
1998 #endif
1999 
2000 #ifdef CONFIG_PROC_FS
2001 	err = -ENOMEM;
2002 	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2003 		goto proc_vif_fail;
2004 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2005 		goto proc_cache_fail;
2006 #endif
2007 	return 0;
2008 
2009 #ifdef CONFIG_PROC_FS
2010 proc_cache_fail:
2011 	proc_net_remove(net, "ip_mr_vif");
2012 proc_vif_fail:
2013 	kfree(net->ipv4.mfc_cache_array);
2014 #endif
2015 fail_mfc_cache:
2016 	kfree(net->ipv4.vif_table);
2017 fail:
2018 	return err;
2019 }
2020 
2021 static void __net_exit ipmr_net_exit(struct net *net)
2022 {
2023 #ifdef CONFIG_PROC_FS
2024 	proc_net_remove(net, "ip_mr_cache");
2025 	proc_net_remove(net, "ip_mr_vif");
2026 #endif
2027 	kfree(net->ipv4.mfc_cache_array);
2028 	kfree(net->ipv4.vif_table);
2029 }
2030 
2031 static struct pernet_operations ipmr_net_ops = {
2032 	.init = ipmr_net_init,
2033 	.exit = ipmr_net_exit,
2034 };
2035 
2036 int __init ip_mr_init(void)
2037 {
2038 	int err;
2039 
2040 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2041 				       sizeof(struct mfc_cache),
2042 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2043 				       NULL);
2044 	if (!mrt_cachep)
2045 		return -ENOMEM;
2046 
2047 	err = register_pernet_subsys(&ipmr_net_ops);
2048 	if (err)
2049 		goto reg_pernet_fail;
2050 
2051 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2052 	err = register_netdevice_notifier(&ip_mr_notifier);
2053 	if (err)
2054 		goto reg_notif_fail;
2055 #ifdef CONFIG_IP_PIMSM_V2
2056 	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2057 		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2058 		err = -EAGAIN;
2059 		goto add_proto_fail;
2060 	}
2061 #endif
2062 	return 0;
2063 
2064 #ifdef CONFIG_IP_PIMSM_V2
2065 add_proto_fail:
2066 	unregister_netdevice_notifier(&ip_mr_notifier);
2067 #endif
2068 reg_notif_fail:
2069 	del_timer(&ipmr_expire_timer);
2070 	unregister_pernet_subsys(&ipmr_net_ops);
2071 reg_pernet_fail:
2072 	kmem_cache_destroy(mrt_cachep);
2073 	return err;
2074 }
2075