/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
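/* Internal convenience symbol: defined when either PIM-SM version is
 * configured, so code shared by both versions needs only one guard.
 */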

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
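/*
 * A minimal sketch of the resulting pattern (illustrative only).
 * Data path, resolved entries:
 *
 *	read_lock(&mrt_lock);
 *	c = ipmr_cache_find(net, origin, mcastgrp);
 *	...
 *	read_unlock(&mrt_lock);
 *
 * Process-context updates:
 *
 *	write_lock_bh(&mrt_lock);
 *	... modify vif_table or the MFC hash ...
 *	write_unlock_bh(&mrt_lock);
 */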

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

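		/*
		 * ndo_do_ioctl() expects ifru_data to point into user space,
		 * but @p is in kernel memory; lifting the address limit with
		 * set_fs(KERNEL_DS) around the call makes the handler's user
		 * copies accept the kernel pointer.
		 */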
		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
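/*
 * Note on the MTU above: sizeof(struct iphdr) + 8 reserves room for the
 * outer IP header plus the PIM register header prepended to packets sent
 * through this device (assumption: 8 = 4-byte PIM header + 4-byte
 * register word).
 */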

static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting an error to netlink readers.
 */
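/* An skb whose IP header has version == 0 is not a real packet: it is a
   pending netlink route query queued by ipmr_get_route(), carrying a
   struct nlmsghdr behind the fake IP header, and it must be answered
   with an error rather than silently freed.
 */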

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* A single timer process handles the whole unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill the oifs list. Called with mrt_lock write-locked. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
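/* A ttls[] entry of 0 or 255 means "never forward on this vif"; values
   1..254 give the minimum TTL a packet needs to be forwarded there (the
   forwarding loop tests ip_hdr(skb)->ttl > ttls[vif]).
 */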

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish the update by writing the critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
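/*
 * Example (user space, illustrative only): how a daemon would add a plain
 * interface as VIF 0.  The local address selects the device via
 * ip_dev_find() above; 192.0.2.1 is a placeholder:
 *
 *	struct vifctl vc = {
 *		.vifc_vifi	 = 0,
 *		.vifc_flags	 = 0,
 *		.vifc_threshold	 = 1,
 *		.vifc_rate_limit = 0,
 *	};
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(sock, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */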

static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}
static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone from queued to resolved
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
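/*
 * Upcall layout, as built below: for IGMPMSG_NOCACHE / IGMPMSG_WRONGVIF a
 * fresh skb carries a copy of the packet's IP header (protocol forced to 0
 * to mark it as a route-add request) overlaid as struct igmpmsg, followed
 * by a fake IGMP header whose type field carries the assert code.  For
 * IGMPMSG_WHOLEPKT a copy of the IP header is instead prepended to the
 * whole packet and mangled in place.
 */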

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets us a locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
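/*
 * Example (user space, illustrative only): a daemon such as mrouted becomes
 * the multicast router by opening a raw IGMP socket and issuing MRT_INIT;
 * later MRT_* calls must come from that socket (or hold CAP_NET_ADMIN):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 */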

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	getsockopt() support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname != MRT_PIM &&
#endif
	   optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
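/*
 * Resulting frame layout (illustrative):
 *
 *	| outer iphdr, IPPROTO_IPIP | original iphdr | payload ... |
 */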

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so the packets will
		   simply disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * application, that application should receive packets regardless
	 * of which interface it joined on.
	 * If we did not do this, the application would have to join on all
	 * interfaces. On the other hand, a multihomed host (or a router,
	 * but not an mrouter) cannot join on more than one interface - it
	 * would result in receiving duplicate packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for (S,G)
			   entries whose default multicast route points to
			   the wrong oif. In any case, it is not a good
			   idea to use multicast applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that a packet arrived on an oif.
		       It is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
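	/*
	 * Transmission is deferred by one step: the previously found
	 * eligible vif is remembered in psend and receives a clone of the
	 * skb only once the next eligible vif turns up, so the final
	 * transmit can consume the original skb without an extra copy
	 * (unless a local copy must survive).
	 */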
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* The packet is looped back after forwarding; it should not be
	   forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			    /* IGMPv1 (and broken IGMPv2 implementations,
			       such as Cisco IOS <= 11.2(8)) do not put the
			       router alert option into IGMP packets destined
			       to routable groups. It is very bad, because it
			       means that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (net->ipv4.mroute_sk) {
				    nf_reset(skb);
				    raw_rcv(net->ipv4.mroute_sk, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

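	/*
	 * Per the changelog at the top of this file: accept a register whose
	 * checksum covers either the PIM header alone or the whole packet,
	 * for compatibility with older peers that checksum everything.
	 */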
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}
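/*
 * Sample /proc/net/ip_mr_vif output (values illustrative only):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0          144050    1270    271708    2166 00000 0100A8C0 00000000
 */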

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
2057