xref: /linux/net/ipv6/ip6mr.c (revision 93d546399c2b7d66a54d5fbd5eee17de19246bf6)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/sock.h>
39 #include <net/raw.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 
52 struct sock *mroute6_socket;
53 
54 
55 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
56    Note that changes to them are serialized via rtnl_lock.
57  */
58 
59 static DEFINE_RWLOCK(mrt_lock);
60 
61 /*
62  *	Multicast router control variables
63  */
64 
65 static struct mif_device vif6_table[MAXMIFS];		/* Devices 		*/
66 static int maxvif;
67 
68 #define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
69 
70 static int mroute_do_assert;				/* Set in PIM assert	*/
71 #ifdef CONFIG_IPV6_PIMSM_V2
72 static int mroute_do_pim;
73 #else
74 #define mroute_do_pim 0
75 #endif
76 
77 static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/
78 
79 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
80 static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
81 
82 /* Special spinlock for queue of unresolved entries */
83 static DEFINE_SPINLOCK(mfc_unres_lock);
84 
85 /* We return to Alan's original scheme. The hash table of resolved
86    entries is changed only in process context and protected
87    by the weak rwlock mrt_lock. The queue of unresolved entries
88    is protected by the strong spinlock mfc_unres_lock.
89 
90    This way the data path is entirely free of exclusive locks.
91  */
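/* A minimal sketch (illustrative only, mirroring ip6_mr_input() below)
 * of the read-side pattern this scheme enables on the data path:
 *
 *	read_lock(&mrt_lock);
 *	c = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 *	if (c)
 *		ip6_mr_forward(skb, c);
 *	read_unlock(&mrt_lock);
 */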
92 
93 static struct kmem_cache *mrt_cachep __read_mostly;
94 
95 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
96 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
97 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
98 
99 #ifdef CONFIG_IPV6_PIMSM_V2
100 static struct inet6_protocol pim6_protocol;
101 #endif
102 
103 static struct timer_list ipmr_expire_timer;
104 
105 
106 #ifdef CONFIG_PROC_FS
107 
108 struct ipmr_mfc_iter {
109 	struct mfc6_cache **cache;
110 	int ct;
111 };
112 
113 
114 static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
115 {
116 	struct mfc6_cache *mfc;
117 
118 	it->cache = mfc6_cache_array;
119 	read_lock(&mrt_lock);
120 	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
121 		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
122 			if (pos-- == 0)
123 				return mfc;
124 	read_unlock(&mrt_lock);
125 
126 	it->cache = &mfc_unres_queue;
127 	spin_lock_bh(&mfc_unres_lock);
128 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
129 		if (pos-- == 0)
130 			return mfc;
131 	spin_unlock_bh(&mfc_unres_lock);
132 
133 	it->cache = NULL;
134 	return NULL;
135 }
136 
137 
138 
139 
140 /*
141  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
142  */
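/* Both files are created under /proc/net by ip6_mr_init(); e.g.
 * "cat /proc/net/ip6_mr_vif" prints the header line
 * "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags"
 * followed by one row per configured mif.
 */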
143 
144 struct ipmr_vif_iter {
145 	int ct;
146 };
147 
148 static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
149 					    loff_t pos)
150 {
151 	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
152 		if (!MIF_EXISTS(iter->ct))
153 			continue;
154 		if (pos-- == 0)
155 			return &vif6_table[iter->ct];
156 	}
157 	return NULL;
158 }
159 
160 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
161 	__acquires(mrt_lock)
162 {
163 	read_lock(&mrt_lock);
164 	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
165 		: SEQ_START_TOKEN);
166 }
167 
168 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
169 {
170 	struct ipmr_vif_iter *iter = seq->private;
171 
172 	++*pos;
173 	if (v == SEQ_START_TOKEN)
174 		return ip6mr_vif_seq_idx(iter, 0);
175 
176 	while (++iter->ct < maxvif) {
177 		if (!MIF_EXISTS(iter->ct))
178 			continue;
179 		return &vif6_table[iter->ct];
180 	}
181 	return NULL;
182 }
183 
184 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
185 	__releases(mrt_lock)
186 {
187 	read_unlock(&mrt_lock);
188 }
189 
190 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
191 {
192 	if (v == SEQ_START_TOKEN) {
193 		seq_puts(seq,
194 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
195 	} else {
196 		const struct mif_device *vif = v;
197 		const char *name = vif->dev ? vif->dev->name : "none";
198 
199 		seq_printf(seq,
200 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
201 			   vif - vif6_table,
202 			   name, vif->bytes_in, vif->pkt_in,
203 			   vif->bytes_out, vif->pkt_out,
204 			   vif->flags);
205 	}
206 	return 0;
207 }
208 
209 static struct seq_operations ip6mr_vif_seq_ops = {
210 	.start = ip6mr_vif_seq_start,
211 	.next  = ip6mr_vif_seq_next,
212 	.stop  = ip6mr_vif_seq_stop,
213 	.show  = ip6mr_vif_seq_show,
214 };
215 
216 static int ip6mr_vif_open(struct inode *inode, struct file *file)
217 {
218 	return seq_open_private(file, &ip6mr_vif_seq_ops,
219 				sizeof(struct ipmr_vif_iter));
220 }
221 
222 static struct file_operations ip6mr_vif_fops = {
223 	.owner	 = THIS_MODULE,
224 	.open    = ip6mr_vif_open,
225 	.read    = seq_read,
226 	.llseek  = seq_lseek,
227 	.release = seq_release_private,
228 };
229 
230 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
231 {
232 	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
233 		: SEQ_START_TOKEN);
234 }
235 
236 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
237 {
238 	struct mfc6_cache *mfc = v;
239 	struct ipmr_mfc_iter *it = seq->private;
240 
241 	++*pos;
242 
243 	if (v == SEQ_START_TOKEN)
244 		return ipmr_mfc_seq_idx(seq->private, 0);
245 
246 	if (mfc->next)
247 		return mfc->next;
248 
249 	if (it->cache == &mfc_unres_queue)
250 		goto end_of_list;
251 
252 	BUG_ON(it->cache != mfc6_cache_array);
253 
254 	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
255 		mfc = mfc6_cache_array[it->ct];
256 		if (mfc)
257 			return mfc;
258 	}
259 
260 	/* exhausted cache_array, show unresolved */
261 	read_unlock(&mrt_lock);
262 	it->cache = &mfc_unres_queue;
263 	it->ct = 0;
264 
265 	spin_lock_bh(&mfc_unres_lock);
266 	mfc = mfc_unres_queue;
267 	if (mfc)
268 		return mfc;
269 
270  end_of_list:
271 	spin_unlock_bh(&mfc_unres_lock);
272 	it->cache = NULL;
273 
274 	return NULL;
275 }
276 
277 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278 {
279 	struct ipmr_mfc_iter *it = seq->private;
280 
281 	if (it->cache == &mfc_unres_queue)
282 		spin_unlock_bh(&mfc_unres_lock);
283 	else if (it->cache == mfc6_cache_array)
284 		read_unlock(&mrt_lock);
285 }
286 
287 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
288 {
289 	int n;
290 
291 	if (v == SEQ_START_TOKEN) {
292 		seq_puts(seq,
293 			 "Group                            "
294 			 "Origin                           "
295 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
296 	} else {
297 		const struct mfc6_cache *mfc = v;
298 		const struct ipmr_mfc_iter *it = seq->private;
299 
300 		seq_printf(seq,
301 			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
302 			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
303 			   mfc->mf6c_parent,
304 			   mfc->mfc_un.res.pkt,
305 			   mfc->mfc_un.res.bytes,
306 			   mfc->mfc_un.res.wrong_if);
307 
308 		if (it->cache != &mfc_unres_queue) {
309 			for (n = mfc->mfc_un.res.minvif;
310 			     n < mfc->mfc_un.res.maxvif; n++) {
311 				if (MIF_EXISTS(n) &&
312 				    mfc->mfc_un.res.ttls[n] < 255)
313 					seq_printf(seq,
314 						   " %2d:%-3d",
315 						   n, mfc->mfc_un.res.ttls[n]);
316 			}
317 		}
318 		seq_putc(seq, '\n');
319 	}
320 	return 0;
321 }
322 
323 static struct seq_operations ipmr_mfc_seq_ops = {
324 	.start = ipmr_mfc_seq_start,
325 	.next  = ipmr_mfc_seq_next,
326 	.stop  = ipmr_mfc_seq_stop,
327 	.show  = ipmr_mfc_seq_show,
328 };
329 
330 static int ipmr_mfc_open(struct inode *inode, struct file *file)
331 {
332 	return seq_open_private(file, &ipmr_mfc_seq_ops,
333 				sizeof(struct ipmr_mfc_iter));
334 }
335 
336 static struct file_operations ip6mr_mfc_fops = {
337 	.owner	 = THIS_MODULE,
338 	.open    = ipmr_mfc_open,
339 	.read    = seq_read,
340 	.llseek  = seq_lseek,
341 	.release = seq_release_private,
342 };
343 #endif
344 
345 #ifdef CONFIG_IPV6_PIMSM_V2
346 static int reg_vif_num = -1;
347 
348 static int pim6_rcv(struct sk_buff *skb)
349 {
350 	struct pimreghdr *pim;
351 	struct ipv6hdr   *encap;
352 	struct net_device  *reg_dev = NULL;
353 
354 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
355 		goto drop;
356 
357 	pim = (struct pimreghdr *)skb_transport_header(skb);
358 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
359 	    (pim->flags & PIM_NULL_REGISTER) ||
360 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
361 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
362 		goto drop;
363 
364 	/* check if the inner packet is destined to mcast group */
365 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
366 				   sizeof(*pim));
367 
368 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
369 	    encap->payload_len == 0 ||
370 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
371 		goto drop;
372 
373 	read_lock(&mrt_lock);
374 	if (reg_vif_num >= 0)
375 		reg_dev = vif6_table[reg_vif_num].dev;
376 	if (reg_dev)
377 		dev_hold(reg_dev);
378 	read_unlock(&mrt_lock);
379 
380 	if (reg_dev == NULL)
381 		goto drop;
382 
383 	skb->mac_header = skb->network_header;
384 	skb_pull(skb, (u8 *)encap - skb->data);
385 	skb_reset_network_header(skb);
386 	skb->dev = reg_dev;
387 	skb->protocol = htons(ETH_P_IPV6);
388 	skb->ip_summed = CHECKSUM_NONE;
389 	skb->pkt_type = PACKET_HOST;
390 	dst_release(skb->dst);
391 	reg_dev->stats.rx_bytes += skb->len;
392 	reg_dev->stats.rx_packets++;
393 	skb->dst = NULL;
394 	nf_reset(skb);
395 	netif_rx(skb);
396 	dev_put(reg_dev);
397 	return 0;
398  drop:
399 	kfree_skb(skb);
400 	return 0;
401 }
402 
403 static struct inet6_protocol pim6_protocol = {
404 	.handler	=	pim6_rcv,
405 };
406 
407 /* Service routines creating virtual interfaces: PIMREG */
408 
409 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
410 {
411 	read_lock(&mrt_lock);
412 	dev->stats.tx_bytes += skb->len;
413 	dev->stats.tx_packets++;
414 	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
415 	read_unlock(&mrt_lock);
416 	kfree_skb(skb);
417 	return 0;
418 }
419 
420 static void reg_vif_setup(struct net_device *dev)
421 {
422 	dev->type		= ARPHRD_PIMREG;
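	/* presumably an Ethernet-sized default: 1500 bytes minus the outer
	 * IPv6 header and the 8-byte PIM register header prepended when the
	 * packet is encapsulated */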
423 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
424 	dev->flags		= IFF_NOARP;
425 	dev->hard_start_xmit	= reg_vif_xmit;
426 	dev->destructor		= free_netdev;
427 }
428 
429 static struct net_device *ip6mr_reg_vif(void)
430 {
431 	struct net_device *dev;
432 
433 	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
434 	if (dev == NULL)
435 		return NULL;
436 
437 	if (register_netdevice(dev)) {
438 		free_netdev(dev);
439 		return NULL;
440 	}
441 	dev->iflink = 0;
442 
443 	if (dev_open(dev))
444 		goto failure;
445 
446 	dev_hold(dev);
447 	return dev;
448 
449 failure:
450 	/* allow the register to be completed before unregistering. */
451 	rtnl_unlock();
452 	rtnl_lock();
453 
454 	unregister_netdevice(dev);
455 	return NULL;
456 }
457 #endif
458 
459 /*
460  *	Delete a VIF entry
461  */
462 
463 static int mif6_delete(int vifi)
464 {
465 	struct mif_device *v;
466 	struct net_device *dev;
467 	if (vifi < 0 || vifi >= maxvif)
468 		return -EADDRNOTAVAIL;
469 
470 	v = &vif6_table[vifi];
471 
472 	write_lock_bh(&mrt_lock);
473 	dev = v->dev;
474 	v->dev = NULL;
475 
476 	if (!dev) {
477 		write_unlock_bh(&mrt_lock);
478 		return -EADDRNOTAVAIL;
479 	}
480 
481 #ifdef CONFIG_IPV6_PIMSM_V2
482 	if (vifi == reg_vif_num)
483 		reg_vif_num = -1;
484 #endif
485 
486 	if (vifi + 1 == maxvif) {
487 		int tmp;
488 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
489 			if (MIF_EXISTS(tmp))
490 				break;
491 		}
492 		maxvif = tmp + 1;
493 	}
494 
495 	write_unlock_bh(&mrt_lock);
496 
497 	dev_set_allmulti(dev, -1);
498 
499 	if (v->flags & MIFF_REGISTER)
500 		unregister_netdevice(dev);
501 
502 	dev_put(dev);
503 	return 0;
504 }
505 
506 /* Destroy an unresolved cache entry, killing queued skbs
507    and reporting an error to pending netlink readers.
508  */
509 
510 static void ip6mr_destroy_unres(struct mfc6_cache *c)
511 {
512 	struct sk_buff *skb;
513 
514 	atomic_dec(&cache_resolve_queue_len);
515 
516 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
517 		if (ipv6_hdr(skb)->version == 0) {
518 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
519 			nlh->nlmsg_type = NLMSG_ERROR;
520 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
521 			skb_trim(skb, nlh->nlmsg_len);
522 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
523 			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
524 		} else
525 			kfree_skb(skb);
526 	}
527 
528 	kmem_cache_free(mrt_cachep, c);
529 }
530 
531 
532 /* Single timer process for all the unresolved queue. */
533 
534 static void ipmr_do_expire_process(unsigned long dummy)
535 {
536 	unsigned long now = jiffies;
537 	unsigned long expires = 10 * HZ;
538 	struct mfc6_cache *c, **cp;
539 
540 	cp = &mfc_unres_queue;
541 
542 	while ((c = *cp) != NULL) {
543 		if (time_after(c->mfc_un.unres.expires, now)) {
544 			/* not yet... */
545 			unsigned long interval = c->mfc_un.unres.expires - now;
546 			if (interval < expires)
547 				expires = interval;
548 			cp = &c->next;
549 			continue;
550 		}
551 
552 		*cp = c->next;
553 		ip6mr_destroy_unres(c);
554 	}
555 
556 	if (atomic_read(&cache_resolve_queue_len))
557 		mod_timer(&ipmr_expire_timer, jiffies + expires);
558 }
559 
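/* Timer callback, runs in softirq context. If mfc_unres_lock is
 * currently held elsewhere, back off and retry one jiffy later
 * instead of spinning in the timer.
 */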
560 static void ipmr_expire_process(unsigned long dummy)
561 {
562 	if (!spin_trylock(&mfc_unres_lock)) {
563 		mod_timer(&ipmr_expire_timer, jiffies + 1);
564 		return;
565 	}
566 
567 	if (atomic_read(&cache_resolve_queue_len))
568 		ipmr_do_expire_process(dummy);
569 
570 	spin_unlock(&mfc_unres_lock);
571 }
572 
573 /* Fill oifs list. It is called under write locked mrt_lock. */
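/* minvif/maxvif bound the window of mifs whose TTL threshold is below
 * 255, so the forwarding loop in ip6_mr_forward() only scans that
 * window rather than the whole table.
 */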
574 
575 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
576 {
577 	int vifi;
578 
579 	cache->mfc_un.res.minvif = MAXMIFS;
580 	cache->mfc_un.res.maxvif = 0;
581 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
582 
583 	for (vifi = 0; vifi < maxvif; vifi++) {
584 		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
585 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
586 			if (cache->mfc_un.res.minvif > vifi)
587 				cache->mfc_un.res.minvif = vifi;
588 			if (cache->mfc_un.res.maxvif <= vifi)
589 				cache->mfc_un.res.maxvif = vifi + 1;
590 		}
591 	}
592 }
593 
594 static int mif6_add(struct mif6ctl *vifc, int mrtsock)
595 {
596 	int vifi = vifc->mif6c_mifi;
597 	struct mif_device *v = &vif6_table[vifi];
598 	struct net_device *dev;
599 	int err;
600 
601 	/* Is the vif busy? */
602 	if (MIF_EXISTS(vifi))
603 		return -EADDRINUSE;
604 
605 	switch (vifc->mif6c_flags) {
606 #ifdef CONFIG_IPV6_PIMSM_V2
607 	case MIFF_REGISTER:
608 		/*
609 		 * Special Purpose VIF in PIM
610 		 * All the packets will be sent to the daemon
611 		 */
612 		if (reg_vif_num >= 0)
613 			return -EADDRINUSE;
614 		dev = ip6mr_reg_vif();
615 		if (!dev)
616 			return -ENOBUFS;
617 		err = dev_set_allmulti(dev, 1);
618 		if (err) {
619 			unregister_netdevice(dev);
620 			dev_put(dev);
621 			return err;
622 		}
623 		break;
624 #endif
625 	case 0:
626 		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
627 		if (!dev)
628 			return -EADDRNOTAVAIL;
629 		err = dev_set_allmulti(dev, 1);
630 		if (err) {
631 			dev_put(dev);
632 			return err;
633 		}
634 		break;
635 	default:
636 		return -EINVAL;
637 	}
638 
639 	/*
640 	 *	Fill in the VIF structures
641 	 */
642 	v->rate_limit = vifc->vifc_rate_limit;
643 	v->flags = vifc->mif6c_flags;
644 	if (!mrtsock)
645 		v->flags |= VIFF_STATIC;
646 	v->threshold = vifc->vifc_threshold;
647 	v->bytes_in = 0;
648 	v->bytes_out = 0;
649 	v->pkt_in = 0;
650 	v->pkt_out = 0;
651 	v->link = dev->ifindex;
652 	if (v->flags & MIFF_REGISTER)
653 		v->link = dev->iflink;
654 
655 	/* And finish update writing critical data */
656 	write_lock_bh(&mrt_lock);
657 	v->dev = dev;
658 #ifdef CONFIG_IPV6_PIMSM_V2
659 	if (v->flags & MIFF_REGISTER)
660 		reg_vif_num = vifi;
661 #endif
662 	if (vifi + 1 > maxvif)
663 		maxvif = vifi + 1;
664 	write_unlock_bh(&mrt_lock);
665 	return 0;
666 }
667 
668 static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
669 {
670 	int line = MFC6_HASH(mcastgrp, origin);
671 	struct mfc6_cache *c;
672 
673 	for (c = mfc6_cache_array[line]; c; c = c->next) {
674 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
675 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
676 			break;
677 	}
678 	return c;
679 }
680 
681 /*
682  *	Allocate a multicast cache entry
683  */
684 static struct mfc6_cache *ip6mr_cache_alloc(void)
685 {
686 	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
687 	if (c == NULL)
688 		return NULL;
689 	memset(c, 0, sizeof(*c));
690 	c->mfc_un.res.minvif = MAXMIFS;
691 	return c;
692 }
693 
694 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
695 {
696 	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
697 	if (c == NULL)
698 		return NULL;
699 	memset(c, 0, sizeof(*c));
700 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
701 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
702 	return c;
703 }
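/* Note the allocation contexts: ip6mr_cache_alloc() is called from the
 * setsockopt() path (process context), hence GFP_KERNEL, while the
 * _unres variant is called from packet reception under mfc_unres_lock,
 * hence GFP_ATOMIC.
 */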
704 
705 /*
706  *	A cache entry has gone into a resolved state from queued
707  */
708 
709 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
710 {
711 	struct sk_buff *skb;
712 
713 	/*
714 	 *	Play the pending entries through our router
715 	 */
716 
717 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
718 		if (ipv6_hdr(skb)->version == 0) {
719 			int err;
720 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
721 
722 			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
723 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
724 			} else {
725 				nlh->nlmsg_type = NLMSG_ERROR;
726 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
727 				skb_trim(skb, nlh->nlmsg_len);
728 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
729 			}
730 			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
731 		} else
732 			ip6_mr_forward(skb, c);
733 	}
734 }
735 
736 /*
737  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
738  *	expects the following bizarre scheme.
739  *
740  *	Called under mrt_lock.
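 *
 *	The report is delivered by queueing an skb holding a struct
 *	mrt6msg directly onto mroute6_socket (the raw ICMPv6 socket that
 *	issued MRT6_INIT); the daemon presumably tells such upcalls apart
 *	from real ICMPv6 traffic by the zero im6_mbz field, mirroring the
 *	IPv4 igmpmsg convention.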
741  */
742 
743 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
744 {
745 	struct sk_buff *skb;
746 	struct mrt6msg *msg;
747 	int ret;
748 
749 #ifdef CONFIG_IPV6_PIMSM_V2
750 	if (assert == MRT6MSG_WHOLEPKT)
751 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
752 						+sizeof(*msg));
753 	else
754 #endif
755 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
756 
757 	if (!skb)
758 		return -ENOBUFS;
759 
760 	/* I suppose that internal messages
761 	 * do not require checksums */
762 
763 	skb->ip_summed = CHECKSUM_UNNECESSARY;
764 
765 #ifdef CONFIG_IPV6_PIMSM_V2
766 	if (assert == MRT6MSG_WHOLEPKT) {
767 		/* Ugly, but we have no choice with this interface.
768 		   Duplicate old header, fix length etc.
769 		   And all this only to mangle msg->im6_msgtype and
770 		   to set msg->im6_mbz to "mbz" :-)
771 		 */
772 		skb_push(skb, -skb_network_offset(pkt));
773 
774 		skb_push(skb, sizeof(*msg));
775 		skb_reset_transport_header(skb);
776 		msg = (struct mrt6msg *)skb_transport_header(skb);
777 		msg->im6_mbz = 0;
778 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
779 		msg->im6_mif = reg_vif_num;
780 		msg->im6_pad = 0;
781 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
782 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
783 
784 		skb->ip_summed = CHECKSUM_UNNECESSARY;
785 	} else
786 #endif
787 	{
788 	/*
789 	 *	Copy the IP header
790 	 */
791 
792 	skb_put(skb, sizeof(struct ipv6hdr));
793 	skb_reset_network_header(skb);
794 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
795 
796 	/*
797 	 *	Add our header
798 	 */
799 	skb_put(skb, sizeof(*msg));
800 	skb_reset_transport_header(skb);
801 	msg = (struct mrt6msg *)skb_transport_header(skb);
802 
803 	msg->im6_mbz = 0;
804 	msg->im6_msgtype = assert;
805 	msg->im6_mif = mifi;
806 	msg->im6_pad = 0;
807 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
808 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
809 
810 	skb->dst = dst_clone(pkt->dst);
811 	skb->ip_summed = CHECKSUM_UNNECESSARY;
812 
813 	skb_pull(skb, sizeof(struct ipv6hdr));
814 	}
815 
816 	if (mroute6_socket == NULL) {
817 		kfree_skb(skb);
818 		return -EINVAL;
819 	}
820 
821 	/*
822 	 *	Deliver to the user-space multicast routing daemon
823 	 */
824 	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
825 		if (net_ratelimit())
826 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
827 		kfree_skb(skb);
828 	}
829 
830 	return ret;
831 }
832 
833 /*
834  *	Queue a packet for resolution; the unresolved cache entry is handled under mfc_unres_lock.
835  */
836 
837 static int
838 ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
839 {
840 	int err;
841 	struct mfc6_cache *c;
842 
843 	spin_lock_bh(&mfc_unres_lock);
844 	for (c = mfc_unres_queue; c; c = c->next) {
845 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
846 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
847 			break;
848 	}
849 
850 	if (c == NULL) {
851 		/*
852 		 *	Create a new entry if allowable
853 		 */
854 
855 		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
856 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
857 			spin_unlock_bh(&mfc_unres_lock);
858 
859 			kfree_skb(skb);
860 			return -ENOBUFS;
861 		}
862 
863 		/*
864 		 *	Fill in the new cache entry
865 		 */
866 		c->mf6c_parent = -1;
867 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
868 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
869 
870 		/*
871 		 *	Report the first packet to pim6sd (MRT6MSG_NOCACHE)
872 		 */
873 		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
874 			/* If the report failed throw the cache entry
875 			   out - Brad Parker
876 			 */
877 			spin_unlock_bh(&mfc_unres_lock);
878 
879 			kmem_cache_free(mrt_cachep, c);
880 			kfree_skb(skb);
881 			return err;
882 		}
883 
884 		atomic_inc(&cache_resolve_queue_len);
885 		c->next = mfc_unres_queue;
886 		mfc_unres_queue = c;
887 
888 		ipmr_do_expire_process(1);
889 	}
890 
891 	/*
892 	 *	See if we can append the packet
893 	 */
894 	if (c->mfc_un.unres.unresolved.qlen > 3) {
895 		kfree_skb(skb);
896 		err = -ENOBUFS;
897 	} else {
898 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
899 		err = 0;
900 	}
901 
902 	spin_unlock_bh(&mfc_unres_lock);
903 	return err;
904 }
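/* The limits above are deliberately small: at most 10 unresolved
 * entries may exist at once, and each buffers at most four pending
 * skbs, so an unresponsive daemon cannot pin much kernel memory.
 */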
905 
906 /*
907  *	MFC6 cache manipulation by user space
908  */
909 
910 static int ip6mr_mfc_delete(struct mf6cctl *mfc)
911 {
912 	int line;
913 	struct mfc6_cache *c, **cp;
914 
915 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
916 
917 	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
918 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
919 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
920 			write_lock_bh(&mrt_lock);
921 			*cp = c->next;
922 			write_unlock_bh(&mrt_lock);
923 
924 			kmem_cache_free(mrt_cachep, c);
925 			return 0;
926 		}
927 	}
928 	return -ENOENT;
929 }
930 
931 static int ip6mr_device_event(struct notifier_block *this,
932 			      unsigned long event, void *ptr)
933 {
934 	struct net_device *dev = ptr;
935 	struct mif_device *v;
936 	int ct;
937 
938 	if (!net_eq(dev_net(dev), &init_net))
939 		return NOTIFY_DONE;
940 
941 	if (event != NETDEV_UNREGISTER)
942 		return NOTIFY_DONE;
943 
944 	v = &vif6_table[0];
945 	for (ct = 0; ct < maxvif; ct++, v++) {
946 		if (v->dev == dev)
947 			mif6_delete(ct);
948 	}
949 	return NOTIFY_DONE;
950 }
951 
952 static struct notifier_block ip6_mr_notifier = {
953 	.notifier_call = ip6mr_device_event
954 };
955 
956 /*
957  *	Setup for IP multicast routing
958  */
959 
960 int __init ip6_mr_init(void)
961 {
962 	int err;
963 
964 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
965 				       sizeof(struct mfc6_cache),
966 				       0, SLAB_HWCACHE_ALIGN,
967 				       NULL);
968 	if (!mrt_cachep)
969 		return -ENOMEM;
970 
971 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
972 	err = register_netdevice_notifier(&ip6_mr_notifier);
973 	if (err)
974 		goto reg_notif_fail;
975 #ifdef CONFIG_PROC_FS
976 	err = -ENOMEM;
977 	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
978 		goto proc_vif_fail;
979 	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
980 				     0, &ip6mr_mfc_fops))
981 		goto proc_cache_fail;
982 #endif
983 	return 0;
984 #ifdef CONFIG_PROC_FS
985 proc_cache_fail:
986 	proc_net_remove(&init_net, "ip6_mr_vif");
987 proc_vif_fail:
988 	unregister_netdevice_notifier(&ip6_mr_notifier);
989 #endif
990 reg_notif_fail:
991 	del_timer(&ipmr_expire_timer);
992 	kmem_cache_destroy(mrt_cachep);
993 	return err;
994 }
995 
996 void ip6_mr_cleanup(void)
997 {
998 #ifdef CONFIG_PROC_FS
999 	proc_net_remove(&init_net, "ip6_mr_cache");
1000 	proc_net_remove(&init_net, "ip6_mr_vif");
1001 #endif
1002 	unregister_netdevice_notifier(&ip6_mr_notifier);
1003 	del_timer(&ipmr_expire_timer);
1004 	kmem_cache_destroy(mrt_cachep);
1005 }
1006 
1007 static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1008 {
1009 	int line;
1010 	struct mfc6_cache *uc, *c, **cp;
1011 	unsigned char ttls[MAXMIFS];
1012 	int i;
1013 
1014 	memset(ttls, 255, MAXMIFS);
1015 	for (i = 0; i < MAXMIFS; i++) {
1016 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1017 			ttls[i] = 1;
1018 
1019 	}
1020 
1021 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1022 
1023 	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
1024 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1025 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1026 			break;
1027 	}
1028 
1029 	if (c != NULL) {
1030 		write_lock_bh(&mrt_lock);
1031 		c->mf6c_parent = mfc->mf6cc_parent;
1032 		ip6mr_update_thresholds(c, ttls);
1033 		if (!mrtsock)
1034 			c->mfc_flags |= MFC_STATIC;
1035 		write_unlock_bh(&mrt_lock);
1036 		return 0;
1037 	}
1038 
1039 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1040 		return -EINVAL;
1041 
1042 	c = ip6mr_cache_alloc();
1043 	if (c == NULL)
1044 		return -ENOMEM;
1045 
1046 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1047 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1048 	c->mf6c_parent = mfc->mf6cc_parent;
1049 	ip6mr_update_thresholds(c, ttls);
1050 	if (!mrtsock)
1051 		c->mfc_flags |= MFC_STATIC;
1052 
1053 	write_lock_bh(&mrt_lock);
1054 	c->next = mfc6_cache_array[line];
1055 	mfc6_cache_array[line] = c;
1056 	write_unlock_bh(&mrt_lock);
1057 
1058 	/*
1059 	 *	Check to see if we resolved a queued (unresolved) entry. If so
1060 	 *	we need to send the pending frames on and tidy up.
1061 	 */
1062 	spin_lock_bh(&mfc_unres_lock);
1063 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1064 	     cp = &uc->next) {
1065 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1066 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1067 			*cp = uc->next;
1068 			if (atomic_dec_and_test(&cache_resolve_queue_len))
1069 				del_timer(&ipmr_expire_timer);
1070 			break;
1071 		}
1072 	}
1073 	spin_unlock_bh(&mfc_unres_lock);
1074 
1075 	if (uc) {
1076 		ip6mr_cache_resolve(uc, c);
1077 		kmem_cache_free(mrt_cachep, uc);
1078 	}
1079 	return 0;
1080 }
1081 
1082 /*
1083  *	Close the multicast socket, and clear the vif tables etc
1084  */
1085 
1086 static void mroute_clean_tables(struct sock *sk)
1087 {
1088 	int i;
1089 
1090 	/*
1091 	 *	Shut down all active vif entries
1092 	 */
1093 	for (i = 0; i < maxvif; i++) {
1094 		if (!(vif6_table[i].flags & VIFF_STATIC))
1095 			mif6_delete(i);
1096 	}
1097 
1098 	/*
1099 	 *	Wipe the cache
1100 	 */
1101 	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
1102 		struct mfc6_cache *c, **cp;
1103 
1104 		cp = &mfc6_cache_array[i];
1105 		while ((c = *cp) != NULL) {
1106 			if (c->mfc_flags & MFC_STATIC) {
1107 				cp = &c->next;
1108 				continue;
1109 			}
1110 			write_lock_bh(&mrt_lock);
1111 			*cp = c->next;
1112 			write_unlock_bh(&mrt_lock);
1113 
1114 			kmem_cache_free(mrt_cachep, c);
1115 		}
1116 	}
1117 
1118 	if (atomic_read(&cache_resolve_queue_len) != 0) {
1119 		struct mfc6_cache *c;
1120 
1121 		spin_lock_bh(&mfc_unres_lock);
1122 		while (mfc_unres_queue != NULL) {
1123 			c = mfc_unres_queue;
1124 			mfc_unres_queue = c->next;
1125 			spin_unlock_bh(&mfc_unres_lock);
1126 
1127 			ip6mr_destroy_unres(c);
1128 
1129 			spin_lock_bh(&mfc_unres_lock);
1130 		}
1131 		spin_unlock_bh(&mfc_unres_lock);
1132 	}
1133 }
1134 
1135 static int ip6mr_sk_init(struct sock *sk)
1136 {
1137 	int err = 0;
1138 
1139 	rtnl_lock();
1140 	write_lock_bh(&mrt_lock);
1141 	if (likely(mroute6_socket == NULL))
1142 		mroute6_socket = sk;
1143 	else
1144 		err = -EADDRINUSE;
1145 	write_unlock_bh(&mrt_lock);
1146 
1147 	rtnl_unlock();
1148 
1149 	return err;
1150 }
1151 
1152 int ip6mr_sk_done(struct sock *sk)
1153 {
1154 	int err = 0;
1155 
1156 	rtnl_lock();
1157 	if (sk == mroute6_socket) {
1158 		write_lock_bh(&mrt_lock);
1159 		mroute6_socket = NULL;
1160 		write_unlock_bh(&mrt_lock);
1161 
1162 		mroute_clean_tables(sk);
1163 	} else
1164 		err = -EACCES;
1165 	rtnl_unlock();
1166 
1167 	return err;
1168 }
1169 
1170 /*
1171  *	Socket options and virtual interface manipulation. The whole
1172  *	virtual interface system is a complete heap, but unfortunately
1173  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1174  *	MOSPF/PIM router set up we can clean this up.
1175  */
1176 
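/* A hypothetical userspace sketch (names and error handling elided) of
 * how a daemon such as pim6sd drives this interface; the option level
 * for all MRT6_* options is IPPROTO_IPV6:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mc = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *	...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */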
1177 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1178 {
1179 	int ret;
1180 	struct mif6ctl vif;
1181 	struct mf6cctl mfc;
1182 	mifi_t mifi;
1183 
1184 	if (optname != MRT6_INIT) {
1185 		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
1186 			return -EACCES;
1187 	}
1188 
1189 	switch (optname) {
1190 	case MRT6_INIT:
1191 		if (sk->sk_type != SOCK_RAW ||
1192 		    inet_sk(sk)->num != IPPROTO_ICMPV6)
1193 			return -EOPNOTSUPP;
1194 		if (optlen < sizeof(int))
1195 			return -EINVAL;
1196 
1197 		return ip6mr_sk_init(sk);
1198 
1199 	case MRT6_DONE:
1200 		return ip6mr_sk_done(sk);
1201 
1202 	case MRT6_ADD_MIF:
1203 		if (optlen < sizeof(vif))
1204 			return -EINVAL;
1205 		if (copy_from_user(&vif, optval, sizeof(vif)))
1206 			return -EFAULT;
1207 		if (vif.mif6c_mifi >= MAXMIFS)
1208 			return -ENFILE;
1209 		rtnl_lock();
1210 		ret = mif6_add(&vif, sk == mroute6_socket);
1211 		rtnl_unlock();
1212 		return ret;
1213 
1214 	case MRT6_DEL_MIF:
1215 		if (optlen < sizeof(mifi_t))
1216 			return -EINVAL;
1217 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1218 			return -EFAULT;
1219 		rtnl_lock();
1220 		ret = mif6_delete(mifi);
1221 		rtnl_unlock();
1222 		return ret;
1223 
1224 	/*
1225 	 *	Manipulate the forwarding caches. These live
1226 	 *	in a sort of kernel/user symbiosis.
1227 	 */
1228 	case MRT6_ADD_MFC:
1229 	case MRT6_DEL_MFC:
1230 		if (optlen < sizeof(mfc))
1231 			return -EINVAL;
1232 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1233 			return -EFAULT;
1234 		rtnl_lock();
1235 		if (optname == MRT6_DEL_MFC)
1236 			ret = ip6mr_mfc_delete(&mfc);
1237 		else
1238 			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
1239 		rtnl_unlock();
1240 		return ret;
1241 
1242 	/*
1243 	 *	Control PIM assert (enabling PIM also enables assert)
1244 	 */
1245 	case MRT6_ASSERT:
1246 	{
1247 		int v;
1248 		if (get_user(v, (int __user *)optval))
1249 			return -EFAULT;
1250 		mroute_do_assert = !!v;
1251 		return 0;
1252 	}
1253 
1254 #ifdef CONFIG_IPV6_PIMSM_V2
1255 	case MRT6_PIM:
1256 	{
1257 		int v;
1258 		if (get_user(v, (int __user *)optval))
1259 			return -EFAULT;
1260 		v = !!v;
1261 		rtnl_lock();
1262 		ret = 0;
1263 		if (v != mroute_do_pim) {
1264 			mroute_do_pim = v;
1265 			mroute_do_assert = v;
1266 			if (mroute_do_pim)
1267 				ret = inet6_add_protocol(&pim6_protocol,
1268 							 IPPROTO_PIM);
1269 			else
1270 				ret = inet6_del_protocol(&pim6_protocol,
1271 							 IPPROTO_PIM);
1272 			if (ret < 0)
1273 				ret = -EAGAIN;
1274 		}
1275 		rtnl_unlock();
1276 		return ret;
1277 	}
1278 
1279 #endif
1280 	/*
1281 	 *	Spurious command, or MRT6_VERSION which you cannot
1282 	 *	set.
1283 	 */
1284 	default:
1285 		return -ENOPROTOOPT;
1286 	}
1287 }
1288 
1289 /*
1290  *	Getsock opt support for the multicast routing system.
1291  */
1292 
1293 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1294 			  int __user *optlen)
1295 {
1296 	int olr;
1297 	int val;
1298 
1299 	switch (optname) {
1300 	case MRT6_VERSION:
1301 		val = 0x0305;
1302 		break;
1303 #ifdef CONFIG_IPV6_PIMSM_V2
1304 	case MRT6_PIM:
1305 		val = mroute_do_pim;
1306 		break;
1307 #endif
1308 	case MRT6_ASSERT:
1309 		val = mroute_do_assert;
1310 		break;
1311 	default:
1312 		return -ENOPROTOOPT;
1313 	}
1314 
1315 	if (get_user(olr, optlen))
1316 		return -EFAULT;
1317 
1318 	olr = min_t(int, olr, sizeof(int));
1319 	if (olr < 0)
1320 		return -EINVAL;
1321 
1322 	if (put_user(olr, optlen))
1323 		return -EFAULT;
1324 	if (copy_to_user(optval, &val, olr))
1325 		return -EFAULT;
1326 	return 0;
1327 }
1328 
1329 /*
1330  *	The IP multicast ioctl support routines.
1331  */
1332 
1333 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1334 {
1335 	struct sioc_sg_req6 sr;
1336 	struct sioc_mif_req6 vr;
1337 	struct mif_device *vif;
1338 	struct mfc6_cache *c;
1339 
1340 	switch (cmd) {
1341 	case SIOCGETMIFCNT_IN6:
1342 		if (copy_from_user(&vr, arg, sizeof(vr)))
1343 			return -EFAULT;
1344 		if (vr.mifi >= maxvif)
1345 			return -EINVAL;
1346 		read_lock(&mrt_lock);
1347 		vif = &vif6_table[vr.mifi];
1348 		if (MIF_EXISTS(vr.mifi)) {
1349 			vr.icount = vif->pkt_in;
1350 			vr.ocount = vif->pkt_out;
1351 			vr.ibytes = vif->bytes_in;
1352 			vr.obytes = vif->bytes_out;
1353 			read_unlock(&mrt_lock);
1354 
1355 			if (copy_to_user(arg, &vr, sizeof(vr)))
1356 				return -EFAULT;
1357 			return 0;
1358 		}
1359 		read_unlock(&mrt_lock);
1360 		return -EADDRNOTAVAIL;
1361 	case SIOCGETSGCNT_IN6:
1362 		if (copy_from_user(&sr, arg, sizeof(sr)))
1363 			return -EFAULT;
1364 
1365 		read_lock(&mrt_lock);
1366 		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1367 		if (c) {
1368 			sr.pktcnt = c->mfc_un.res.pkt;
1369 			sr.bytecnt = c->mfc_un.res.bytes;
1370 			sr.wrong_if = c->mfc_un.res.wrong_if;
1371 			read_unlock(&mrt_lock);
1372 
1373 			if (copy_to_user(arg, &sr, sizeof(sr)))
1374 				return -EFAULT;
1375 			return 0;
1376 		}
1377 		read_unlock(&mrt_lock);
1378 		return -EADDRNOTAVAIL;
1379 	default:
1380 		return -ENOIOCTLCMD;
1381 	}
1382 }
1383 
1384 
1385 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1386 {
1387 	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1388 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1389 	return dst_output(skb);
1390 }
1391 
1392 /*
1393  *	Processing handlers for ip6mr_forward
1394  */
1395 
1396 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1397 {
1398 	struct ipv6hdr *ipv6h;
1399 	struct mif_device *vif = &vif6_table[vifi];
1400 	struct net_device *dev;
1401 	struct dst_entry *dst;
1402 	struct flowi fl;
1403 
1404 	if (vif->dev == NULL)
1405 		goto out_free;
1406 
1407 #ifdef CONFIG_IPV6_PIMSM_V2
1408 	if (vif->flags & MIFF_REGISTER) {
1409 		vif->pkt_out++;
1410 		vif->bytes_out += skb->len;
1411 		vif->dev->stats.tx_bytes += skb->len;
1412 		vif->dev->stats.tx_packets++;
1413 		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1414 		kfree_skb(skb);
1415 		return 0;
1416 	}
1417 #endif
1418 
1419 	ipv6h = ipv6_hdr(skb);
1420 
1421 	fl = (struct flowi) {
1422 		.oif = vif->link,
1423 		.nl_u = { .ip6_u =
1424 				{ .daddr = ipv6h->daddr, }
1425 		}
1426 	};
1427 
1428 	dst = ip6_route_output(&init_net, NULL, &fl);
1429 	if (!dst)
1430 		goto out_free;
1431 
1432 	dst_release(skb->dst);
1433 	skb->dst = dst;
1434 
1435 	/*
1436 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1437 	 * not only before forwarding, but also after forwarding on all output
1438 	 * interfaces. Clearly, if the mrouter runs a multicast
1439 	 * program, that program should receive packets regardless of which
1440 	 * interface it has joined on.
1441 	 * If we did not do this, the program would have to join on all
1442 	 * interfaces. On the other hand, a multihomed host (or a router, but
1443 	 * not an mrouter) cannot join on more than one interface - it would
1444 	 * result in receiving multiple copies of each packet.
1445 	 */
1446 	dev = vif->dev;
1447 	skb->dev = dev;
1448 	vif->pkt_out++;
1449 	vif->bytes_out += skb->len;
1450 
1451 	/* We are about to write */
1452 	/* XXX: extension headers? */
1453 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1454 		goto out_free;
1455 
1456 	ipv6h = ipv6_hdr(skb);
1457 	ipv6h->hop_limit--;
1458 
1459 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1460 
1461 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1462 		       ip6mr_forward2_finish);
1463 
1464 out_free:
1465 	kfree_skb(skb);
1466 	return 0;
1467 }
1468 
1469 static int ip6mr_find_vif(struct net_device *dev)
1470 {
1471 	int ct;
1472 	for (ct = maxvif - 1; ct >= 0; ct--) {
1473 		if (vif6_table[ct].dev == dev)
1474 			break;
1475 	}
1476 	return ct;
1477 }
1478 
1479 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1480 {
1481 	int psend = -1;
1482 	int vif, ct;
1483 
1484 	vif = cache->mf6c_parent;
1485 	cache->mfc_un.res.pkt++;
1486 	cache->mfc_un.res.bytes += skb->len;
1487 
1488 	/*
1489 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1490 	 */
1491 	if (vif6_table[vif].dev != skb->dev) {
1492 		int true_vifi;
1493 
1494 		cache->mfc_un.res.wrong_if++;
1495 		true_vifi = ip6mr_find_vif(skb->dev);
1496 
1497 		if (true_vifi >= 0 && mroute_do_assert &&
1498 		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
1499 		       so we cannot check that the packet arrived on an oif.
1500 		       That is bad, but otherwise we would need to move a pretty
1501 		       large chunk of pimd into the kernel. Ough... --ANK
1502 		     */
1503 		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1504 		    time_after(jiffies,
1505 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1506 			cache->mfc_un.res.last_assert = jiffies;
1507 			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1508 		}
1509 		goto dont_forward;
1510 	}
1511 
1512 	vif6_table[vif].pkt_in++;
1513 	vif6_table[vif].bytes_in += skb->len;
1514 
1515 	/*
1516 	 *	Forward the frame
1517 	 */
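	/* Classic delayed-transmit trick: clone the skb for each eligible
	 * output interface except the last one, which consumes the original
	 * skb and saves one clone.
	 */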
1518 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1519 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1520 			if (psend != -1) {
1521 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1522 				if (skb2)
1523 					ip6mr_forward2(skb2, cache, psend);
1524 			}
1525 			psend = ct;
1526 		}
1527 	}
1528 	if (psend != -1) {
1529 		ip6mr_forward2(skb, cache, psend);
1530 		return 0;
1531 	}
1532 
1533 dont_forward:
1534 	kfree_skb(skb);
1535 	return 0;
1536 }
1537 
1538 
1539 /*
1540  *	Multicast packets for forwarding arrive here
1541  */
1542 
1543 int ip6_mr_input(struct sk_buff *skb)
1544 {
1545 	struct mfc6_cache *cache;
1546 
1547 	read_lock(&mrt_lock);
1548 	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1549 
1550 	/*
1551 	 *	No usable cache entry
1552 	 */
1553 	if (cache == NULL) {
1554 		int vif;
1555 
1556 		vif = ip6mr_find_vif(skb->dev);
1557 		if (vif >= 0) {
1558 			int err = ip6mr_cache_unresolved(vif, skb);
1559 			read_unlock(&mrt_lock);
1560 
1561 			return err;
1562 		}
1563 		read_unlock(&mrt_lock);
1564 		kfree_skb(skb);
1565 		return -ENODEV;
1566 	}
1567 
1568 	ip6_mr_forward(skb, cache);
1569 
1570 	read_unlock(&mrt_lock);
1571 
1572 	return 0;
1573 }
1574 
1575 
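/* Encode one MFC entry into an rtnetlink route message: RTA_IIF carries
 * the parent (input) interface, and the output interfaces are packed as
 * rtnexthop records inside an RTA_MULTIPATH attribute, with each TTL
 * threshold stored in rtnh_hops.
 */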
1576 static int
1577 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1578 {
1579 	int ct;
1580 	struct rtnexthop *nhp;
1581 	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
1582 	u8 *b = skb_tail_pointer(skb);
1583 	struct rtattr *mp_head;
1584 
1585 	if (dev)
1586 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1587 
1588 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1589 
1590 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1591 		if (c->mfc_un.res.ttls[ct] < 255) {
1592 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1593 				goto rtattr_failure;
1594 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1595 			nhp->rtnh_flags = 0;
1596 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1597 			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
1598 			nhp->rtnh_len = sizeof(*nhp);
1599 		}
1600 	}
1601 	mp_head->rta_type = RTA_MULTIPATH;
1602 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1603 	rtm->rtm_type = RTN_MULTICAST;
1604 	return 1;
1605 
1606 rtattr_failure:
1607 	nlmsg_trim(skb, b);
1608 	return -EMSGSIZE;
1609 }
1610 
1611 int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1612 {
1613 	int err;
1614 	struct mfc6_cache *cache;
1615 	struct rt6_info *rt = (struct rt6_info *)skb->dst;
1616 
1617 	read_lock(&mrt_lock);
1618 	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1619 
1620 	if (!cache) {
1621 		struct sk_buff *skb2;
1622 		struct ipv6hdr *iph;
1623 		struct net_device *dev;
1624 		int vif;
1625 
1626 		if (nowait) {
1627 			read_unlock(&mrt_lock);
1628 			return -EAGAIN;
1629 		}
1630 
1631 		dev = skb->dev;
1632 		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1633 			read_unlock(&mrt_lock);
1634 			return -ENODEV;
1635 		}
1636 
1637 		/* really correct? */
1638 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1639 		if (!skb2) {
1640 			read_unlock(&mrt_lock);
1641 			return -ENOMEM;
1642 		}
1643 
1644 		skb_reset_transport_header(skb2);
1645 
1646 		skb_put(skb2, sizeof(struct ipv6hdr));
1647 		skb_reset_network_header(skb2);
1648 
1649 		iph = ipv6_hdr(skb2);
1650 		iph->version = 0;
1651 		iph->priority = 0;
1652 		iph->flow_lbl[0] = 0;
1653 		iph->flow_lbl[1] = 0;
1654 		iph->flow_lbl[2] = 0;
1655 		iph->payload_len = 0;
1656 		iph->nexthdr = IPPROTO_NONE;
1657 		iph->hop_limit = 0;
1658 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1659 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1660 
1661 		err = ip6mr_cache_unresolved(vif, skb2);
1662 		read_unlock(&mrt_lock);
1663 
1664 		return err;
1665 	}
1666 
1667 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1668 		cache->mfc_flags |= MFC_NOTIFY;
1669 
1670 	err = ip6mr_fill_mroute(skb, cache, rtm);
1671 	read_unlock(&mrt_lock);
1672 	return err;
1673 }
1674 
1675