xref: /linux/net/ipv6/ip6mr.c (revision 8fa5723aa7e053d498336b48448b292fc2e0458b)
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

struct sock *mroute6_socket;

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that updates are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];		/* Devices		*/
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected by the
   weak lock mrt_lock. The queue of unresolved entries is protected
   by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
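
/*
 * An illustrative summary of that discipline as used in this file
 * (a sketch for orientation, not additional API documentation):
 *
 *	read_lock(&mrt_lock);			data path lookup
 *	c = ip6mr_cache_find(&saddr, &daddr);
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);		process-context update
 *	mfc6_cache_array[line] = c;
 *	write_unlock_bh(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);		unresolved queue, any context
 *	...
 *	spin_unlock_bh(&mfc_unres_lock);
 */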

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};

static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq,
			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
			   mfc->mf6c_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

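/*
 * Illustrative /proc/net/ip6_mr_cache output from the show routine
 * above (addresses and counters invented); each "Oifs" pair is
 * mif:ttl-threshold, and entries still on the unresolved queue are
 * printed without the Oifs list:
 *
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff0e:0000:0000:0000:0000:0000:0000:0001 2001:0db8:0000:0000:0000:0000:0000:0001 0         42     4116        0  1:1
 */
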
static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* the decapsulated packet is IPv6 */
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting an error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}

/* A single timer handles expiry for the whole unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oif list. Called with mrt_lock write-locked. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone from the unresolved queue to the
 *	resolved state.
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

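/*
 * For illustration only (not part of the original source): a pim6sd-style
 * daemon receives these messages by reading the raw ICMPv6 socket on
 * which it issued MRT6_INIT, then dispatching on im6_msgtype. A minimal
 * userspace sketch; add_mfc_route(), process_assert() and
 * forward_register() are hypothetical daemon helpers:
 *
 *	char buf[8192];
 *	struct mrt6msg *msg = (struct mrt6msg *)buf;
 *	ssize_t n = recv(mrt_sock, buf, sizeof(buf), 0);
 *
 *	if (n >= (ssize_t)sizeof(*msg) && msg->im6_mbz == 0) {
 *		switch (msg->im6_msgtype) {
 *		case MRT6MSG_NOCACHE:
 *			add_mfc_route(&msg->im6_src, &msg->im6_dst);
 *			break;
 *		case MRT6MSG_WRONGMIF:
 *			process_assert(msg);
 *			break;
 *		case MRT6MSG_WHOLEPKT:
 *			forward_register(buf + sizeof(*msg), n - sizeof(*msg));
 *			break;
 *		}
 *	}
 */
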
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. The unresolved cache entry it
 *	finds or creates is manipulated under mfc_unres_lock.
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued entry. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mroute6_socket == NULL))
		mroute6_socket = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == mroute6_socket) {
		write_lock_bh(&mrt_lock);
		mroute6_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

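/*
 * For illustration only (not part of the original source): the userspace
 * call sequence this interface expects, sketched in C. The socket must
 * be a raw ICMPv6 socket, and the options live at the IPPROTO_IPV6
 * level; "eth0" and the mif values are invented:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *	... MRT6_ADD_MFC with a struct mf6cctl per (S,G) route ...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */
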
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (enabling PIM also enables asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	getsockopt support for the multicast routing system.
 */

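/*
 * The matching userspace read, for illustration (a hedged sketch, not
 * from the original source):
 *
 *	int ver = 0;
 *	socklen_t len = sizeof(ver);
 *
 *	if (getsockopt(s, IPPROTO_IPV6, MRT6_VERSION, &ver, &len) == 0)
 *		printf("MRT6 API version %#x\n", ver);
 *
 * With the code below this reports 0x0305.
 */
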
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

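/*
 * Userspace sketch of the per-flow counter query handled below
 * (illustrative only; the addresses are invented):
 *
 *	struct sioc_sg_req6 sr;
 *
 *	memset(&sr, 0, sizeof(sr));
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::1", &sr.grp.sin6_addr);
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("%lu pkts, %lu bytes, %lu wrong-mif\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */
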
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
				{ .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it joined on; otherwise it would have to join on every
	 * interface. A multihomed host (or a router, but not an mrouter),
	 * on the other hand, must not join on more than one interface, or
	 * it will receive duplicate packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;

	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is bad, but the alternative would be
		       moving a pretty large chunk of pimd into the kernel.
		       Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}