/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <net/ip6_checksum.h>

/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries
   is protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
static void mroute_clean_tables(struct net *net);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;


#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = net->ipv6.mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc6_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
		if (!MIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv6.maxvif) {
		if (!MIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - net->ipv6.vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
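
/*
 * For illustration only: the seq_printf() format above yields
 * /proc/net/ip6_mr_vif lines roughly like the following (the values
 * are made up, and Flags is mif_device->flags printed in hex):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           5354      21      8943      37 00000
 *	 1 pim6reg           0       0      1120       5 00001
 */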

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);

	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);

	while (++it->ct < MFC6_LINES) {
		mfc = net->ipv6.mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv6.mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
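
/*
 * Illustrative /proc/net/ip6_mr_cache lines (addresses abbreviated,
 * counters made up): a resolved entry lists "mif:ttl" pairs under
 * Oifs, while an unresolved one (parent mif -1) shows zero counters:
 *
 *	Group      Origin     Iif      Pkts  Bytes     Wrong  Oifs
 *	<group 1>  <source 1>   0        10   1040         0  1:1 2:1
 *	<group 2>  <source 2>  -1         0      0         0
 */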

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	int reg_vif_num = net->ipv6.mroute_reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check that the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
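
/*
 * A rough sketch of the register packet pim6_rcv() accepts, assuming
 * the struct pimreghdr layout from linux/pim.h (see RFC 4601 for the
 * authoritative format):
 *
 *	outer IPv6 header, next header IPPROTO_PIM
 *	struct pimreghdr: PIM version 2, type REGISTER, the
 *		Null-Register flag clear, and a checksum valid either
 *		over the fixed register header alone (with the IPv6
 *		pseudo-header) or over the whole packet
 *	inner IPv6 packet: daddr must be multicast and payload_len
 *		must fit inside skb->len
 *
 * Anything else is silently dropped. On success the inner packet is
 * re-injected as if it had arrived on the pim6reg device.
 */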

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
			   MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
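
/*
 * The MTU set above accounts for the encapsulation added on the way
 * to the daemon: assuming an outer 1500-byte Ethernet MTU,
 * 1500 - 40 (outer IPv6 header) - 8 (PIM register header) leaves
 * 1452 bytes for the inner packet.
 */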

static struct net_device *ip6mr_reg_vif(struct net *net)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct net *net, int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	if (vifi < 0 || vifi >= net->ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == net->ipv6.mroute_reg_vif_num)
		net->ipv6.mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == net->ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(net, tmp))
				break;
		}
		net->ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	release_net(mfc6_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;
	struct net *net = mfc6_net(c);

	atomic_dec(&net->ipv6.cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}

/* A single timer process handles the whole unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (mfc_unres_queue != NULL)
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}
/* Fill the oifs list. Called with mrt_lock write-locked. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc6_net(cache);

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
		if (MIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
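
/*
 * Worked example with hypothetical values: assuming maxvif == 4,
 * that mifs 1 and 3 exist, and ttls[] = { 0, 1, 0, 64 }, the result is
 *
 *	cache->mfc_un.res.ttls   == { 255, 1, 255, 64, 255, ... }
 *	cache->mfc_un.res.minvif == 1
 *	cache->mfc_un.res.maxvif == 4	(one past the last used mif)
 *
 * A ttl of 0 or 255 means "do not forward on this mif".
 */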

static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &net->ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (MIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special purpose VIF in PIM:
		 * all the packets will be sent to the daemon.
		 */
		if (net->ipv6.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish the update by writing the critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		net->ipv6.mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > net->ipv6.maxvif)
		net->ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
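
/*
 * A minimal userspace sketch of adding a mif (error handling omitted;
 * mrt_sock and the ifindex value are assumptions for illustration):
 *
 *	struct mif6ctl mc = {
 *		.mif6c_mifi	= 0,
 *		.mif6c_flags	= 0,	// a plain physical interface
 *		.mif6c_pifi	= 2,	// ifindex of the device
 *	};
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 * mrt_sock must be the ICMPv6 raw socket that issued MRT6_INIT, or the
 * caller needs CAP_NET_ADMIN; see ip6_mroute_setsockopt() below.
 */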

static struct mfc6_cache *ip6mr_cache_find(struct net *net,
					   struct in6_addr *origin,
					   struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	mfc6_net_set(c, net);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	mfc6_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone from the unresolved queue to resolved state
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
			      int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (net->ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
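
/*
 * On the receive side a pim6sd-style daemon reads these upcalls from
 * the mroute socket. A hedged userspace sketch (no error handling;
 * add_mfc_for() is a hypothetical helper that would issue
 * MRT6_ADD_MFC). Per the construction above, for NOCACHE/WRONGMIF the
 * struct mrt6msg follows the copied IPv6 header; for WHOLEPKT it
 * precedes the full packet instead:
 *
 *	char buf[2048];
 *	ssize_t len = recv(mrt_sock, buf, sizeof(buf), 0);
 *	struct mrt6msg *msg =
 *		(struct mrt6msg *)(buf + sizeof(struct ip6_hdr));
 *
 *	if (len >= sizeof(struct ip6_hdr) + sizeof(*msg) &&
 *	    msg->im6_msgtype == MRT6MSG_NOCACHE)
 *		add_mfc_for(&msg->im6_src, &msg->im6_dst, msg->im6_mif);
 */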

/*
 *	Queue a packet for resolution. The unresolved cache entry is
 *	created and the packet queued under mfc_unres_lock.
 */

static int
ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc6_net(c), net) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv6.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &net->ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &net->ipv6.vif6_table[0];
	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(net, ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err = 0;
	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
				       GFP_KERNEL);
	if (!net->ipv6.vif6_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
					     sizeof(struct mfc6_cache *),
					     GFP_KERNEL);
	if (!net->ipv6.mfc6_cache_array) {
		err = -ENOMEM;
		goto fail_mfc6_cache;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	net->ipv6.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	kfree(net->ipv6.mfc6_cache_array);
#endif
fail_mfc6_cache:
	kfree(net->ipv6.vif6_table);
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	mroute_clean_tables(net);
	kfree(net->ipv6.mfc6_cache_array);
	kfree(net->ipv6.vif6_table);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &net->ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv6.mfc6_cache_array[line];
	net->ipv6.mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc6_net(uc), net) &&
		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			atomic_dec(&net->ipv6.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
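
/*
 * A hedged userspace sketch of installing an (S,G) entry (the
 * addresses are documentation values, error handling is omitted, and
 * mrt_sock is the assumed MRT6_INIT socket):
 *
 *	struct mf6cctl mfc;
 *
 *	memset(&mfc, 0, sizeof(mfc));
 *	inet_pton(AF_INET6, "2001:db8::1", &mfc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::5:6", &mfc.mf6cc_mcastgrp.sin6_addr);
 *	mfc.mf6cc_parent = 0;			// incoming mif
 *	IF_SET(1, &mfc.mf6cc_ifset);		// forward on mif 1
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_ADD_MFC,
 *		   &mfc, sizeof(mfc));
 *
 * Note that each mif set in mf6cc_ifset is given a ttl threshold of 1
 * by the loop above.
 */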

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv6.maxvif; i++) {
		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(net, i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &net->ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc6_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;
			ip6mr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(net->ipv6.mroute6_sk == NULL)) {
		net->ipv6.mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		net->ipv6.mroute6_sk = NULL;
		net->ipv6.devconf_all->mc_forwarding--;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);

	if (optname != MRT6_INIT) {
		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(net, mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(net, &mfc);
		else
			ret = ip6mr_mfc_add(net, &mfc,
					    sk == net->ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv6.mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != net->ipv6.mroute_do_pim) {
			net->ipv6.mroute_do_pim = v;
			net->ipv6.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
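
/*
 * A hedged sketch of how a routing daemon would bring the whole thing
 * up and tear it down again (error handling omitted):
 *
 *	int on = 1;
 *	int mrt_sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	// ... MRT6_ADD_MIF / MRT6_ADD_MFC calls, read upcalls ...
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 *
 * Only a raw ICMPv6 socket is accepted, matching the MRT6_INIT check
 * above; closing the socket also implies MRT6_DONE.
 */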

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = net->ipv6.mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = net->ipv6.mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= net->ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
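
/*
 * A hedged userspace sketch of reading the per-mif counters (error
 * handling omitted; mrt_sock is the assumed mroute socket):
 *
 *	struct sioc_mif_req6 vr;
 *
 *	memset(&vr, 0, sizeof(vr));
 *	vr.mifi = 0;
 *	if (ioctl(mrt_sock, SIOCGETMIFCNT_IN6, &vr) == 0)
 *		printf("mif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 */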


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct net *net = mfc6_net(c);
	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
				{ .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(net, NULL, &fl);
	if (!dst)
		goto out_free;

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a
	 * multicasting program, it should receive packets regardless of
	 * which interface the program joined on. Otherwise the program
	 * would have to join on all interfaces. On the other hand, a
	 * multihoming host (or a router, but not an mrouter) cannot join
	 * on more than one interface - that would result in receiving
	 * multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
		if (net->ipv6.vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc6_net(cache);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       That is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (net->ipv6.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	net->ipv6.vif6_table[vif].pkt_in++;
	net->ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
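
/*
 * Illustration of the forwarding test above, with hypothetical
 * thresholds ttls[] = { 1, 64 }: a packet arriving with hop_limit 65
 * is cloned to both mifs, one with hop_limit 64 is forwarded only on
 * mif 0, and one with hop_limit 1 is not forwarded at all.
 * ip6mr_forward2() then decrements hop_limit on each copy it sends.
 */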


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc6_net(c);
	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}