xref: /linux/net/ipv6/ip6mr.c (revision 27258e448eb301cf89e351df87aa8cb916653bf2)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/sock.h>
39 #include <net/raw.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <net/ip6_checksum.h>
52 
53 /* Big lock, protecting the vif table, the mrt cache and the mroute socket
54    state.  Readers may run in softirq context, so updates use write_lock_bh();
55    configuration changes are additionally serialized via rtnl_lock.  */
56 
57 static DEFINE_RWLOCK(mrt_lock);
58 
59 /*
60  *	Multicast router control variables
61  */
62 
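/* A mif slot is live iff a device is attached to it; mif6_delete()
 * frees a slot simply by clearing ->dev under mrt_lock.
 */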
63 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
64 
65 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
66 
67 /* Special spinlock for queue of unresolved entries */
68 static DEFINE_SPINLOCK(mfc_unres_lock);
69 
70 /* We return to Alan's original scheme.  The hash table of resolved
71    entries is changed only in process context and is protected by the
72    weak lock mrt_lock.  The queue of unresolved entries is protected
73    by the strong spinlock mfc_unres_lock.
74 
75    In this case the data path is entirely free of exclusive locks.
76  */
77 
78 static struct kmem_cache *mrt_cachep __read_mostly;
79 
80 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
81 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
82 			      mifi_t mifi, int assert);
83 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
84 static void mroute_clean_tables(struct net *net);
85 
86 static struct timer_list ipmr_expire_timer;
87 
88 
89 #ifdef CONFIG_PROC_FS
90 
91 struct ipmr_mfc_iter {
92 	struct seq_net_private p;
93 	struct mfc6_cache **cache;
94 	int ct;
95 };
96 
97 
98 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
99 					   struct ipmr_mfc_iter *it, loff_t pos)
100 {
101 	struct mfc6_cache *mfc;
102 
103 	it->cache = net->ipv6.mfc6_cache_array;
104 	read_lock(&mrt_lock);
105 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
106 		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
107 		     mfc; mfc = mfc->next)
108 			if (pos-- == 0)
109 				return mfc;
110 	read_unlock(&mrt_lock);
111 
112 	it->cache = &mfc_unres_queue;
113 	spin_lock_bh(&mfc_unres_lock);
114 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
115 		if (net_eq(mfc6_net(mfc), net) &&
116 		    pos-- == 0)
117 			return mfc;
118 	spin_unlock_bh(&mfc_unres_lock);
119 
120 	it->cache = NULL;
121 	return NULL;
122 }
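/*
 * Note the locking hand-off in the iterator above: mrt_lock is held
 * while walking the resolved hash table, mfc_unres_lock while walking
 * the unresolved queue, and whichever lock is still held when the
 * iteration stops is dropped in ipmr_mfc_seq_stop().
 */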
123 
127 /*
128  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
129  */
130 
131 struct ipmr_vif_iter {
132 	struct seq_net_private p;
133 	int ct;
134 };
135 
136 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
137 					    struct ipmr_vif_iter *iter,
138 					    loff_t pos)
139 {
140 	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
141 		if (!MIF_EXISTS(net, iter->ct))
142 			continue;
143 		if (pos-- == 0)
144 			return &net->ipv6.vif6_table[iter->ct];
145 	}
146 	return NULL;
147 }
148 
149 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
150 	__acquires(mrt_lock)
151 {
152 	struct net *net = seq_file_net(seq);
153 
154 	read_lock(&mrt_lock);
155 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
156 		: SEQ_START_TOKEN;
157 }
158 
159 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
160 {
161 	struct ipmr_vif_iter *iter = seq->private;
162 	struct net *net = seq_file_net(seq);
163 
164 	++*pos;
165 	if (v == SEQ_START_TOKEN)
166 		return ip6mr_vif_seq_idx(net, iter, 0);
167 
168 	while (++iter->ct < net->ipv6.maxvif) {
169 		if (!MIF_EXISTS(net, iter->ct))
170 			continue;
171 		return &net->ipv6.vif6_table[iter->ct];
172 	}
173 	return NULL;
174 }
175 
176 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
177 	__releases(mrt_lock)
178 {
179 	read_unlock(&mrt_lock);
180 }
181 
182 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
183 {
184 	struct net *net = seq_file_net(seq);
185 
186 	if (v == SEQ_START_TOKEN) {
187 		seq_puts(seq,
188 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
189 	} else {
190 		const struct mif_device *vif = v;
191 		const char *name = vif->dev ? vif->dev->name : "none";
192 
193 		seq_printf(seq,
194 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
195 			   vif - net->ipv6.vif6_table,
196 			   name, vif->bytes_in, vif->pkt_in,
197 			   vif->bytes_out, vif->pkt_out,
198 			   vif->flags);
199 	}
200 	return 0;
201 }
202 
203 static const struct seq_operations ip6mr_vif_seq_ops = {
204 	.start = ip6mr_vif_seq_start,
205 	.next  = ip6mr_vif_seq_next,
206 	.stop  = ip6mr_vif_seq_stop,
207 	.show  = ip6mr_vif_seq_show,
208 };
209 
210 static int ip6mr_vif_open(struct inode *inode, struct file *file)
211 {
212 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
213 			    sizeof(struct ipmr_vif_iter));
214 }
215 
216 static const struct file_operations ip6mr_vif_fops = {
217 	.owner	 = THIS_MODULE,
218 	.open    = ip6mr_vif_open,
219 	.read    = seq_read,
220 	.llseek  = seq_lseek,
221 	.release = seq_release_net,
222 };
223 
224 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
225 {
226 	struct net *net = seq_file_net(seq);
227 
228 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
229 		: SEQ_START_TOKEN;
230 }
231 
232 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
233 {
234 	struct mfc6_cache *mfc = v;
235 	struct ipmr_mfc_iter *it = seq->private;
236 	struct net *net = seq_file_net(seq);
237 
238 	++*pos;
239 
240 	if (v == SEQ_START_TOKEN)
241 		return ipmr_mfc_seq_idx(net, seq->private, 0);
242 
243 	if (mfc->next)
244 		return mfc->next;
245 
246 	if (it->cache == &mfc_unres_queue)
247 		goto end_of_list;
248 
249 	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
250 
251 	while (++it->ct < MFC6_LINES) {
252 		mfc = net->ipv6.mfc6_cache_array[it->ct];
253 		if (mfc)
254 			return mfc;
255 	}
256 
257 	/* exhausted cache_array, show unresolved */
258 	read_unlock(&mrt_lock);
259 	it->cache = &mfc_unres_queue;
260 	it->ct = 0;
261 
262 	spin_lock_bh(&mfc_unres_lock);
263 	mfc = mfc_unres_queue;
264 	if (mfc)
265 		return mfc;
266 
267  end_of_list:
268 	spin_unlock_bh(&mfc_unres_lock);
269 	it->cache = NULL;
270 
271 	return NULL;
272 }
273 
274 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
275 {
276 	struct ipmr_mfc_iter *it = seq->private;
277 	struct net *net = seq_file_net(seq);
278 
279 	if (it->cache == &mfc_unres_queue)
280 		spin_unlock_bh(&mfc_unres_lock);
281 	else if (it->cache == net->ipv6.mfc6_cache_array)
282 		read_unlock(&mrt_lock);
283 }
284 
285 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
286 {
287 	int n;
288 	struct net *net = seq_file_net(seq);
289 
290 	if (v == SEQ_START_TOKEN) {
291 		seq_puts(seq,
292 			 "Group                            "
293 			 "Origin                           "
294 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
295 	} else {
296 		const struct mfc6_cache *mfc = v;
297 		const struct ipmr_mfc_iter *it = seq->private;
298 
299 		seq_printf(seq, "%pI6 %pI6 %-3hd",
300 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
301 			   mfc->mf6c_parent);
302 
303 		if (it->cache != &mfc_unres_queue) {
304 			seq_printf(seq, " %8lu %8lu %8lu",
305 				   mfc->mfc_un.res.pkt,
306 				   mfc->mfc_un.res.bytes,
307 				   mfc->mfc_un.res.wrong_if);
308 			for (n = mfc->mfc_un.res.minvif;
309 			     n < mfc->mfc_un.res.maxvif; n++) {
310 				if (MIF_EXISTS(net, n) &&
311 				    mfc->mfc_un.res.ttls[n] < 255)
312 					seq_printf(seq,
313 						   " %2d:%-3d",
314 						   n, mfc->mfc_un.res.ttls[n]);
315 			}
316 		} else {
317 			/* unresolved mfc_caches don't contain
318 			 * pkt, bytes and wrong_if values
319 			 */
320 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
321 		}
322 		seq_putc(seq, '\n');
323 	}
324 	return 0;
325 }
326 
327 static const struct seq_operations ipmr_mfc_seq_ops = {
328 	.start = ipmr_mfc_seq_start,
329 	.next  = ipmr_mfc_seq_next,
330 	.stop  = ipmr_mfc_seq_stop,
331 	.show  = ipmr_mfc_seq_show,
332 };
333 
334 static int ipmr_mfc_open(struct inode *inode, struct file *file)
335 {
336 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
337 			    sizeof(struct ipmr_mfc_iter));
338 }
339 
340 static const struct file_operations ip6mr_mfc_fops = {
341 	.owner	 = THIS_MODULE,
342 	.open    = ipmr_mfc_open,
343 	.read    = seq_read,
344 	.llseek  = seq_lseek,
345 	.release = seq_release_net,
346 };
347 #endif
348 
349 #ifdef CONFIG_IPV6_PIMSM_V2
350 
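/*
 * Receive a PIM Register message (RFC 4601): validate the register
 * header and its checksum, sanity-check the encapsulated IPv6
 * multicast packet, then strip the outer headers and re-inject the
 * inner packet through the pim6reg device via netif_rx().
 */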
351 static int pim6_rcv(struct sk_buff *skb)
352 {
353 	struct pimreghdr *pim;
354 	struct ipv6hdr   *encap;
355 	struct net_device  *reg_dev = NULL;
356 	struct net *net = dev_net(skb->dev);
357 	int reg_vif_num = net->ipv6.mroute_reg_vif_num;
358 
359 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
360 		goto drop;
361 
362 	pim = (struct pimreghdr *)skb_transport_header(skb);
363 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
364 	    (pim->flags & PIM_NULL_REGISTER) ||
365 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
366 			     sizeof(*pim), IPPROTO_PIM,
367 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
368 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
369 		goto drop;
370 
371 	/* check if the inner packet is destined to mcast group */
372 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
373 				   sizeof(*pim));
374 
375 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
376 	    encap->payload_len == 0 ||
377 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
378 		goto drop;
379 
380 	read_lock(&mrt_lock);
381 	if (reg_vif_num >= 0)
382 		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
383 	if (reg_dev)
384 		dev_hold(reg_dev);
385 	read_unlock(&mrt_lock);
386 
387 	if (reg_dev == NULL)
388 		goto drop;
389 
390 	skb->mac_header = skb->network_header;
391 	skb_pull(skb, (u8 *)encap - skb->data);
392 	skb_reset_network_header(skb);
393 	skb->dev = reg_dev;
394 	skb->protocol = htons(ETH_P_IPV6);
395 	skb->ip_summed = CHECKSUM_NONE;
396 	skb->pkt_type = PACKET_HOST;
397 	skb_dst_drop(skb);
398 	reg_dev->stats.rx_bytes += skb->len;
399 	reg_dev->stats.rx_packets++;
400 	nf_reset(skb);
401 	netif_rx(skb);
402 	dev_put(reg_dev);
403 	return 0;
404  drop:
405 	kfree_skb(skb);
406 	return 0;
407 }
408 
409 static const struct inet6_protocol pim6_protocol = {
410 	.handler	=	pim6_rcv,
411 };
412 
413 /* Service routines creating virtual interfaces: PIMREG */
414 
415 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
416 				      struct net_device *dev)
417 {
418 	struct net *net = dev_net(dev);
419 
420 	read_lock(&mrt_lock);
421 	dev->stats.tx_bytes += skb->len;
422 	dev->stats.tx_packets++;
423 	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
424 			   MRT6MSG_WHOLEPKT);
425 	read_unlock(&mrt_lock);
426 	kfree_skb(skb);
427 	return NETDEV_TX_OK;
428 }
429 
430 static const struct net_device_ops reg_vif_netdev_ops = {
431 	.ndo_start_xmit	= reg_vif_xmit,
432 };
433 
434 static void reg_vif_setup(struct net_device *dev)
435 {
436 	dev->type		= ARPHRD_PIMREG;
437 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8; /* Ethernet MTU minus outer IPv6 and PIM register headers */
438 	dev->flags		= IFF_NOARP;
439 	dev->netdev_ops		= &reg_vif_netdev_ops;
440 	dev->destructor		= free_netdev;
441 	dev->features		|= NETIF_F_NETNS_LOCAL;
442 }
443 
444 static struct net_device *ip6mr_reg_vif(struct net *net)
445 {
446 	struct net_device *dev;
447 
448 	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
449 	if (dev == NULL)
450 		return NULL;
451 
452 	dev_net_set(dev, net);
453 
454 	if (register_netdevice(dev)) {
455 		free_netdev(dev);
456 		return NULL;
457 	}
458 	dev->iflink = 0;
459 
460 	if (dev_open(dev))
461 		goto failure;
462 
463 	dev_hold(dev);
464 	return dev;
465 
466 failure:
467 	/* allow the register to be completed before unregistering. */
468 	rtnl_unlock();
469 	rtnl_lock();
470 
471 	unregister_netdevice(dev);
472 	return NULL;
473 }
474 #endif
475 
476 /*
477  *	Delete a VIF entry
478  */
479 
480 static int mif6_delete(struct net *net, int vifi)
481 {
482 	struct mif_device *v;
483 	struct net_device *dev;
484 	struct inet6_dev *in6_dev;
485 	if (vifi < 0 || vifi >= net->ipv6.maxvif)
486 		return -EADDRNOTAVAIL;
487 
488 	v = &net->ipv6.vif6_table[vifi];
489 
490 	write_lock_bh(&mrt_lock);
491 	dev = v->dev;
492 	v->dev = NULL;
493 
494 	if (!dev) {
495 		write_unlock_bh(&mrt_lock);
496 		return -EADDRNOTAVAIL;
497 	}
498 
499 #ifdef CONFIG_IPV6_PIMSM_V2
500 	if (vifi == net->ipv6.mroute_reg_vif_num)
501 		net->ipv6.mroute_reg_vif_num = -1;
502 #endif
503 
504 	if (vifi + 1 == net->ipv6.maxvif) {
505 		int tmp;
506 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
507 			if (MIF_EXISTS(net, tmp))
508 				break;
509 		}
510 		net->ipv6.maxvif = tmp + 1;
511 	}
512 
513 	write_unlock_bh(&mrt_lock);
514 
515 	dev_set_allmulti(dev, -1);
516 
517 	in6_dev = __in6_dev_get(dev);
518 	if (in6_dev)
519 		in6_dev->cnf.mc_forwarding--;
520 
521 	if (v->flags & MIFF_REGISTER)
522 		unregister_netdevice(dev);
523 
524 	dev_put(dev);
525 	return 0;
526 }
527 
528 static inline void ip6mr_cache_free(struct mfc6_cache *c)
529 {
530 	release_net(mfc6_net(c));
531 	kmem_cache_free(mrt_cachep, c);
532 }
533 
534 /* Destroy an unresolved cache entry, killing queued skbs
535    and reporting error to netlink readers.
536  */
537 
538 static void ip6mr_destroy_unres(struct mfc6_cache *c)
539 {
540 	struct sk_buff *skb;
541 	struct net *net = mfc6_net(c);
542 
543 	atomic_dec(&net->ipv6.cache_resolve_queue_len);
544 
545 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
546 		if (ipv6_hdr(skb)->version == 0) {
547 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
548 			nlh->nlmsg_type = NLMSG_ERROR;
549 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
550 			skb_trim(skb, nlh->nlmsg_len);
551 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
552 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
553 		} else
554 			kfree_skb(skb);
555 	}
556 
557 	ip6mr_cache_free(c);
558 }
559 
560 
561 /* A single timer walks the whole unresolved queue. */
562 
563 static void ipmr_do_expire_process(unsigned long dummy)
564 {
565 	unsigned long now = jiffies;
566 	unsigned long expires = 10 * HZ;
567 	struct mfc6_cache *c, **cp;
568 
569 	cp = &mfc_unres_queue;
570 
571 	while ((c = *cp) != NULL) {
572 		if (time_after(c->mfc_un.unres.expires, now)) {
573 			/* not yet... */
574 			unsigned long interval = c->mfc_un.unres.expires - now;
575 			if (interval < expires)
576 				expires = interval;
577 			cp = &c->next;
578 			continue;
579 		}
580 
581 		*cp = c->next;
582 		ip6mr_destroy_unres(c);
583 	}
584 
585 	if (mfc_unres_queue != NULL)
586 		mod_timer(&ipmr_expire_timer, jiffies + expires);
587 }
588 
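/*
 * Timer entry point.  This runs in softirq context, so only try to
 * take mfc_unres_lock; if someone else holds it, back off and retry
 * one jiffy later instead of spinning.
 */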
589 static void ipmr_expire_process(unsigned long dummy)
590 {
591 	if (!spin_trylock(&mfc_unres_lock)) {
592 		mod_timer(&ipmr_expire_timer, jiffies + 1);
593 		return;
594 	}
595 
596 	if (mfc_unres_queue != NULL)
597 		ipmr_do_expire_process(dummy);
598 
599 	spin_unlock(&mfc_unres_lock);
600 }
601 
602 /* Fill the oifs list and the [minvif, maxvif) bounds.  Called with mrt_lock held for writing. */
603 
604 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
605 {
606 	int vifi;
607 	struct net *net = mfc6_net(cache);
608 
609 	cache->mfc_un.res.minvif = MAXMIFS;
610 	cache->mfc_un.res.maxvif = 0;
611 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
612 
613 	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
614 		if (MIF_EXISTS(net, vifi) &&
615 		    ttls[vifi] && ttls[vifi] < 255) {
616 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
617 			if (cache->mfc_un.res.minvif > vifi)
618 				cache->mfc_un.res.minvif = vifi;
619 			if (cache->mfc_un.res.maxvif <= vifi)
620 				cache->mfc_un.res.maxvif = vifi + 1;
621 		}
622 	}
623 }
624 
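/*
 * Attach a new mif: either a real device identified by mif6c_pifi or,
 * for MIFF_REGISTER, the special pim6reg device created on demand.
 * Called under rtnl_lock via ip6_mroute_setsockopt().
 */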
625 static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
626 {
627 	int vifi = vifc->mif6c_mifi;
628 	struct mif_device *v = &net->ipv6.vif6_table[vifi];
629 	struct net_device *dev;
630 	struct inet6_dev *in6_dev;
631 	int err;
632 
633 	/* Is the vif slot busy? */
634 	if (MIF_EXISTS(net, vifi))
635 		return -EADDRINUSE;
636 
637 	switch (vifc->mif6c_flags) {
638 #ifdef CONFIG_IPV6_PIMSM_V2
639 	case MIFF_REGISTER:
640 		/*
641 		 * Special-purpose VIF used by PIM:
642 		 * all packets received on it are passed to the daemon
643 		 */
644 		if (net->ipv6.mroute_reg_vif_num >= 0)
645 			return -EADDRINUSE;
646 		dev = ip6mr_reg_vif(net);
647 		if (!dev)
648 			return -ENOBUFS;
649 		err = dev_set_allmulti(dev, 1);
650 		if (err) {
651 			unregister_netdevice(dev);
652 			dev_put(dev);
653 			return err;
654 		}
655 		break;
656 #endif
657 	case 0:
658 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
659 		if (!dev)
660 			return -EADDRNOTAVAIL;
661 		err = dev_set_allmulti(dev, 1);
662 		if (err) {
663 			dev_put(dev);
664 			return err;
665 		}
666 		break;
667 	default:
668 		return -EINVAL;
669 	}
670 
671 	in6_dev = __in6_dev_get(dev);
672 	if (in6_dev)
673 		in6_dev->cnf.mc_forwarding++;
674 
675 	/*
676 	 *	Fill in the VIF structures
677 	 */
678 	v->rate_limit = vifc->vifc_rate_limit;
679 	v->flags = vifc->mif6c_flags;
680 	if (!mrtsock)
681 		v->flags |= VIFF_STATIC;
682 	v->threshold = vifc->vifc_threshold;
683 	v->bytes_in = 0;
684 	v->bytes_out = 0;
685 	v->pkt_in = 0;
686 	v->pkt_out = 0;
687 	v->link = dev->ifindex;
688 	if (v->flags & MIFF_REGISTER)
689 		v->link = dev->iflink;
690 
691 	/* Finally, publish the vif by writing the critical data under mrt_lock */
692 	write_lock_bh(&mrt_lock);
693 	v->dev = dev;
694 #ifdef CONFIG_IPV6_PIMSM_V2
695 	if (v->flags & MIFF_REGISTER)
696 		net->ipv6.mroute_reg_vif_num = vifi;
697 #endif
698 	if (vifi + 1 > net->ipv6.maxvif)
699 		net->ipv6.maxvif = vifi + 1;
700 	write_unlock_bh(&mrt_lock);
701 	return 0;
702 }
703 
704 static struct mfc6_cache *ip6mr_cache_find(struct net *net,
705 					   struct in6_addr *origin,
706 					   struct in6_addr *mcastgrp)
707 {
708 	int line = MFC6_HASH(mcastgrp, origin);
709 	struct mfc6_cache *c;
710 
711 	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
712 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
713 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
714 			break;
715 	}
716 	return c;
717 }
718 
719 /*
720  *	Allocate a multicast cache entry
721  */
722 static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
723 {
724 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
725 	if (c == NULL)
726 		return NULL;
727 	c->mfc_un.res.minvif = MAXMIFS;
728 	mfc6_net_set(c, net);
729 	return c;
730 }
731 
732 static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
733 {
734 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
735 	if (c == NULL)
736 		return NULL;
737 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
738 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
739 	mfc6_net_set(c, net);
740 	return c;
741 }
742 
743 /*
744  *	A cache entry has gone from the unresolved queue into a resolved state
745  */
746 
747 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
748 {
749 	struct sk_buff *skb;
750 
751 	/*
752 	 *	Play the pending entries through our router
753 	 */
754 
755 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
756 		if (ipv6_hdr(skb)->version == 0) {
757 			int err;
758 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
759 
760 			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
761 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
762 			} else {
763 				nlh->nlmsg_type = NLMSG_ERROR;
764 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
765 				skb_trim(skb, nlh->nlmsg_len);
766 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
767 			}
768 			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
769 		} else
770 			ip6_mr_forward(skb, c);
771 	}
772 }
773 
774 /*
775  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
776  *	expects the following bizarre scheme.
777  *
778  *	Called under mrt_lock.
779  */
780 
781 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
782 			      int assert)
783 {
784 	struct sk_buff *skb;
785 	struct mrt6msg *msg;
786 	int ret;
787 
788 #ifdef CONFIG_IPV6_PIMSM_V2
789 	if (assert == MRT6MSG_WHOLEPKT)
790 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
791 						+sizeof(*msg));
792 	else
793 #endif
794 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
795 
796 	if (!skb)
797 		return -ENOBUFS;
798 
799 	/* I suppose that internal messages
800 	 * do not require checksums */
801 
802 	skb->ip_summed = CHECKSUM_UNNECESSARY;
803 
804 #ifdef CONFIG_IPV6_PIMSM_V2
805 	if (assert == MRT6MSG_WHOLEPKT) {
806 		/* Ugly, but we have no choice with this interface.
807 		   Duplicate old header, fix length etc.
808 		   And all this only to mangle msg->im6_msgtype and
809 		   to set msg->im6_mbz to "mbz" :-)
810 		 */
811 		skb_push(skb, -skb_network_offset(pkt));
812 
813 		skb_push(skb, sizeof(*msg));
814 		skb_reset_transport_header(skb);
815 		msg = (struct mrt6msg *)skb_transport_header(skb);
816 		msg->im6_mbz = 0;
817 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
818 		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
819 		msg->im6_pad = 0;
820 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
821 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
822 
823 		skb->ip_summed = CHECKSUM_UNNECESSARY;
824 	} else
825 #endif
826 	{
827 	/*
828 	 *	Copy the IP header
829 	 */
830 
831 	skb_put(skb, sizeof(struct ipv6hdr));
832 	skb_reset_network_header(skb);
833 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
834 
835 	/*
836 	 *	Add our header
837 	 */
838 	skb_put(skb, sizeof(*msg));
839 	skb_reset_transport_header(skb);
840 	msg = (struct mrt6msg *)skb_transport_header(skb);
841 
842 	msg->im6_mbz = 0;
843 	msg->im6_msgtype = assert;
844 	msg->im6_mif = mifi;
845 	msg->im6_pad = 0;
846 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
847 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
848 
849 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
850 	skb->ip_summed = CHECKSUM_UNNECESSARY;
851 	}
852 
853 	if (net->ipv6.mroute6_sk == NULL) {
854 		kfree_skb(skb);
855 		return -EINVAL;
856 	}
857 
858 	/*
859 	 *	Deliver to user space multicast routing algorithms
860 	 */
861 	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
862 	if (ret < 0) {
863 		if (net_ratelimit())
864 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
865 		kfree_skb(skb);
866 	}
867 
868 	return ret;
869 }
870 
871 /*
872  *	Queue a packet on an unresolved cache entry, creating one if needed. Runs under mfc_unres_lock.
873  */
874 
875 static int
876 ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
877 {
878 	int err;
879 	struct mfc6_cache *c;
880 
881 	spin_lock_bh(&mfc_unres_lock);
882 	for (c = mfc_unres_queue; c; c = c->next) {
883 		if (net_eq(mfc6_net(c), net) &&
884 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
885 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
886 			break;
887 	}
888 
889 	if (c == NULL) {
890 		/*
891 		 *	Create a new entry if allowable
892 		 */
893 
894 		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
895 		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
896 			spin_unlock_bh(&mfc_unres_lock);
897 
898 			kfree_skb(skb);
899 			return -ENOBUFS;
900 		}
901 
902 		/*
903 		 *	Fill in the new cache entry
904 		 */
905 		c->mf6c_parent = -1;
906 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
907 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
908 
909 		/*
910 		 *	Report the first packet to pim6sd
911 		 */
912 		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
913 		if (err < 0) {
914 			/* If the report failed throw the cache entry
915 			   out - Brad Parker
916 			 */
917 			spin_unlock_bh(&mfc_unres_lock);
918 
919 			ip6mr_cache_free(c);
920 			kfree_skb(skb);
921 			return err;
922 		}
923 
924 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
925 		c->next = mfc_unres_queue;
926 		mfc_unres_queue = c;
927 
928 		ipmr_do_expire_process(1);	/* walk the queue now and (re)arm the expire timer */
929 	}
930 
931 	/*
932 	 *	See if we can append the packet
933 	 */
934 	if (c->mfc_un.unres.unresolved.qlen > 3) {
935 		kfree_skb(skb);
936 		err = -ENOBUFS;
937 	} else {
938 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
939 		err = 0;
940 	}
941 
942 	spin_unlock_bh(&mfc_unres_lock);
943 	return err;
944 }
945 
946 /*
947  *	MFC6 cache manipulation by user space
948  */
949 
950 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
951 {
952 	int line;
953 	struct mfc6_cache *c, **cp;
954 
955 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
956 
957 	for (cp = &net->ipv6.mfc6_cache_array[line];
958 	     (c = *cp) != NULL; cp = &c->next) {
959 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
960 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
961 			write_lock_bh(&mrt_lock);
962 			*cp = c->next;
963 			write_unlock_bh(&mrt_lock);
964 
965 			ip6mr_cache_free(c);
966 			return 0;
967 		}
968 	}
969 	return -ENOENT;
970 }
971 
972 static int ip6mr_device_event(struct notifier_block *this,
973 			      unsigned long event, void *ptr)
974 {
975 	struct net_device *dev = ptr;
976 	struct net *net = dev_net(dev);
977 	struct mif_device *v;
978 	int ct;
979 
980 	if (event != NETDEV_UNREGISTER)
981 		return NOTIFY_DONE;
982 
983 	v = &net->ipv6.vif6_table[0];
984 	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
985 		if (v->dev == dev)
986 			mif6_delete(net, ct);
987 	}
988 	return NOTIFY_DONE;
989 }
990 
991 static struct notifier_block ip6_mr_notifier = {
992 	.notifier_call = ip6mr_device_event
993 };
994 
995 /*
996  *	Set up per-namespace state for IPv6 multicast routing
997  */
998 
999 static int __net_init ip6mr_net_init(struct net *net)
1000 {
1001 	int err = 0;
1002 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1003 				       GFP_KERNEL);
1004 	if (!net->ipv6.vif6_table) {
1005 		err = -ENOMEM;
1006 		goto fail;
1007 	}
1008 
1009 	/* Forwarding cache */
1010 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1011 					     sizeof(struct mfc6_cache *),
1012 					     GFP_KERNEL);
1013 	if (!net->ipv6.mfc6_cache_array) {
1014 		err = -ENOMEM;
1015 		goto fail_mfc6_cache;
1016 	}
1017 
1018 #ifdef CONFIG_IPV6_PIMSM_V2
1019 	net->ipv6.mroute_reg_vif_num = -1;
1020 #endif
1021 
1022 #ifdef CONFIG_PROC_FS
1023 	err = -ENOMEM;
1024 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1025 		goto proc_vif_fail;
1026 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1027 		goto proc_cache_fail;
1028 #endif
1029 	return 0;
1030 
1031 #ifdef CONFIG_PROC_FS
1032 proc_cache_fail:
1033 	proc_net_remove(net, "ip6_mr_vif");
1034 proc_vif_fail:
1035 	kfree(net->ipv6.mfc6_cache_array);
1036 #endif
1037 fail_mfc6_cache:
1038 	kfree(net->ipv6.vif6_table);
1039 fail:
1040 	return err;
1041 }
1042 
1043 static void __net_exit ip6mr_net_exit(struct net *net)
1044 {
1045 #ifdef CONFIG_PROC_FS
1046 	proc_net_remove(net, "ip6_mr_cache");
1047 	proc_net_remove(net, "ip6_mr_vif");
1048 #endif
1049 	mroute_clean_tables(net);
1050 	kfree(net->ipv6.mfc6_cache_array);
1051 	kfree(net->ipv6.vif6_table);
1052 }
1053 
1054 static struct pernet_operations ip6mr_net_ops = {
1055 	.init = ip6mr_net_init,
1056 	.exit = ip6mr_net_exit,
1057 };
1058 
1059 int __init ip6_mr_init(void)
1060 {
1061 	int err;
1062 
1063 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1064 				       sizeof(struct mfc6_cache),
1065 				       0, SLAB_HWCACHE_ALIGN,
1066 				       NULL);
1067 	if (!mrt_cachep)
1068 		return -ENOMEM;
1069 
1070 	err = register_pernet_subsys(&ip6mr_net_ops);
1071 	if (err)
1072 		goto reg_pernet_fail;
1073 
1074 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1075 	err = register_netdevice_notifier(&ip6_mr_notifier);
1076 	if (err)
1077 		goto reg_notif_fail;
1078 #ifdef CONFIG_IPV6_PIMSM_V2
1079 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1080 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1081 		err = -EAGAIN;
1082 		goto add_proto_fail;
1083 	}
1084 #endif
1085 	return 0;
1086 #ifdef CONFIG_IPV6_PIMSM_V2
1087 add_proto_fail:
1088 	unregister_netdevice_notifier(&ip6_mr_notifier);
1089 #endif
1090 reg_notif_fail:
1091 	del_timer(&ipmr_expire_timer);
1092 	unregister_pernet_subsys(&ip6mr_net_ops);
1093 reg_pernet_fail:
1094 	kmem_cache_destroy(mrt_cachep);
1095 	return err;
1096 }
1097 
1098 void ip6_mr_cleanup(void)
1099 {
1100 	unregister_netdevice_notifier(&ip6_mr_notifier);
1101 	del_timer(&ipmr_expire_timer);
1102 	unregister_pernet_subsys(&ip6mr_net_ops);
1103 	kmem_cache_destroy(mrt_cachep);
1104 }
1105 
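/*
 * Add or update a resolved MFC entry.  If a matching entry was sitting
 * on the unresolved queue, take it off and replay its queued packets
 * through ip6mr_cache_resolve().
 */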
1106 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1107 {
1108 	int line;
1109 	struct mfc6_cache *uc, *c, **cp;
1110 	unsigned char ttls[MAXMIFS];
1111 	int i;
1112 
1113 	memset(ttls, 255, MAXMIFS);
1114 	for (i = 0; i < MAXMIFS; i++) {
1115 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1116 			ttls[i] = 1;
1117 	}
1119 
1120 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1121 
1122 	for (cp = &net->ipv6.mfc6_cache_array[line];
1123 	     (c = *cp) != NULL; cp = &c->next) {
1124 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1125 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1126 			break;
1127 	}
1128 
1129 	if (c != NULL) {
1130 		write_lock_bh(&mrt_lock);
1131 		c->mf6c_parent = mfc->mf6cc_parent;
1132 		ip6mr_update_thresholds(c, ttls);
1133 		if (!mrtsock)
1134 			c->mfc_flags |= MFC_STATIC;
1135 		write_unlock_bh(&mrt_lock);
1136 		return 0;
1137 	}
1138 
1139 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1140 		return -EINVAL;
1141 
1142 	c = ip6mr_cache_alloc(net);
1143 	if (c == NULL)
1144 		return -ENOMEM;
1145 
1146 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1147 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1148 	c->mf6c_parent = mfc->mf6cc_parent;
1149 	ip6mr_update_thresholds(c, ttls);
1150 	if (!mrtsock)
1151 		c->mfc_flags |= MFC_STATIC;
1152 
1153 	write_lock_bh(&mrt_lock);
1154 	c->next = net->ipv6.mfc6_cache_array[line];
1155 	net->ipv6.mfc6_cache_array[line] = c;
1156 	write_unlock_bh(&mrt_lock);
1157 
1158 	/*
1159 	 *	Check whether we have just resolved a queued unresolved entry.
1160 	 *	If so, send on the queued frames and tidy up.
1161 	 */
1162 	spin_lock_bh(&mfc_unres_lock);
1163 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1164 	     cp = &uc->next) {
1165 		if (net_eq(mfc6_net(uc), net) &&
1166 		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1167 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1168 			*cp = uc->next;
1169 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
1170 			break;
1171 		}
1172 	}
1173 	if (mfc_unres_queue == NULL)
1174 		del_timer(&ipmr_expire_timer);
1175 	spin_unlock_bh(&mfc_unres_lock);
1176 
1177 	if (uc) {
1178 		ip6mr_cache_resolve(uc, c);
1179 		ip6mr_cache_free(uc);
1180 	}
1181 	return 0;
1182 }
1183 
1184 /*
1185  *	Close the multicast socket, and clear the vif tables etc.
1186  */
1187 
1188 static void mroute_clean_tables(struct net *net)
1189 {
1190 	int i;
1191 
1192 	/*
1193 	 *	Shut down all active vif entries
1194 	 */
1195 	for (i = 0; i < net->ipv6.maxvif; i++) {
1196 		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
1197 			mif6_delete(net, i);
1198 	}
1199 
1200 	/*
1201 	 *	Wipe the cache
1202 	 */
1203 	for (i = 0; i < MFC6_LINES; i++) {
1204 		struct mfc6_cache *c, **cp;
1205 
1206 		cp = &net->ipv6.mfc6_cache_array[i];
1207 		while ((c = *cp) != NULL) {
1208 			if (c->mfc_flags & MFC_STATIC) {
1209 				cp = &c->next;
1210 				continue;
1211 			}
1212 			write_lock_bh(&mrt_lock);
1213 			*cp = c->next;
1214 			write_unlock_bh(&mrt_lock);
1215 
1216 			ip6mr_cache_free(c);
1217 		}
1218 	}
1219 
1220 	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
1221 		struct mfc6_cache *c, **cp;
1222 
1223 		spin_lock_bh(&mfc_unres_lock);
1224 		cp = &mfc_unres_queue;
1225 		while ((c = *cp) != NULL) {
1226 			if (!net_eq(mfc6_net(c), net)) {
1227 				cp = &c->next;
1228 				continue;
1229 			}
1230 			*cp = c->next;
1231 			ip6mr_destroy_unres(c);
1232 		}
1233 		spin_unlock_bh(&mfc_unres_lock);
1234 	}
1235 }
1236 
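/*
 * Only one multicast-routing daemon socket may exist per network
 * namespace; a second MRT6_INIT fails with -EADDRINUSE.
 */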
1237 static int ip6mr_sk_init(struct sock *sk)
1238 {
1239 	int err = 0;
1240 	struct net *net = sock_net(sk);
1241 
1242 	rtnl_lock();
1243 	write_lock_bh(&mrt_lock);
1244 	if (likely(net->ipv6.mroute6_sk == NULL)) {
1245 		net->ipv6.mroute6_sk = sk;
1246 		net->ipv6.devconf_all->mc_forwarding++;
1247 	} else
1249 		err = -EADDRINUSE;
1250 	write_unlock_bh(&mrt_lock);
1251 
1252 	rtnl_unlock();
1253 
1254 	return err;
1255 }
1256 
1257 int ip6mr_sk_done(struct sock *sk)
1258 {
1259 	int err = 0;
1260 	struct net *net = sock_net(sk);
1261 
1262 	rtnl_lock();
1263 	if (sk == net->ipv6.mroute6_sk) {
1264 		write_lock_bh(&mrt_lock);
1265 		net->ipv6.mroute6_sk = NULL;
1266 		net->ipv6.devconf_all->mc_forwarding--;
1267 		write_unlock_bh(&mrt_lock);
1268 
1269 		mroute_clean_tables(net);
1270 	} else
1271 		err = -EACCES;
1272 	rtnl_unlock();
1273 
1274 	return err;
1275 }
1276 
1277 /*
1278  *	Socket options and virtual interface manipulation. The whole
1279  *	virtual interface system is a complete heap, but unfortunately
1280  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1281  *	MOSPF/PIM router set up we can clean this up.
1282  */
1283 
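/*
 * A minimal userspace sketch (not part of this file) of how a daemon
 * such as pim6sd might drive this API.  The MRT6_* options and the
 * mif6ctl/mf6cctl structures come from <linux/mroute6.h>; ifindex0,
 * src and grp are illustrative, and error handling is omitted:
 *
 *	int one = 1;
 *	struct mif6ctl mif = { .mif6c_mifi = 0, .mif6c_pifi = ifindex0 };
 *	struct mf6cctl mfc = { .mf6cc_parent = 0 };
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *	... add further mifs (1, 2, ...) the same way ...
 *	mfc.mf6cc_origin.sin6_addr = src;
 *	mfc.mf6cc_mcastgrp.sin6_addr = grp;
 *	IF_SET(1, &mfc.mf6cc_ifset);	(forward from mif 0 to mif 1)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 *	...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */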
1284 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1285 {
1286 	int ret;
1287 	struct mif6ctl vif;
1288 	struct mf6cctl mfc;
1289 	mifi_t mifi;
1290 	struct net *net = sock_net(sk);
1291 
1292 	if (optname != MRT6_INIT) {
1293 		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1294 			return -EACCES;
1295 	}
1296 
1297 	switch (optname) {
1298 	case MRT6_INIT:
1299 		if (sk->sk_type != SOCK_RAW ||
1300 		    inet_sk(sk)->num != IPPROTO_ICMPV6)
1301 			return -EOPNOTSUPP;
1302 		if (optlen < sizeof(int))
1303 			return -EINVAL;
1304 
1305 		return ip6mr_sk_init(sk);
1306 
1307 	case MRT6_DONE:
1308 		return ip6mr_sk_done(sk);
1309 
1310 	case MRT6_ADD_MIF:
1311 		if (optlen < sizeof(vif))
1312 			return -EINVAL;
1313 		if (copy_from_user(&vif, optval, sizeof(vif)))
1314 			return -EFAULT;
1315 		if (vif.mif6c_mifi >= MAXMIFS)
1316 			return -ENFILE;
1317 		rtnl_lock();
1318 		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
1319 		rtnl_unlock();
1320 		return ret;
1321 
1322 	case MRT6_DEL_MIF:
1323 		if (optlen < sizeof(mifi_t))
1324 			return -EINVAL;
1325 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1326 			return -EFAULT;
1327 		rtnl_lock();
1328 		ret = mif6_delete(net, mifi);
1329 		rtnl_unlock();
1330 		return ret;
1331 
1332 	/*
1333 	 *	Manipulate the forwarding caches. These live
1334 	 *	in a sort of kernel/user symbiosis.
1335 	 */
1336 	case MRT6_ADD_MFC:
1337 	case MRT6_DEL_MFC:
1338 		if (optlen < sizeof(mfc))
1339 			return -EINVAL;
1340 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1341 			return -EFAULT;
1342 		rtnl_lock();
1343 		if (optname == MRT6_DEL_MFC)
1344 			ret = ip6mr_mfc_delete(net, &mfc);
1345 		else
1346 			ret = ip6mr_mfc_add(net, &mfc,
1347 					    sk == net->ipv6.mroute6_sk);
1348 		rtnl_unlock();
1349 		return ret;
1350 
1351 	/*
1352 	 *	Control PIM assert mode (enabling PIM also enables asserts)
1353 	 */
1354 	case MRT6_ASSERT:
1355 	{
1356 		int v;
1357 		if (get_user(v, (int __user *)optval))
1358 			return -EFAULT;
1359 		net->ipv6.mroute_do_assert = !!v;
1360 		return 0;
1361 	}
1362 
1363 #ifdef CONFIG_IPV6_PIMSM_V2
1364 	case MRT6_PIM:
1365 	{
1366 		int v;
1367 		if (get_user(v, (int __user *)optval))
1368 			return -EFAULT;
1369 		v = !!v;
1370 		rtnl_lock();
1371 		ret = 0;
1372 		if (v != net->ipv6.mroute_do_pim) {
1373 			net->ipv6.mroute_do_pim = v;
1374 			net->ipv6.mroute_do_assert = v;
1375 		}
1376 		rtnl_unlock();
1377 		return ret;
1378 	}
1379 
1380 #endif
1381 	/*
1382 	 *	Spurious command, or MRT6_VERSION which you cannot
1383 	 *	set.
1384 	 */
1385 	default:
1386 		return -ENOPROTOOPT;
1387 	}
1388 }
1389 
1390 /*
1391  *	Getsock opt support for the multicast routing system.
1392  */
1393 
1394 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1395 			  int __user *optlen)
1396 {
1397 	int olr;
1398 	int val;
1399 	struct net *net = sock_net(sk);
1400 
1401 	switch (optname) {
1402 	case MRT6_VERSION:
1403 		val = 0x0305;
1404 		break;
1405 #ifdef CONFIG_IPV6_PIMSM_V2
1406 	case MRT6_PIM:
1407 		val = net->ipv6.mroute_do_pim;
1408 		break;
1409 #endif
1410 	case MRT6_ASSERT:
1411 		val = net->ipv6.mroute_do_assert;
1412 		break;
1413 	default:
1414 		return -ENOPROTOOPT;
1415 	}
1416 
1417 	if (get_user(olr, optlen))
1418 		return -EFAULT;
1419 
1420 	olr = min_t(int, olr, sizeof(int));
1421 	if (olr < 0)
1422 		return -EINVAL;
1423 
1424 	if (put_user(olr, optlen))
1425 		return -EFAULT;
1426 	if (copy_to_user(optval, &val, olr))
1427 		return -EFAULT;
1428 	return 0;
1429 }
1430 
1431 /*
1432  *	The IP multicast ioctl support routines.
1433  */
1434 
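/*
 * SIOCGETMIFCNT_IN6 and SIOCGETSGCNT_IN6 let userspace snapshot the
 * per-mif and per-(S,G) counters: fill in vr.mifi (or sr.src/sr.grp),
 * issue the ioctl on the mrouted socket, and read the counters back
 * from the same structure.
 */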
1435 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1436 {
1437 	struct sioc_sg_req6 sr;
1438 	struct sioc_mif_req6 vr;
1439 	struct mif_device *vif;
1440 	struct mfc6_cache *c;
1441 	struct net *net = sock_net(sk);
1442 
1443 	switch (cmd) {
1444 	case SIOCGETMIFCNT_IN6:
1445 		if (copy_from_user(&vr, arg, sizeof(vr)))
1446 			return -EFAULT;
1447 		if (vr.mifi >= net->ipv6.maxvif)
1448 			return -EINVAL;
1449 		read_lock(&mrt_lock);
1450 		vif = &net->ipv6.vif6_table[vr.mifi];
1451 		if (MIF_EXISTS(net, vr.mifi)) {
1452 			vr.icount = vif->pkt_in;
1453 			vr.ocount = vif->pkt_out;
1454 			vr.ibytes = vif->bytes_in;
1455 			vr.obytes = vif->bytes_out;
1456 			read_unlock(&mrt_lock);
1457 
1458 			if (copy_to_user(arg, &vr, sizeof(vr)))
1459 				return -EFAULT;
1460 			return 0;
1461 		}
1462 		read_unlock(&mrt_lock);
1463 		return -EADDRNOTAVAIL;
1464 	case SIOCGETSGCNT_IN6:
1465 		if (copy_from_user(&sr, arg, sizeof(sr)))
1466 			return -EFAULT;
1467 
1468 		read_lock(&mrt_lock);
1469 		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1470 		if (c) {
1471 			sr.pktcnt = c->mfc_un.res.pkt;
1472 			sr.bytecnt = c->mfc_un.res.bytes;
1473 			sr.wrong_if = c->mfc_un.res.wrong_if;
1474 			read_unlock(&mrt_lock);
1475 
1476 			if (copy_to_user(arg, &sr, sizeof(sr)))
1477 				return -EFAULT;
1478 			return 0;
1479 		}
1480 		read_unlock(&mrt_lock);
1481 		return -EADDRNOTAVAIL;
1482 	default:
1483 		return -ENOIOCTLCMD;
1484 	}
1485 }
1486 
1487 
1488 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1489 {
1490 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1491 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1492 	return dst_output(skb);
1493 }
1494 
1495 /*
1496  *	Processing handlers for ip6mr_forward
1497  */
1498 
1499 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1500 {
1501 	struct ipv6hdr *ipv6h;
1502 	struct net *net = mfc6_net(c);
1503 	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1504 	struct net_device *dev;
1505 	struct dst_entry *dst;
1506 	struct flowi fl;
1507 
1508 	if (vif->dev == NULL)
1509 		goto out_free;
1510 
1511 #ifdef CONFIG_IPV6_PIMSM_V2
1512 	if (vif->flags & MIFF_REGISTER) {
1513 		vif->pkt_out++;
1514 		vif->bytes_out += skb->len;
1515 		vif->dev->stats.tx_bytes += skb->len;
1516 		vif->dev->stats.tx_packets++;
1517 		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
1518 		goto out_free;
1519 	}
1520 #endif
1521 
1522 	ipv6h = ipv6_hdr(skb);
1523 
1524 	fl = (struct flowi) {
1525 		.oif = vif->link,
1526 		.nl_u = { .ip6_u =
1527 				{ .daddr = ipv6h->daddr, }
1528 		}
1529 	};
1530 
1531 	dst = ip6_route_output(net, NULL, &fl);
1532 	if (!dst)
1533 		goto out_free;
1534 
1535 	skb_dst_drop(skb);
1536 	skb_dst_set(skb, dst);
1537 
1538 	/*
1539 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1540 	 * not only before forwarding, but also after forwarding on all output
1541 	 * interfaces.  Clearly, if the mrouter runs a multicast application,
1542 	 * that application should receive packets regardless of which
1543 	 * interface it joined on.
1544 	 * If we did not do this, the application would have to join on all
1545 	 * interfaces.  On the other hand, a multihomed host (or router, but
1546 	 * not mrouter) cannot join on more than one interface - that would
1547 	 * result in receiving multiple copies of the packet.
1548 	 */
1549 	dev = vif->dev;
1550 	skb->dev = dev;
1551 	vif->pkt_out++;
1552 	vif->bytes_out += skb->len;
1553 
1554 	/* We are about to write */
1555 	/* XXX: extension headers? */
1556 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1557 		goto out_free;
1558 
1559 	ipv6h = ipv6_hdr(skb);
1560 	ipv6h->hop_limit--;
1561 
1562 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1563 
1564 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1565 		       ip6mr_forward2_finish);
1566 
1567 out_free:
1568 	kfree_skb(skb);
1569 	return 0;
1570 }
1571 
1572 static int ip6mr_find_vif(struct net_device *dev)
1573 {
1574 	struct net *net = dev_net(dev);
1575 	int ct;
1576 	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1577 		if (net->ipv6.vif6_table[ct].dev == dev)
1578 			break;
1579 	}
1580 	return ct;
1581 }
1582 
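/*
 * Forwarding decision: a copy of the packet goes to every mif in
 * [minvif, maxvif) whose configured threshold (ttls[ct]) is below the
 * packet's remaining hop limit.  All but the last copy are clones; the
 * last recipient consumes the original skb.
 */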
1583 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1584 {
1585 	int psend = -1;
1586 	int vif, ct;
1587 	struct net *net = mfc6_net(cache);
1588 
1589 	vif = cache->mf6c_parent;
1590 	cache->mfc_un.res.pkt++;
1591 	cache->mfc_un.res.bytes += skb->len;
1592 
1593 	/*
1594 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1595 	 */
1596 	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
1597 		int true_vifi;
1598 
1599 		cache->mfc_un.res.wrong_if++;
1600 		true_vifi = ip6mr_find_vif(skb->dev);
1601 
1602 		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
1603 		    /* PIM-SM uses asserts when switching from the RPT to the
1604 		       SPT, so we cannot check that the packet arrived on an
1605 		       oif.  It is bad, but otherwise we would need to move a
1606 		       pretty large chunk of pimd into the kernel. Ough... --ANK
1607 		     */
1608 		    (net->ipv6.mroute_do_pim ||
1609 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1610 		    time_after(jiffies,
1611 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1612 			cache->mfc_un.res.last_assert = jiffies;
1613 			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
1614 		}
1615 		goto dont_forward;
1616 	}
1617 
1618 	net->ipv6.vif6_table[vif].pkt_in++;
1619 	net->ipv6.vif6_table[vif].bytes_in += skb->len;
1620 
1621 	/*
1622 	 *	Forward the frame
1623 	 */
1624 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1625 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1626 			if (psend != -1) {
1627 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1628 				if (skb2)
1629 					ip6mr_forward2(skb2, cache, psend);
1630 			}
1631 			psend = ct;
1632 		}
1633 	}
1634 	if (psend != -1) {
1635 		ip6mr_forward2(skb, cache, psend);
1636 		return 0;
1637 	}
1638 
1639 dont_forward:
1640 	kfree_skb(skb);
1641 	return 0;
1642 }
1643 
1644 
1645 /*
1646  *	Multicast packets for forwarding arrive here
1647  */
1648 
1649 int ip6_mr_input(struct sk_buff *skb)
1650 {
1651 	struct mfc6_cache *cache;
1652 	struct net *net = dev_net(skb->dev);
1653 
1654 	read_lock(&mrt_lock);
1655 	cache = ip6mr_cache_find(net,
1656 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1657 
1658 	/*
1659 	 *	No usable cache entry
1660 	 */
1661 	if (cache == NULL) {
1662 		int vif;
1663 
1664 		vif = ip6mr_find_vif(skb->dev);
1665 		if (vif >= 0) {
1666 			int err = ip6mr_cache_unresolved(net, vif, skb);
1667 			read_unlock(&mrt_lock);
1668 
1669 			return err;
1670 		}
1671 		read_unlock(&mrt_lock);
1672 		kfree_skb(skb);
1673 		return -ENODEV;
1674 	}
1675 
1676 	ip6_mr_forward(skb, cache);
1677 
1678 	read_unlock(&mrt_lock);
1679 
1680 	return 0;
1681 }
1682 
1683 
1684 static int
1685 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1686 {
1687 	int ct;
1688 	struct rtnexthop *nhp;
1689 	struct net *net = mfc6_net(c);
1690 	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
1691 	u8 *b = skb_tail_pointer(skb);
1692 	struct rtattr *mp_head;
1693 
1694 	if (dev)
1695 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1696 
1697 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1698 
1699 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1700 		if (c->mfc_un.res.ttls[ct] < 255) {
1701 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1702 				goto rtattr_failure;
1703 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1704 			nhp->rtnh_flags = 0;
1705 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1706 			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
1707 			nhp->rtnh_len = sizeof(*nhp);
1708 		}
1709 	}
1710 	mp_head->rta_type = RTA_MULTIPATH;
1711 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1712 	rtm->rtm_type = RTN_MULTICAST;
1713 	return 1;
1714 
1715 rtattr_failure:
1716 	nlmsg_trim(skb, b);
1717 	return -EMSGSIZE;
1718 }
1719 
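/*
 * Note on the version == 0 trick used below: when ip6mr_get_route()
 * finds no cache entry it queues a dummy packet whose "IPv6 header"
 * has version 0 on the unresolved queue.  ip6mr_cache_resolve() and
 * ip6mr_destroy_unres() recognise such skbs as pending netlink
 * requests and answer them via rtnl_unicast() instead of forwarding.
 */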
1720 int ip6mr_get_route(struct net *net,
1721 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1722 {
1723 	int err;
1724 	struct mfc6_cache *cache;
1725 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1726 
1727 	read_lock(&mrt_lock);
1728 	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1729 
1730 	if (!cache) {
1731 		struct sk_buff *skb2;
1732 		struct ipv6hdr *iph;
1733 		struct net_device *dev;
1734 		int vif;
1735 
1736 		if (nowait) {
1737 			read_unlock(&mrt_lock);
1738 			return -EAGAIN;
1739 		}
1740 
1741 		dev = skb->dev;
1742 		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1743 			read_unlock(&mrt_lock);
1744 			return -ENODEV;
1745 		}
1746 
1747 		/* really correct? */
1748 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1749 		if (!skb2) {
1750 			read_unlock(&mrt_lock);
1751 			return -ENOMEM;
1752 		}
1753 
1754 		skb_reset_transport_header(skb2);
1755 
1756 		skb_put(skb2, sizeof(struct ipv6hdr));
1757 		skb_reset_network_header(skb2);
1758 
1759 		iph = ipv6_hdr(skb2);
1760 		iph->version = 0;
1761 		iph->priority = 0;
1762 		iph->flow_lbl[0] = 0;
1763 		iph->flow_lbl[1] = 0;
1764 		iph->flow_lbl[2] = 0;
1765 		iph->payload_len = 0;
1766 		iph->nexthdr = IPPROTO_NONE;
1767 		iph->hop_limit = 0;
1768 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1769 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1770 
1771 		err = ip6mr_cache_unresolved(net, vif, skb2);
1772 		read_unlock(&mrt_lock);
1773 
1774 		return err;
1775 	}
1776 
1777 	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1778 		cache->mfc_flags |= MFC_NOTIFY;
1779 
1780 	err = ip6mr_fill_mroute(skb, cache, rtm);
1781 	read_unlock(&mrt_lock);
1782 	return err;
1783 }
1784 
1785