xref: /linux/net/ipv6/ip6mr.c (revision ff5599816711d2e67da2d7561fd36ac48debd433)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
/* State of one IPv6 multicast routing table. */
struct mr6_table {
	struct list_head	list;		/* linkage on net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning namespace, stored with write_pnet() */
#endif
	u32			id;		/* table id, e.g. RT6_TABLE_DFLT */
	struct sock		*mroute6_sk;	/* socket that cache reports are queued on */
	struct timer_list	ipmr_expire_timer; /* runs ipmr_expire_process() */
	struct list_head	mfc6_unres_queue; /* unresolved entries, under mfc_unres_lock */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved entries, indexed by MFC6_HASH() */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast interfaces; a slot is in use when ->dev is set */
	int			maxvif;		/* one past the highest used slot of vif6_table */
	atomic_t		cache_resolve_queue_len; /* decremented when an unresolved entry is destroyed */
	bool			mroute_do_assert; /* NOTE(review): presumably enables assert reports - confirm at users */
	bool			mroute_do_pim;	/* NOTE(review): presumably enables PIM handling - confirm at users */
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* slot of the PIM register vif, -1 when there is none */
#endif
};
76 
/* fib rule for this family; it carries nothing beyond the generic part. */
struct ip6mr_rule {
	struct fib_rule		common;
};
80 
/* Result slot handed to fib_rules_lookup(); filled in by ip6mr_rule_action(). */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
84 
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when slot _idx of _mrt's vif table has a device bound to it. */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

/* Slab cache that struct mfc6_cache entries are allocated from and freed to. */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for functions used before their definition. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every table linked on net->ipv6.mr6_tables (RCU list iterator). */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = { .result = &res, };
146 	int err;
147 
148 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149 			       flowi6_to_flowi(flp6), 0, &arg);
150 	if (err < 0)
151 		return err;
152 	*mrt = res.mrt;
153 	return 0;
154 }
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157 			     int flags, struct fib_lookup_arg *arg)
158 {
159 	struct ip6mr_result *res = arg->result;
160 	struct mr6_table *mrt;
161 
162 	switch (rule->action) {
163 	case FR_ACT_TO_TBL:
164 		break;
165 	case FR_ACT_UNREACHABLE:
166 		return -ENETUNREACH;
167 	case FR_ACT_PROHIBIT:
168 		return -EACCES;
169 	case FR_ACT_BLACKHOLE:
170 	default:
171 		return -EINVAL;
172 	}
173 
174 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
175 	if (mrt == NULL)
176 		return -EAGAIN;
177 	res->mrt = mrt;
178 	return 0;
179 }
180 
/* fib_rules ->match hook: this family has no selectors, so every flow matches. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
185 
/* Netlink attribute policy: only the generic fib-rule attributes are used. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
189 
/* fib_rules ->configure hook: nothing family-specific to parse, always succeeds. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
195 
/* fib_rules ->compare hook: no family-specific fields, so rules always compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203 			   struct fib_rule_hdr *frh)
204 {
205 	frh->dst_len = 0;
206 	frh->src_len = 0;
207 	frh->tos     = 0;
208 	return 0;
209 }
210 
/*
 * Template handed to fib_rules_register() for each namespace; wires the
 * ip6mr_rule_* callbacks above into the generic fib rules code.
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (mrt == NULL) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	kfree(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
263 		list_del(&mrt->list);
264 		ip6mr_free_table(mrt);
265 	}
266 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
267 }
268 #else
/* Single-table build: "iterate" over the one table, at most once. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
271 
272 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
273 {
274 	return net->ipv6.mrt6;
275 }
276 
277 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
278 			    struct mr6_table **mrt)
279 {
280 	*mrt = net->ipv6.mrt6;
281 	return 0;
282 }
283 
284 static int __net_init ip6mr_rules_init(struct net *net)
285 {
286 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
287 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
288 }
289 
290 static void __net_exit ip6mr_rules_exit(struct net *net)
291 {
292 	ip6mr_free_table(net->ipv6.mrt6);
293 }
294 #endif
295 
296 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
297 {
298 	struct mr6_table *mrt;
299 	unsigned int i;
300 
301 	mrt = ip6mr_get_table(net, id);
302 	if (mrt != NULL)
303 		return mrt;
304 
305 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
306 	if (mrt == NULL)
307 		return NULL;
308 	mrt->id = id;
309 	write_pnet(&mrt->net, net);
310 
311 	/* Forwarding cache */
312 	for (i = 0; i < MFC6_LINES; i++)
313 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
314 
315 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
316 
317 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
318 		    (unsigned long)mrt);
319 
320 #ifdef CONFIG_IPV6_PIMSM_V2
321 	mrt->mroute_reg_vif_num = -1;
322 #endif
323 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
324 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
325 #endif
326 	return mrt;
327 }
328 
329 static void ip6mr_free_table(struct mr6_table *mrt)
330 {
331 	del_timer(&mrt->ipmr_expire_timer);
332 	mroute_clean_tables(mrt);
333 	kfree(mrt);
334 }
335 
336 #ifdef CONFIG_PROC_FS
337 
/* seq_file cursor for the mfc cache listing. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* list currently walked: a cache bucket, the
					 * unresolved queue, or NULL when done; also
					 * tells ->stop() which lock is held */
	int ct;				/* index of the current cache bucket */
};
344 
345 
/*
 * Position the iterator on the @pos'th cache entry, counting the resolved
 * buckets first and the unresolved queue after them.
 *
 * When an entry is found the function returns with the matching lock still
 * held (mrt_lock for a resolved bucket, mfc_unres_lock for the unresolved
 * queue) and it->cache pointing at the list the entry lives on.  When @pos
 * is past the end, no lock is held, it->cache is NULL and NULL is returned.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
371 
372 /*
373  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
374  */
375 
/* seq_file cursor for the vif listing. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* current slot in mrt->vif6_table */
};
381 
382 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
383 					    struct ipmr_vif_iter *iter,
384 					    loff_t pos)
385 {
386 	struct mr6_table *mrt = iter->mrt;
387 
388 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
389 		if (!MIF_EXISTS(mrt, iter->ct))
390 			continue;
391 		if (pos-- == 0)
392 			return &mrt->vif6_table[iter->ct];
393 	}
394 	return NULL;
395 }
396 
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398 	__acquires(mrt_lock)
399 {
400 	struct ipmr_vif_iter *iter = seq->private;
401 	struct net *net = seq_file_net(seq);
402 	struct mr6_table *mrt;
403 
404 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
405 	if (mrt == NULL)
406 		return ERR_PTR(-ENOENT);
407 
408 	iter->mrt = mrt;
409 
410 	read_lock(&mrt_lock);
411 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
412 		: SEQ_START_TOKEN;
413 }
414 
415 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
416 {
417 	struct ipmr_vif_iter *iter = seq->private;
418 	struct net *net = seq_file_net(seq);
419 	struct mr6_table *mrt = iter->mrt;
420 
421 	++*pos;
422 	if (v == SEQ_START_TOKEN)
423 		return ip6mr_vif_seq_idx(net, iter, 0);
424 
425 	while (++iter->ct < mrt->maxvif) {
426 		if (!MIF_EXISTS(mrt, iter->ct))
427 			continue;
428 		return &mrt->vif6_table[iter->ct];
429 	}
430 	return NULL;
431 }
432 
433 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
434 	__releases(mrt_lock)
435 {
436 	read_unlock(&mrt_lock);
437 }
438 
439 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
440 {
441 	struct ipmr_vif_iter *iter = seq->private;
442 	struct mr6_table *mrt = iter->mrt;
443 
444 	if (v == SEQ_START_TOKEN) {
445 		seq_puts(seq,
446 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
447 	} else {
448 		const struct mif_device *vif = v;
449 		const char *name = vif->dev ? vif->dev->name : "none";
450 
451 		seq_printf(seq,
452 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
453 			   vif - mrt->vif6_table,
454 			   name, vif->bytes_in, vif->pkt_in,
455 			   vif->bytes_out, vif->pkt_out,
456 			   vif->flags);
457 	}
458 	return 0;
459 }
460 
/* seq_file iterator for the vif listing; mrt_lock is held from start to stop. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
467 
468 static int ip6mr_vif_open(struct inode *inode, struct file *file)
469 {
470 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
471 			    sizeof(struct ipmr_vif_iter));
472 }
473 
/* File operations of the vif proc file; reading goes through seq_file. */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
481 
482 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
483 {
484 	struct ipmr_mfc_iter *it = seq->private;
485 	struct net *net = seq_file_net(seq);
486 	struct mr6_table *mrt;
487 
488 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
489 	if (mrt == NULL)
490 		return ERR_PTR(-ENOENT);
491 
492 	it->mrt = mrt;
493 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
494 		: SEQ_START_TOKEN;
495 }
496 
/*
 * seq_file ->next for the mfc cache listing.  Advances *pos and returns the
 * entry following @v: the rest of the current list first, then the next
 * non-empty resolved bucket, then the unresolved queue.
 *
 * Lock hand-over: while walking resolved buckets mrt_lock is held; on moving
 * to the unresolved queue it is dropped and mfc_unres_lock is taken.  At the
 * end of the unresolved queue that lock is dropped too and it->cache is set
 * to NULL so that ->stop() releases nothing.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* More entries on the list we are currently walking? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	/* Not the unresolved queue, so it must be the bucket at index ct. */
	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
539 
540 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
541 {
542 	struct ipmr_mfc_iter *it = seq->private;
543 	struct mr6_table *mrt = it->mrt;
544 
545 	if (it->cache == &mrt->mfc6_unres_queue)
546 		spin_unlock_bh(&mfc_unres_lock);
547 	else if (it->cache == mrt->mfc6_cache_array)
548 		read_unlock(&mrt_lock);
549 }
550 
551 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
552 {
553 	int n;
554 
555 	if (v == SEQ_START_TOKEN) {
556 		seq_puts(seq,
557 			 "Group                            "
558 			 "Origin                           "
559 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
560 	} else {
561 		const struct mfc6_cache *mfc = v;
562 		const struct ipmr_mfc_iter *it = seq->private;
563 		struct mr6_table *mrt = it->mrt;
564 
565 		seq_printf(seq, "%pI6 %pI6 %-3hd",
566 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
567 			   mfc->mf6c_parent);
568 
569 		if (it->cache != &mrt->mfc6_unres_queue) {
570 			seq_printf(seq, " %8lu %8lu %8lu",
571 				   mfc->mfc_un.res.pkt,
572 				   mfc->mfc_un.res.bytes,
573 				   mfc->mfc_un.res.wrong_if);
574 			for (n = mfc->mfc_un.res.minvif;
575 			     n < mfc->mfc_un.res.maxvif; n++) {
576 				if (MIF_EXISTS(mrt, n) &&
577 				    mfc->mfc_un.res.ttls[n] < 255)
578 					seq_printf(seq,
579 						   " %2d:%-3d",
580 						   n, mfc->mfc_un.res.ttls[n]);
581 			}
582 		} else {
583 			/* unresolved mfc_caches don't contain
584 			 * pkt, bytes and wrong_if values
585 			 */
586 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
587 		}
588 		seq_putc(seq, '\n');
589 	}
590 	return 0;
591 }
592 
/* seq_file iterator for the mfc cache listing. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
599 
600 static int ipmr_mfc_open(struct inode *inode, struct file *file)
601 {
602 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
603 			    sizeof(struct ipmr_mfc_iter));
604 }
605 
/* File operations of the mfc cache proc file; reading goes through seq_file. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
613 #endif
614 
615 #ifdef CONFIG_IPV6_PIMSM_V2
616 
/*
 * Receive handler for PIM packets.  A valid PIM register message carrying
 * a multicast IPv6 packet is decapsulated and re-injected with netif_rx()
 * as if it had arrived on the table's register vif device.  Anything else
 * is dropped.  The skb is consumed in all cases and 0 is always returned.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* Need the PIM register header plus the inner IPv6 header in the linear area. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/*
	 * Drop unless this is a non-null register message.  The checksum is
	 * accepted if it verifies either over the PIM header alone or over
	 * the whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* Pin the register device so it cannot go away after the lock is dropped. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip the outer headers so the skb starts at the inner IPv6 header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
682 
/* inet6 protocol descriptor whose receive handler is pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
686 
687 /* Service routines creating virtual interfaces: PIMREG */
688 
689 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
690 				      struct net_device *dev)
691 {
692 	struct net *net = dev_net(dev);
693 	struct mr6_table *mrt;
694 	struct flowi6 fl6 = {
695 		.flowi6_oif	= dev->ifindex,
696 		.flowi6_iif	= skb->skb_iif,
697 		.flowi6_mark	= skb->mark,
698 	};
699 	int err;
700 
701 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
702 	if (err < 0) {
703 		kfree_skb(skb);
704 		return err;
705 	}
706 
707 	read_lock(&mrt_lock);
708 	dev->stats.tx_bytes += skb->len;
709 	dev->stats.tx_packets++;
710 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
711 	read_unlock(&mrt_lock);
712 	kfree_skb(skb);
713 	return NETDEV_TX_OK;
714 }
715 
/* Device operations of the register vif; only transmit is implemented. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
719 
720 static void reg_vif_setup(struct net_device *dev)
721 {
722 	dev->type		= ARPHRD_PIMREG;
723 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
724 	dev->flags		= IFF_NOARP;
725 	dev->netdev_ops		= &reg_vif_netdev_ops;
726 	dev->destructor		= free_netdev;
727 	dev->features		|= NETIF_F_NETNS_LOCAL;
728 }
729 
730 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
731 {
732 	struct net_device *dev;
733 	char name[IFNAMSIZ];
734 
735 	if (mrt->id == RT6_TABLE_DFLT)
736 		sprintf(name, "pim6reg");
737 	else
738 		sprintf(name, "pim6reg%u", mrt->id);
739 
740 	dev = alloc_netdev(0, name, reg_vif_setup);
741 	if (dev == NULL)
742 		return NULL;
743 
744 	dev_net_set(dev, net);
745 
746 	if (register_netdevice(dev)) {
747 		free_netdev(dev);
748 		return NULL;
749 	}
750 	dev->iflink = 0;
751 
752 	if (dev_open(dev))
753 		goto failure;
754 
755 	dev_hold(dev);
756 	return dev;
757 
758 failure:
759 	/* allow the register to be completed before unregistering. */
760 	rtnl_unlock();
761 	rtnl_lock();
762 
763 	unregister_netdevice(dev);
764 	return NULL;
765 }
766 #endif
767 
/*
 *	Delete a VIF entry
 *
 *	Unbinds the device from slot @vifi of @mrt, shrinks mrt->maxvif when
 *	the last slot was removed, drops the allmulti and mc_forwarding
 *	counts taken on the device and releases the table's reference on it.
 *	A register vif device is additionally queued for unregistration on
 *	@head.
 *
 *	Returns 0 on success, -EADDRNOTAVAIL when @vifi is out of range or
 *	the slot is not in use.
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	/* Detach the device under the write lock so readers see the slot as empty. */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* Removed the highest slot: pull maxvif down to the next one still in use. */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	/* Undo the allmulti count taken in mif6_add(). */
	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
824 
825 static inline void ip6mr_cache_free(struct mfc6_cache *c)
826 {
827 	kmem_cache_free(mrt_cachep, c);
828 }
829 
830 /* Destroy an unresolved cache entry, killing queued skbs
831    and reporting error to netlink readers.
832  */
833 
834 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
835 {
836 	struct net *net = read_pnet(&mrt->net);
837 	struct sk_buff *skb;
838 
839 	atomic_dec(&mrt->cache_resolve_queue_len);
840 
841 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
842 		if (ipv6_hdr(skb)->version == 0) {
843 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
844 			nlh->nlmsg_type = NLMSG_ERROR;
845 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
846 			skb_trim(skb, nlh->nlmsg_len);
847 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
848 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
849 		} else
850 			kfree_skb(skb);
851 	}
852 
853 	ip6mr_cache_free(c);
854 }
855 
856 
857 /* Timer process for all the unresolved queue. */
858 
859 static void ipmr_do_expire_process(struct mr6_table *mrt)
860 {
861 	unsigned long now = jiffies;
862 	unsigned long expires = 10 * HZ;
863 	struct mfc6_cache *c, *next;
864 
865 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
866 		if (time_after(c->mfc_un.unres.expires, now)) {
867 			/* not yet... */
868 			unsigned long interval = c->mfc_un.unres.expires - now;
869 			if (interval < expires)
870 				expires = interval;
871 			continue;
872 		}
873 
874 		list_del(&c->list);
875 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
876 		ip6mr_destroy_unres(mrt, c);
877 	}
878 
879 	if (!list_empty(&mrt->mfc6_unres_queue))
880 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
881 }
882 
883 static void ipmr_expire_process(unsigned long arg)
884 {
885 	struct mr6_table *mrt = (struct mr6_table *)arg;
886 
887 	if (!spin_trylock(&mfc_unres_lock)) {
888 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
889 		return;
890 	}
891 
892 	if (!list_empty(&mrt->mfc6_unres_queue))
893 		ipmr_do_expire_process(mrt);
894 
895 	spin_unlock(&mfc_unres_lock);
896 }
897 
898 /* Fill oifs list. It is called under write locked mrt_lock. */
899 
900 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
901 				    unsigned char *ttls)
902 {
903 	int vifi;
904 
905 	cache->mfc_un.res.minvif = MAXMIFS;
906 	cache->mfc_un.res.maxvif = 0;
907 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
908 
909 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
910 		if (MIF_EXISTS(mrt, vifi) &&
911 		    ttls[vifi] && ttls[vifi] < 255) {
912 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
913 			if (cache->mfc_un.res.minvif > vifi)
914 				cache->mfc_un.res.minvif = vifi;
915 			if (cache->mfc_un.res.maxvif <= vifi)
916 				cache->mfc_un.res.maxvif = vifi + 1;
917 		}
918 	}
919 }
920 
/*
 * Bind a device to slot vifc->mif6c_mifi of @mrt.
 *
 * With flag MIFF_REGISTER (PIM builds only) a new register device is
 * created for the table; with no flags the existing device with ifindex
 * vifc->mif6c_pifi is used.  The device is put into allmulti mode and its
 * mc_forwarding count is bumped.  @mrtsock == 0 marks the vif VIFF_STATIC.
 *
 * Returns 0 on success, -EADDRINUSE when the slot (or the register vif) is
 * already taken, -ENOBUFS / -EADDRNOTAVAIL when no device could be
 * obtained, -EINVAL for unsupported flags, or the dev_set_allmulti() error.
 *
 * NOTE(review): the slot index is not range-checked here; presumably the
 * caller validates it against MAXMIFS - confirm.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;	/* from here on MIF_EXISTS() sees the slot as in use */
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1004 
1005 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1006 					   const struct in6_addr *origin,
1007 					   const struct in6_addr *mcastgrp)
1008 {
1009 	int line = MFC6_HASH(mcastgrp, origin);
1010 	struct mfc6_cache *c;
1011 
1012 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1013 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1014 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1015 			return c;
1016 	}
1017 	return NULL;
1018 }
1019 
1020 /* Look for a (*,*,oif) entry */
1021 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1022 						      mifi_t mifi)
1023 {
1024 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1025 	struct mfc6_cache *c;
1026 
1027 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1028 		if (ipv6_addr_any(&c->mf6c_origin) &&
1029 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1030 		    (c->mfc_un.res.ttls[mifi] < 255))
1031 			return c;
1032 
1033 	return NULL;
1034 }
1035 
1036 /* Look for a (*,G) entry */
1037 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1038 					       struct in6_addr *mcastgrp,
1039 					       mifi_t mifi)
1040 {
1041 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1042 	struct mfc6_cache *c, *proxy;
1043 
1044 	if (ipv6_addr_any(mcastgrp))
1045 		goto skip;
1046 
1047 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1048 		if (ipv6_addr_any(&c->mf6c_origin) &&
1049 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1050 			if (c->mfc_un.res.ttls[mifi] < 255)
1051 				return c;
1052 
1053 			/* It's ok if the mifi is part of the static tree */
1054 			proxy = ip6mr_cache_find_any_parent(mrt,
1055 							    c->mf6c_parent);
1056 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1057 				return c;
1058 		}
1059 
1060 skip:
1061 	return ip6mr_cache_find_any_parent(mrt, mifi);
1062 }
1063 
1064 /*
1065  *	Allocate a multicast cache entry
1066  */
1067 static struct mfc6_cache *ip6mr_cache_alloc(void)
1068 {
1069 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1070 	if (c == NULL)
1071 		return NULL;
1072 	c->mfc_un.res.minvif = MAXMIFS;
1073 	return c;
1074 }
1075 
1076 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1077 {
1078 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1079 	if (c == NULL)
1080 		return NULL;
1081 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1082 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1083 	return c;
1084 }
1085 
1086 /*
1087  *	A cache entry has gone into a resolved state from queued
1088  */
1089 
1090 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1091 				struct mfc6_cache *uc, struct mfc6_cache *c)
1092 {
1093 	struct sk_buff *skb;
1094 
1095 	/*
1096 	 *	Play the pending entries through our router
1097 	 */
1098 
1099 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1100 		if (ipv6_hdr(skb)->version == 0) {
1101 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1102 
1103 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1104 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1105 			} else {
1106 				nlh->nlmsg_type = NLMSG_ERROR;
1107 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1108 				skb_trim(skb, nlh->nlmsg_len);
1109 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1110 			}
1111 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1112 		} else
1113 			ip6_mr_forward(net, mrt, skb, c);
1114 	}
1115 }
1116 
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Builds a struct mrt6msg describing @pkt and queues it on the table's
 *	mroute socket.  For MRT6MSG_WHOLEPKT (PIM builds only) the message
 *	header is prepended to a copy of the whole packet; for every other
 *	@assert value the message consists of a copy of the IPv6 header
 *	followed by the mrt6msg header.
 *
 *	Returns the sock_queue_rcv_skb() result, -ENOBUFS when no skb could
 *	be allocated, or -EINVAL when the table has no mroute socket.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		/* copy of pkt with headroom for the skipped bytes plus the message header */
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* No routing daemon socket on this table: nobody to report to. */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1212 
1213 /*
1214  *	Queue a packet for resolution. It gets locked cache entry!
1215  */
1216 
1217 static int
1218 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1219 {
1220 	bool found = false;
1221 	int err;
1222 	struct mfc6_cache *c;
1223 
1224 	spin_lock_bh(&mfc_unres_lock);
1225 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1226 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1227 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1228 			found = true;
1229 			break;
1230 		}
1231 	}
1232 
1233 	if (!found) {
1234 		/*
1235 		 *	Create a new entry if allowable
1236 		 */
1237 
1238 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1239 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1240 			spin_unlock_bh(&mfc_unres_lock);
1241 
1242 			kfree_skb(skb);
1243 			return -ENOBUFS;
1244 		}
1245 
1246 		/*
1247 		 *	Fill in the new cache entry
1248 		 */
1249 		c->mf6c_parent = -1;
1250 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1251 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1252 
1253 		/*
1254 		 *	Reflect first query at pim6sd
1255 		 */
1256 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1257 		if (err < 0) {
1258 			/* If the report failed throw the cache entry
1259 			   out - Brad Parker
1260 			 */
1261 			spin_unlock_bh(&mfc_unres_lock);
1262 
1263 			ip6mr_cache_free(c);
1264 			kfree_skb(skb);
1265 			return err;
1266 		}
1267 
1268 		atomic_inc(&mrt->cache_resolve_queue_len);
1269 		list_add(&c->list, &mrt->mfc6_unres_queue);
1270 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1271 
1272 		ipmr_do_expire_process(mrt);
1273 	}
1274 
1275 	/*
1276 	 *	See if we can append the packet
1277 	 */
1278 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1279 		kfree_skb(skb);
1280 		err = -ENOBUFS;
1281 	} else {
1282 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1283 		err = 0;
1284 	}
1285 
1286 	spin_unlock_bh(&mfc_unres_lock);
1287 	return err;
1288 }
1289 
1290 /*
1291  *	MFC6 cache manipulation by user space
1292  */
1293 
1294 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1295 			    int parent)
1296 {
1297 	int line;
1298 	struct mfc6_cache *c, *next;
1299 
1300 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1301 
1302 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1303 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1304 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1305 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1306 		    (parent == -1 || parent == c->mf6c_parent)) {
1307 			write_lock_bh(&mrt_lock);
1308 			list_del(&c->list);
1309 			write_unlock_bh(&mrt_lock);
1310 
1311 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1312 			ip6mr_cache_free(c);
1313 			return 0;
1314 		}
1315 	}
1316 	return -ENOENT;
1317 }
1318 
1319 static int ip6mr_device_event(struct notifier_block *this,
1320 			      unsigned long event, void *ptr)
1321 {
1322 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1323 	struct net *net = dev_net(dev);
1324 	struct mr6_table *mrt;
1325 	struct mif_device *v;
1326 	int ct;
1327 	LIST_HEAD(list);
1328 
1329 	if (event != NETDEV_UNREGISTER)
1330 		return NOTIFY_DONE;
1331 
1332 	ip6mr_for_each_table(mrt, net) {
1333 		v = &mrt->vif6_table[0];
1334 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1335 			if (v->dev == dev)
1336 				mif6_delete(mrt, ct, &list);
1337 		}
1338 	}
1339 	unregister_netdevice_many(&list);
1340 
1341 	return NOTIFY_DONE;
1342 }
1343 
/* Netdevice notifier hooked up in ip6_mr_init(); dispatches to ip6mr_device_event() */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1347 
1348 /*
1349  *	Setup for IP multicast routing
1350  */
1351 
1352 static int __net_init ip6mr_net_init(struct net *net)
1353 {
1354 	int err;
1355 
1356 	err = ip6mr_rules_init(net);
1357 	if (err < 0)
1358 		goto fail;
1359 
1360 #ifdef CONFIG_PROC_FS
1361 	err = -ENOMEM;
1362 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1363 		goto proc_vif_fail;
1364 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1365 		goto proc_cache_fail;
1366 #endif
1367 
1368 	return 0;
1369 
1370 #ifdef CONFIG_PROC_FS
1371 proc_cache_fail:
1372 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1373 proc_vif_fail:
1374 	ip6mr_rules_exit(net);
1375 #endif
1376 fail:
1377 	return err;
1378 }
1379 
/*
 * Per-namespace teardown: removes the proc entries created by
 * ip6mr_net_init() (when procfs is configured) and then releases the
 * routing rules/tables.
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}
1388 
/* Per-network-namespace init/exit hooks, registered from ip6_mr_init() */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1393 
1394 int __init ip6_mr_init(void)
1395 {
1396 	int err;
1397 
1398 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1399 				       sizeof(struct mfc6_cache),
1400 				       0, SLAB_HWCACHE_ALIGN,
1401 				       NULL);
1402 	if (!mrt_cachep)
1403 		return -ENOMEM;
1404 
1405 	err = register_pernet_subsys(&ip6mr_net_ops);
1406 	if (err)
1407 		goto reg_pernet_fail;
1408 
1409 	err = register_netdevice_notifier(&ip6_mr_notifier);
1410 	if (err)
1411 		goto reg_notif_fail;
1412 #ifdef CONFIG_IPV6_PIMSM_V2
1413 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1414 		pr_err("%s: can't add PIM protocol\n", __func__);
1415 		err = -EAGAIN;
1416 		goto add_proto_fail;
1417 	}
1418 #endif
1419 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1420 		      ip6mr_rtm_dumproute, NULL);
1421 	return 0;
1422 #ifdef CONFIG_IPV6_PIMSM_V2
1423 add_proto_fail:
1424 	unregister_netdevice_notifier(&ip6_mr_notifier);
1425 #endif
1426 reg_notif_fail:
1427 	unregister_pernet_subsys(&ip6mr_net_ops);
1428 reg_pernet_fail:
1429 	kmem_cache_destroy(mrt_cachep);
1430 	return err;
1431 }
1432 
1433 void ip6_mr_cleanup(void)
1434 {
1435 	unregister_netdevice_notifier(&ip6_mr_notifier);
1436 	unregister_pernet_subsys(&ip6mr_net_ops);
1437 	kmem_cache_destroy(mrt_cachep);
1438 }
1439 
1440 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1441 			 struct mf6cctl *mfc, int mrtsock, int parent)
1442 {
1443 	bool found = false;
1444 	int line;
1445 	struct mfc6_cache *uc, *c;
1446 	unsigned char ttls[MAXMIFS];
1447 	int i;
1448 
1449 	if (mfc->mf6cc_parent >= MAXMIFS)
1450 		return -ENFILE;
1451 
1452 	memset(ttls, 255, MAXMIFS);
1453 	for (i = 0; i < MAXMIFS; i++) {
1454 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1455 			ttls[i] = 1;
1456 
1457 	}
1458 
1459 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1460 
1461 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1462 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1463 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1464 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1465 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1466 			found = true;
1467 			break;
1468 		}
1469 	}
1470 
1471 	if (found) {
1472 		write_lock_bh(&mrt_lock);
1473 		c->mf6c_parent = mfc->mf6cc_parent;
1474 		ip6mr_update_thresholds(mrt, c, ttls);
1475 		if (!mrtsock)
1476 			c->mfc_flags |= MFC_STATIC;
1477 		write_unlock_bh(&mrt_lock);
1478 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1479 		return 0;
1480 	}
1481 
1482 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1483 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1484 		return -EINVAL;
1485 
1486 	c = ip6mr_cache_alloc();
1487 	if (c == NULL)
1488 		return -ENOMEM;
1489 
1490 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1491 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1492 	c->mf6c_parent = mfc->mf6cc_parent;
1493 	ip6mr_update_thresholds(mrt, c, ttls);
1494 	if (!mrtsock)
1495 		c->mfc_flags |= MFC_STATIC;
1496 
1497 	write_lock_bh(&mrt_lock);
1498 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1499 	write_unlock_bh(&mrt_lock);
1500 
1501 	/*
1502 	 *	Check to see if we resolved a queued list. If so we
1503 	 *	need to send on the frames and tidy up.
1504 	 */
1505 	found = false;
1506 	spin_lock_bh(&mfc_unres_lock);
1507 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1508 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1509 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1510 			list_del(&uc->list);
1511 			atomic_dec(&mrt->cache_resolve_queue_len);
1512 			found = true;
1513 			break;
1514 		}
1515 	}
1516 	if (list_empty(&mrt->mfc6_unres_queue))
1517 		del_timer(&mrt->ipmr_expire_timer);
1518 	spin_unlock_bh(&mfc_unres_lock);
1519 
1520 	if (found) {
1521 		ip6mr_cache_resolve(net, mrt, uc, c);
1522 		ip6mr_cache_free(uc);
1523 	}
1524 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1525 	return 0;
1526 }
1527 
1528 /*
1529  *	Close the multicast socket, and clear the vif tables etc
1530  */
1531 
1532 static void mroute_clean_tables(struct mr6_table *mrt)
1533 {
1534 	int i;
1535 	LIST_HEAD(list);
1536 	struct mfc6_cache *c, *next;
1537 
1538 	/*
1539 	 *	Shut down all active vif entries
1540 	 */
1541 	for (i = 0; i < mrt->maxvif; i++) {
1542 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1543 			mif6_delete(mrt, i, &list);
1544 	}
1545 	unregister_netdevice_many(&list);
1546 
1547 	/*
1548 	 *	Wipe the cache
1549 	 */
1550 	for (i = 0; i < MFC6_LINES; i++) {
1551 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1552 			if (c->mfc_flags & MFC_STATIC)
1553 				continue;
1554 			write_lock_bh(&mrt_lock);
1555 			list_del(&c->list);
1556 			write_unlock_bh(&mrt_lock);
1557 
1558 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1559 			ip6mr_cache_free(c);
1560 		}
1561 	}
1562 
1563 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1564 		spin_lock_bh(&mfc_unres_lock);
1565 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1566 			list_del(&c->list);
1567 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1568 			ip6mr_destroy_unres(mrt, c);
1569 		}
1570 		spin_unlock_bh(&mfc_unres_lock);
1571 	}
1572 }
1573 
1574 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1575 {
1576 	int err = 0;
1577 	struct net *net = sock_net(sk);
1578 
1579 	rtnl_lock();
1580 	write_lock_bh(&mrt_lock);
1581 	if (likely(mrt->mroute6_sk == NULL)) {
1582 		mrt->mroute6_sk = sk;
1583 		net->ipv6.devconf_all->mc_forwarding++;
1584 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1585 					     NETCONFA_IFINDEX_ALL,
1586 					     net->ipv6.devconf_all);
1587 	}
1588 	else
1589 		err = -EADDRINUSE;
1590 	write_unlock_bh(&mrt_lock);
1591 
1592 	rtnl_unlock();
1593 
1594 	return err;
1595 }
1596 
1597 int ip6mr_sk_done(struct sock *sk)
1598 {
1599 	int err = -EACCES;
1600 	struct net *net = sock_net(sk);
1601 	struct mr6_table *mrt;
1602 
1603 	rtnl_lock();
1604 	ip6mr_for_each_table(mrt, net) {
1605 		if (sk == mrt->mroute6_sk) {
1606 			write_lock_bh(&mrt_lock);
1607 			mrt->mroute6_sk = NULL;
1608 			net->ipv6.devconf_all->mc_forwarding--;
1609 			inet6_netconf_notify_devconf(net,
1610 						     NETCONFA_MC_FORWARDING,
1611 						     NETCONFA_IFINDEX_ALL,
1612 						     net->ipv6.devconf_all);
1613 			write_unlock_bh(&mrt_lock);
1614 
1615 			mroute_clean_tables(mrt);
1616 			err = 0;
1617 			break;
1618 		}
1619 	}
1620 	rtnl_unlock();
1621 
1622 	return err;
1623 }
1624 
1625 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1626 {
1627 	struct mr6_table *mrt;
1628 	struct flowi6 fl6 = {
1629 		.flowi6_iif	= skb->skb_iif,
1630 		.flowi6_oif	= skb->dev->ifindex,
1631 		.flowi6_mark	= skb->mark,
1632 	};
1633 
1634 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1635 		return NULL;
1636 
1637 	return mrt->mroute6_sk;
1638 }
1639 
1640 /*
1641  *	Socket options and virtual interface manipulation. The whole
1642  *	virtual interface system is a complete heap, but unfortunately
1643  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1644  *	MOSPF/PIM router set up we can clean this up.
1645  */
1646 
1647 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1648 {
1649 	int ret, parent = 0;
1650 	struct mif6ctl vif;
1651 	struct mf6cctl mfc;
1652 	mifi_t mifi;
1653 	struct net *net = sock_net(sk);
1654 	struct mr6_table *mrt;
1655 
1656 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1657 	if (mrt == NULL)
1658 		return -ENOENT;
1659 
1660 	if (optname != MRT6_INIT) {
1661 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1662 			return -EACCES;
1663 	}
1664 
1665 	switch (optname) {
1666 	case MRT6_INIT:
1667 		if (sk->sk_type != SOCK_RAW ||
1668 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1669 			return -EOPNOTSUPP;
1670 		if (optlen < sizeof(int))
1671 			return -EINVAL;
1672 
1673 		return ip6mr_sk_init(mrt, sk);
1674 
1675 	case MRT6_DONE:
1676 		return ip6mr_sk_done(sk);
1677 
1678 	case MRT6_ADD_MIF:
1679 		if (optlen < sizeof(vif))
1680 			return -EINVAL;
1681 		if (copy_from_user(&vif, optval, sizeof(vif)))
1682 			return -EFAULT;
1683 		if (vif.mif6c_mifi >= MAXMIFS)
1684 			return -ENFILE;
1685 		rtnl_lock();
1686 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1687 		rtnl_unlock();
1688 		return ret;
1689 
1690 	case MRT6_DEL_MIF:
1691 		if (optlen < sizeof(mifi_t))
1692 			return -EINVAL;
1693 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1694 			return -EFAULT;
1695 		rtnl_lock();
1696 		ret = mif6_delete(mrt, mifi, NULL);
1697 		rtnl_unlock();
1698 		return ret;
1699 
1700 	/*
1701 	 *	Manipulate the forwarding caches. These live
1702 	 *	in a sort of kernel/user symbiosis.
1703 	 */
1704 	case MRT6_ADD_MFC:
1705 	case MRT6_DEL_MFC:
1706 		parent = -1;
1707 	case MRT6_ADD_MFC_PROXY:
1708 	case MRT6_DEL_MFC_PROXY:
1709 		if (optlen < sizeof(mfc))
1710 			return -EINVAL;
1711 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1712 			return -EFAULT;
1713 		if (parent == 0)
1714 			parent = mfc.mf6cc_parent;
1715 		rtnl_lock();
1716 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1717 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1718 		else
1719 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1720 					    sk == mrt->mroute6_sk, parent);
1721 		rtnl_unlock();
1722 		return ret;
1723 
1724 	/*
1725 	 *	Control PIM assert (to activate pim will activate assert)
1726 	 */
1727 	case MRT6_ASSERT:
1728 	{
1729 		int v;
1730 
1731 		if (optlen != sizeof(v))
1732 			return -EINVAL;
1733 		if (get_user(v, (int __user *)optval))
1734 			return -EFAULT;
1735 		mrt->mroute_do_assert = v;
1736 		return 0;
1737 	}
1738 
1739 #ifdef CONFIG_IPV6_PIMSM_V2
1740 	case MRT6_PIM:
1741 	{
1742 		int v;
1743 
1744 		if (optlen != sizeof(v))
1745 			return -EINVAL;
1746 		if (get_user(v, (int __user *)optval))
1747 			return -EFAULT;
1748 		v = !!v;
1749 		rtnl_lock();
1750 		ret = 0;
1751 		if (v != mrt->mroute_do_pim) {
1752 			mrt->mroute_do_pim = v;
1753 			mrt->mroute_do_assert = v;
1754 		}
1755 		rtnl_unlock();
1756 		return ret;
1757 	}
1758 
1759 #endif
1760 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1761 	case MRT6_TABLE:
1762 	{
1763 		u32 v;
1764 
1765 		if (optlen != sizeof(u32))
1766 			return -EINVAL;
1767 		if (get_user(v, (u32 __user *)optval))
1768 			return -EFAULT;
1769 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1770 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1771 			return -EINVAL;
1772 		if (sk == mrt->mroute6_sk)
1773 			return -EBUSY;
1774 
1775 		rtnl_lock();
1776 		ret = 0;
1777 		if (!ip6mr_new_table(net, v))
1778 			ret = -ENOMEM;
1779 		raw6_sk(sk)->ip6mr_table = v;
1780 		rtnl_unlock();
1781 		return ret;
1782 	}
1783 #endif
1784 	/*
1785 	 *	Spurious command, or MRT6_VERSION which you cannot
1786 	 *	set.
1787 	 */
1788 	default:
1789 		return -ENOPROTOOPT;
1790 	}
1791 }
1792 
1793 /*
1794  *	Getsock opt support for the multicast routing system.
1795  */
1796 
1797 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1798 			  int __user *optlen)
1799 {
1800 	int olr;
1801 	int val;
1802 	struct net *net = sock_net(sk);
1803 	struct mr6_table *mrt;
1804 
1805 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1806 	if (mrt == NULL)
1807 		return -ENOENT;
1808 
1809 	switch (optname) {
1810 	case MRT6_VERSION:
1811 		val = 0x0305;
1812 		break;
1813 #ifdef CONFIG_IPV6_PIMSM_V2
1814 	case MRT6_PIM:
1815 		val = mrt->mroute_do_pim;
1816 		break;
1817 #endif
1818 	case MRT6_ASSERT:
1819 		val = mrt->mroute_do_assert;
1820 		break;
1821 	default:
1822 		return -ENOPROTOOPT;
1823 	}
1824 
1825 	if (get_user(olr, optlen))
1826 		return -EFAULT;
1827 
1828 	olr = min_t(int, olr, sizeof(int));
1829 	if (olr < 0)
1830 		return -EINVAL;
1831 
1832 	if (put_user(olr, optlen))
1833 		return -EFAULT;
1834 	if (copy_to_user(optval, &val, olr))
1835 		return -EFAULT;
1836 	return 0;
1837 }
1838 
1839 /*
1840  *	The IP multicast ioctl support routines.
1841  */
1842 
1843 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1844 {
1845 	struct sioc_sg_req6 sr;
1846 	struct sioc_mif_req6 vr;
1847 	struct mif_device *vif;
1848 	struct mfc6_cache *c;
1849 	struct net *net = sock_net(sk);
1850 	struct mr6_table *mrt;
1851 
1852 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1853 	if (mrt == NULL)
1854 		return -ENOENT;
1855 
1856 	switch (cmd) {
1857 	case SIOCGETMIFCNT_IN6:
1858 		if (copy_from_user(&vr, arg, sizeof(vr)))
1859 			return -EFAULT;
1860 		if (vr.mifi >= mrt->maxvif)
1861 			return -EINVAL;
1862 		read_lock(&mrt_lock);
1863 		vif = &mrt->vif6_table[vr.mifi];
1864 		if (MIF_EXISTS(mrt, vr.mifi)) {
1865 			vr.icount = vif->pkt_in;
1866 			vr.ocount = vif->pkt_out;
1867 			vr.ibytes = vif->bytes_in;
1868 			vr.obytes = vif->bytes_out;
1869 			read_unlock(&mrt_lock);
1870 
1871 			if (copy_to_user(arg, &vr, sizeof(vr)))
1872 				return -EFAULT;
1873 			return 0;
1874 		}
1875 		read_unlock(&mrt_lock);
1876 		return -EADDRNOTAVAIL;
1877 	case SIOCGETSGCNT_IN6:
1878 		if (copy_from_user(&sr, arg, sizeof(sr)))
1879 			return -EFAULT;
1880 
1881 		read_lock(&mrt_lock);
1882 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1883 		if (c) {
1884 			sr.pktcnt = c->mfc_un.res.pkt;
1885 			sr.bytecnt = c->mfc_un.res.bytes;
1886 			sr.wrong_if = c->mfc_un.res.wrong_if;
1887 			read_unlock(&mrt_lock);
1888 
1889 			if (copy_to_user(arg, &sr, sizeof(sr)))
1890 				return -EFAULT;
1891 			return 0;
1892 		}
1893 		read_unlock(&mrt_lock);
1894 		return -EADDRNOTAVAIL;
1895 	default:
1896 		return -ENOIOCTLCMD;
1897 	}
1898 }
1899 
1900 #ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6: the
 * (source, group) pair to look up, followed by the counters returned to the
 * caller as compat_ulong_t values.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1908 
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6: the
 * interface index to query, followed by its in/out packet and byte counters
 * returned to the caller as compat_ulong_t values.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1916 
1917 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1918 {
1919 	struct compat_sioc_sg_req6 sr;
1920 	struct compat_sioc_mif_req6 vr;
1921 	struct mif_device *vif;
1922 	struct mfc6_cache *c;
1923 	struct net *net = sock_net(sk);
1924 	struct mr6_table *mrt;
1925 
1926 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1927 	if (mrt == NULL)
1928 		return -ENOENT;
1929 
1930 	switch (cmd) {
1931 	case SIOCGETMIFCNT_IN6:
1932 		if (copy_from_user(&vr, arg, sizeof(vr)))
1933 			return -EFAULT;
1934 		if (vr.mifi >= mrt->maxvif)
1935 			return -EINVAL;
1936 		read_lock(&mrt_lock);
1937 		vif = &mrt->vif6_table[vr.mifi];
1938 		if (MIF_EXISTS(mrt, vr.mifi)) {
1939 			vr.icount = vif->pkt_in;
1940 			vr.ocount = vif->pkt_out;
1941 			vr.ibytes = vif->bytes_in;
1942 			vr.obytes = vif->bytes_out;
1943 			read_unlock(&mrt_lock);
1944 
1945 			if (copy_to_user(arg, &vr, sizeof(vr)))
1946 				return -EFAULT;
1947 			return 0;
1948 		}
1949 		read_unlock(&mrt_lock);
1950 		return -EADDRNOTAVAIL;
1951 	case SIOCGETSGCNT_IN6:
1952 		if (copy_from_user(&sr, arg, sizeof(sr)))
1953 			return -EFAULT;
1954 
1955 		read_lock(&mrt_lock);
1956 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1957 		if (c) {
1958 			sr.pktcnt = c->mfc_un.res.pkt;
1959 			sr.bytecnt = c->mfc_un.res.bytes;
1960 			sr.wrong_if = c->mfc_un.res.wrong_if;
1961 			read_unlock(&mrt_lock);
1962 
1963 			if (copy_to_user(arg, &sr, sizeof(sr)))
1964 				return -EFAULT;
1965 			return 0;
1966 		}
1967 		read_unlock(&mrt_lock);
1968 		return -EADDRNOTAVAIL;
1969 	default:
1970 		return -ENOIOCTLCMD;
1971 	}
1972 }
1973 #endif
1974 
1975 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1976 {
1977 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1978 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1979 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1980 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1981 	return dst_output(skb);
1982 }
1983 
1984 /*
1985  *	Processing handlers for ip6mr_forward
1986  */
1987 
/*
 * Send one copy of @skb out through multicast interface @vifi.
 *
 * The skb is always consumed: it is either passed to the netfilter FORWARD
 * hook (and from there to ip6mr_forward2_finish()) or freed. For a register
 * interface (CONFIG_IPV6_PIMSM_V2) the packet is not transmitted but
 * reported to the routing daemon as MRT6MSG_WHOLEPKT.
 *
 * Returns the NF_HOOK() result when the packet is forwarded, 0 otherwise.
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* Interface slot has no device attached */
	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route towards the group address, pinned to this interface's link */
	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever dst the skb carried with the output route */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow() before modifying it */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2060 
2061 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2062 {
2063 	int ct;
2064 
2065 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2066 		if (mrt->vif6_table[ct].dev == dev)
2067 			break;
2068 	}
2069 	return ct;
2070 }
2071 
/*
 * Forward @skb according to the resolved cache entry @cache.
 *
 * Updates the entry's packet/byte counters, verifies that the packet
 * arrived on the expected interface (possibly reporting MRT6MSG_WRONGMIF to
 * the routing daemon) and then sends a copy out of every interface whose
 * threshold is below the packet's hop limit. The skb is always consumed.
 *
 * Always returns 0.
 */
static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		/* Asserts are rate limited via last_assert / MFC_ASSERT_THRESH */
		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/*
	 * psend trails one eligible interface behind: every eligible
	 * interface but the last gets a clone, the last one (sent after the
	 * loop) gets the original skb.
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
2161 
2162 
2163 /*
2164  *	Multicast packets for forwarding arrive here
2165  */
2166 
2167 int ip6_mr_input(struct sk_buff *skb)
2168 {
2169 	struct mfc6_cache *cache;
2170 	struct net *net = dev_net(skb->dev);
2171 	struct mr6_table *mrt;
2172 	struct flowi6 fl6 = {
2173 		.flowi6_iif	= skb->dev->ifindex,
2174 		.flowi6_mark	= skb->mark,
2175 	};
2176 	int err;
2177 
2178 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2179 	if (err < 0) {
2180 		kfree_skb(skb);
2181 		return err;
2182 	}
2183 
2184 	read_lock(&mrt_lock);
2185 	cache = ip6mr_cache_find(mrt,
2186 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2187 	if (cache == NULL) {
2188 		int vif = ip6mr_find_vif(mrt, skb->dev);
2189 
2190 		if (vif >= 0)
2191 			cache = ip6mr_cache_find_any(mrt,
2192 						     &ipv6_hdr(skb)->daddr,
2193 						     vif);
2194 	}
2195 
2196 	/*
2197 	 *	No usable cache entry
2198 	 */
2199 	if (cache == NULL) {
2200 		int vif;
2201 
2202 		vif = ip6mr_find_vif(mrt, skb->dev);
2203 		if (vif >= 0) {
2204 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2205 			read_unlock(&mrt_lock);
2206 
2207 			return err;
2208 		}
2209 		read_unlock(&mrt_lock);
2210 		kfree_skb(skb);
2211 		return -ENODEV;
2212 	}
2213 
2214 	ip6_mr_forward(net, mrt, skb, cache);
2215 
2216 	read_unlock(&mrt_lock);
2217 
2218 	return 0;
2219 }
2220 
2221 
2222 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2223 			       struct mfc6_cache *c, struct rtmsg *rtm)
2224 {
2225 	int ct;
2226 	struct rtnexthop *nhp;
2227 	struct nlattr *mp_attr;
2228 	struct rta_mfc_stats mfcs;
2229 
2230 	/* If cache is unresolved, don't try to parse IIF and OIF */
2231 	if (c->mf6c_parent >= MAXMIFS)
2232 		return -ENOENT;
2233 
2234 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2235 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2236 		return -EMSGSIZE;
2237 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2238 	if (mp_attr == NULL)
2239 		return -EMSGSIZE;
2240 
2241 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2242 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2243 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2244 			if (nhp == NULL) {
2245 				nla_nest_cancel(skb, mp_attr);
2246 				return -EMSGSIZE;
2247 			}
2248 
2249 			nhp->rtnh_flags = 0;
2250 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2251 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2252 			nhp->rtnh_len = sizeof(*nhp);
2253 		}
2254 	}
2255 
2256 	nla_nest_end(skb, mp_attr);
2257 
2258 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2259 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2260 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2261 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2262 		return -EMSGSIZE;
2263 
2264 	rtm->rtm_type = RTN_MULTICAST;
2265 	return 1;
2266 }
2267 
2268 int ip6mr_get_route(struct net *net,
2269 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2270 {
2271 	int err;
2272 	struct mr6_table *mrt;
2273 	struct mfc6_cache *cache;
2274 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2275 
2276 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2277 	if (mrt == NULL)
2278 		return -ENOENT;
2279 
2280 	read_lock(&mrt_lock);
2281 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2282 	if (!cache && skb->dev) {
2283 		int vif = ip6mr_find_vif(mrt, skb->dev);
2284 
2285 		if (vif >= 0)
2286 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2287 						     vif);
2288 	}
2289 
2290 	if (!cache) {
2291 		struct sk_buff *skb2;
2292 		struct ipv6hdr *iph;
2293 		struct net_device *dev;
2294 		int vif;
2295 
2296 		if (nowait) {
2297 			read_unlock(&mrt_lock);
2298 			return -EAGAIN;
2299 		}
2300 
2301 		dev = skb->dev;
2302 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2303 			read_unlock(&mrt_lock);
2304 			return -ENODEV;
2305 		}
2306 
2307 		/* really correct? */
2308 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2309 		if (!skb2) {
2310 			read_unlock(&mrt_lock);
2311 			return -ENOMEM;
2312 		}
2313 
2314 		skb_reset_transport_header(skb2);
2315 
2316 		skb_put(skb2, sizeof(struct ipv6hdr));
2317 		skb_reset_network_header(skb2);
2318 
2319 		iph = ipv6_hdr(skb2);
2320 		iph->version = 0;
2321 		iph->priority = 0;
2322 		iph->flow_lbl[0] = 0;
2323 		iph->flow_lbl[1] = 0;
2324 		iph->flow_lbl[2] = 0;
2325 		iph->payload_len = 0;
2326 		iph->nexthdr = IPPROTO_NONE;
2327 		iph->hop_limit = 0;
2328 		iph->saddr = rt->rt6i_src.addr;
2329 		iph->daddr = rt->rt6i_dst.addr;
2330 
2331 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2332 		read_unlock(&mrt_lock);
2333 
2334 		return err;
2335 	}
2336 
2337 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2338 		cache->mfc_flags |= MFC_NOTIFY;
2339 
2340 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2341 	read_unlock(&mrt_lock);
2342 	return err;
2343 }
2344 
2345 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2346 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
2347 {
2348 	struct nlmsghdr *nlh;
2349 	struct rtmsg *rtm;
2350 	int err;
2351 
2352 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2353 	if (nlh == NULL)
2354 		return -EMSGSIZE;
2355 
2356 	rtm = nlmsg_data(nlh);
2357 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2358 	rtm->rtm_dst_len  = 128;
2359 	rtm->rtm_src_len  = 128;
2360 	rtm->rtm_tos      = 0;
2361 	rtm->rtm_table    = mrt->id;
2362 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2363 		goto nla_put_failure;
2364 	rtm->rtm_type = RTN_MULTICAST;
2365 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2366 	if (c->mfc_flags & MFC_STATIC)
2367 		rtm->rtm_protocol = RTPROT_STATIC;
2368 	else
2369 		rtm->rtm_protocol = RTPROT_MROUTED;
2370 	rtm->rtm_flags    = 0;
2371 
2372 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2373 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2374 		goto nla_put_failure;
2375 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2376 	/* do not break the dump if cache is unresolved */
2377 	if (err < 0 && err != -ENOENT)
2378 		goto nla_put_failure;
2379 
2380 	return nlmsg_end(skb, nlh);
2381 
2382 nla_put_failure:
2383 	nlmsg_cancel(skb, nlh);
2384 	return -EMSGSIZE;
2385 }
2386 
2387 static int mr6_msgsize(bool unresolved, int maxvif)
2388 {
2389 	size_t len =
2390 		NLMSG_ALIGN(sizeof(struct rtmsg))
2391 		+ nla_total_size(4)	/* RTA_TABLE */
2392 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2393 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2394 		;
2395 
2396 	if (!unresolved)
2397 		len = len
2398 		      + nla_total_size(4)	/* RTA_IIF */
2399 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2400 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2401 						/* RTA_MFC_STATS */
2402 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2403 		;
2404 
2405 	return len;
2406 }
2407 
2408 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2409 			      int cmd)
2410 {
2411 	struct net *net = read_pnet(&mrt->net);
2412 	struct sk_buff *skb;
2413 	int err = -ENOBUFS;
2414 
2415 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2416 			GFP_ATOMIC);
2417 	if (skb == NULL)
2418 		goto errout;
2419 
2420 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2421 	if (err < 0)
2422 		goto errout;
2423 
2424 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2425 	return;
2426 
2427 errout:
2428 	kfree_skb(skb);
2429 	if (err < 0)
2430 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2431 }
2432 
2433 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2434 {
2435 	struct net *net = sock_net(skb->sk);
2436 	struct mr6_table *mrt;
2437 	struct mfc6_cache *mfc;
2438 	unsigned int t = 0, s_t;
2439 	unsigned int h = 0, s_h;
2440 	unsigned int e = 0, s_e;
2441 
2442 	s_t = cb->args[0];
2443 	s_h = cb->args[1];
2444 	s_e = cb->args[2];
2445 
2446 	read_lock(&mrt_lock);
2447 	ip6mr_for_each_table(mrt, net) {
2448 		if (t < s_t)
2449 			goto next_table;
2450 		if (t > s_t)
2451 			s_h = 0;
2452 		for (h = s_h; h < MFC6_LINES; h++) {
2453 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2454 				if (e < s_e)
2455 					goto next_entry;
2456 				if (ip6mr_fill_mroute(mrt, skb,
2457 						      NETLINK_CB(cb->skb).portid,
2458 						      cb->nlh->nlmsg_seq,
2459 						      mfc, RTM_NEWROUTE) < 0)
2460 					goto done;
2461 next_entry:
2462 				e++;
2463 			}
2464 			e = s_e = 0;
2465 		}
2466 		spin_lock_bh(&mfc_unres_lock);
2467 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2468 			if (e < s_e)
2469 				goto next_entry2;
2470 			if (ip6mr_fill_mroute(mrt, skb,
2471 					      NETLINK_CB(cb->skb).portid,
2472 					      cb->nlh->nlmsg_seq,
2473 					      mfc, RTM_NEWROUTE) < 0) {
2474 				spin_unlock_bh(&mfc_unres_lock);
2475 				goto done;
2476 			}
2477 next_entry2:
2478 			e++;
2479 		}
2480 		spin_unlock_bh(&mfc_unres_lock);
2481 		e = s_e = 0;
2482 		s_h = 0;
2483 next_table:
2484 		t++;
2485 	}
2486 done:
2487 	read_unlock(&mrt_lock);
2488 
2489 	cb->args[2] = e;
2490 	cb->args[1] = h;
2491 	cb->args[0] = t;
2492 
2493 	return skb->len;
2494 }
2495