xref: /linux/net/ipv6/ip6mr.c (revision b889fcf63cb62e7fdb7816565e28f44dbe4a76a5)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
57 struct mr6_table {	/* state of one IPv6 multicast routing table */
58 	struct list_head	list;	/* link on net->ipv6.mr6_tables (multiple-tables build) */
59 #ifdef CONFIG_NET_NS
60 	struct net		*net;	/* owning namespace, set with write_pnet() */
61 #endif
62 	u32			id;	/* table identifier, e.g. RT6_TABLE_DFLT */
63 	struct sock		*mroute6_sk;	/* socket cache reports are queued to; NULL when none */
64 	struct timer_list	ipmr_expire_timer;	/* runs ipmr_expire_process() on this table */
65 	struct list_head	mfc6_unres_queue;	/* unresolved entries, guarded by mfc_unres_lock */
66 	struct list_head	mfc6_cache_array[MFC6_LINES];	/* resolved entries, bucketed by MFC6_HASH() */
67 	struct mif_device	vif6_table[MAXMIFS];	/* multicast interfaces; a slot is in use iff .dev != NULL */
68 	int			maxvif;	/* one past the highest in-use vif6_table index */
69 	atomic_t		cache_resolve_queue_len;	/* number of entries on mfc6_unres_queue */
70 	bool			mroute_do_assert;	/* NOTE(review): not used in this part of the file - presumably a daemon-set option, confirm */
71 	bool			mroute_do_pim;	/* NOTE(review): as above - confirm against the socket option handler */
72 #ifdef CONFIG_IPV6_PIMSM_V2
73 	int			mroute_reg_vif_num;	/* index of the PIM register vif, -1 when there is none */
74 #endif
75 };
76 
77 struct ip6mr_rule {	/* policy rule type; its size is advertised through ip6mr_rules_ops_template.rule_size */
78 	struct fib_rule		common;	/* generic rule; no ip6mr specific fields are added */
79 };
80 
81 struct ip6mr_result {	/* lookup result passed through fib_lookup_arg.result */
82 	struct mr6_table	*mrt;	/* table chosen by ip6mr_rule_action() */
83 };
84 
85 /* Big lock protecting the vif table, the mrt cache and the mroute socket
86    state. Note that changes are additionally serialized via rtnl_lock.
87  */
88 
89 static DEFINE_RWLOCK(mrt_lock);
90 
91 /*
92  *	Multicast router control variables
93  */
94 
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)	/* true when vif slot _idx has a device attached */
96 
97 /* Special spinlock for the queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to Alan's original scheme. The hash table of resolved
101    entries is changed only in process context and is protected
102    by the weak lock mrt_lock. The queue of unresolved entries is
103    protected by the strong spinlock mfc_unres_lock.
104 
105    With this scheme the data path takes no exclusive locks at all.
106  */
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;	/* slab cache that struct mfc6_cache entries are allocated from */
109 
/* Forward declarations of helpers that are referenced before their definition. */
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112 
113 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 			  struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 			      mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 			       struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 			      int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 			       struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg);	/* timer callback; arg is the struct mr6_table pointer */
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Walk every multicast routing table of @net, binding each one to @mrt.
 * @net is parenthesized so that any pointer expression can be passed.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = { .result = &res, };
146 	int err;
147 
148 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149 			       flowi6_to_flowi(flp6), 0, &arg);
150 	if (err < 0)
151 		return err;
152 	*mrt = res.mrt;
153 	return 0;
154 }
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157 			     int flags, struct fib_lookup_arg *arg)
158 {
159 	struct ip6mr_result *res = arg->result;
160 	struct mr6_table *mrt;
161 
162 	switch (rule->action) {
163 	case FR_ACT_TO_TBL:
164 		break;
165 	case FR_ACT_UNREACHABLE:
166 		return -ENETUNREACH;
167 	case FR_ACT_PROHIBIT:
168 		return -EACCES;
169 	case FR_ACT_BLACKHOLE:
170 	default:
171 		return -EINVAL;
172 	}
173 
174 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
175 	if (mrt == NULL)
176 		return -EAGAIN;
177 	res->mrt = mrt;
178 	return 0;
179 }
180 
/* fib_rules ->match callback: ip6mr rules have no selectors of their own, so every flow matches. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	return 1;
}
185 
/* Netlink attribute policy for ip6mr rules: only the generic fib rule attributes. */
186 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
187 	FRA_GENERIC_POLICY,
188 };
189 
/* fib_rules ->configure callback: nothing family specific to parse, always succeeds. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh,
				struct nlattr **tb)
{
	return 0;
}
195 
/* fib_rules ->compare callback: there are no family specific fields, so rules always compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 1;
}
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203 			   struct fib_rule_hdr *frh)
204 {
205 	frh->dst_len = 0;
206 	frh->src_len = 0;
207 	frh->tos     = 0;
208 	return 0;
209 }
210 
/* Rule operations handed to fib_rules_register() for each namespace in ip6mr_rules_init(). */
211 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
212 	.family		= RTNL_FAMILY_IP6MR,
213 	.rule_size	= sizeof(struct ip6mr_rule),
214 	.addr_size	= sizeof(struct in6_addr),
215 	.action		= ip6mr_rule_action,
216 	.match		= ip6mr_rule_match,	/* matches every flow */
217 	.configure	= ip6mr_rule_configure,	/* no-op */
218 	.compare	= ip6mr_rule_compare,	/* always equal */
219 	.default_pref	= fib_default_rule_pref,
220 	.fill		= ip6mr_rule_fill,
221 	.nlgroup	= RTNLGRP_IPV6_RULE,
222 	.policy		= ip6mr_rule_policy,
223 	.owner		= THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (mrt == NULL) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	kfree(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
263 		list_del(&mrt->list);
264 		ip6mr_free_table(mrt);
265 	}
266 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
267 }
268 #else
/*
 * Single-table build: "iterate" over the one table of @net, running the
 * body at most once (not at all when the table pointer is NULL).
 * Both arguments are parenthesized so arbitrary expressions are safe.
 */
#define ip6mr_for_each_table(mrt, net) \
	for ((mrt) = (net)->ipv6.mrt6; (mrt); (mrt) = NULL)
271 
272 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
273 {
274 	return net->ipv6.mrt6;
275 }
276 
277 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
278 			    struct mr6_table **mrt)
279 {
280 	*mrt = net->ipv6.mrt6;
281 	return 0;
282 }
283 
284 static int __net_init ip6mr_rules_init(struct net *net)
285 {
286 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
287 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
288 }
289 
290 static void __net_exit ip6mr_rules_exit(struct net *net)
291 {
292 	ip6mr_free_table(net->ipv6.mrt6);
293 }
294 #endif
295 
296 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
297 {
298 	struct mr6_table *mrt;
299 	unsigned int i;
300 
301 	mrt = ip6mr_get_table(net, id);
302 	if (mrt != NULL)
303 		return mrt;
304 
305 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
306 	if (mrt == NULL)
307 		return NULL;
308 	mrt->id = id;
309 	write_pnet(&mrt->net, net);
310 
311 	/* Forwarding cache */
312 	for (i = 0; i < MFC6_LINES; i++)
313 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
314 
315 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
316 
317 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
318 		    (unsigned long)mrt);
319 
320 #ifdef CONFIG_IPV6_PIMSM_V2
321 	mrt->mroute_reg_vif_num = -1;
322 #endif
323 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
324 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
325 #endif
326 	return mrt;
327 }
328 
329 static void ip6mr_free_table(struct mr6_table *mrt)
330 {
331 	del_timer(&mrt->ipmr_expire_timer);
332 	mroute_clean_tables(mrt);
333 	kfree(mrt);
334 }
335 
336 #ifdef CONFIG_PROC_FS
337 
338 struct ipmr_mfc_iter {	/* seq_file private data for the MFC cache dump */
339 	struct seq_net_private p;
340 	struct mr6_table *mrt;	/* table being dumped, set in ipmr_mfc_seq_start() */
341 	struct list_head *cache;	/* list currently walked: a hash line, the unresolved queue, or NULL */
342 	int ct;	/* index of the current hash line in mfc6_cache_array */
343 };
344 
345 
346 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
347 					   struct ipmr_mfc_iter *it, loff_t pos)
348 {
349 	struct mr6_table *mrt = it->mrt;
350 	struct mfc6_cache *mfc;
351 
352 	read_lock(&mrt_lock);
353 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
354 		it->cache = &mrt->mfc6_cache_array[it->ct];
355 		list_for_each_entry(mfc, it->cache, list)
356 			if (pos-- == 0)
357 				return mfc;
358 	}
359 	read_unlock(&mrt_lock);
360 
361 	spin_lock_bh(&mfc_unres_lock);
362 	it->cache = &mrt->mfc6_unres_queue;
363 	list_for_each_entry(mfc, it->cache, list)
364 		if (pos-- == 0)
365 			return mfc;
366 	spin_unlock_bh(&mfc_unres_lock);
367 
368 	it->cache = NULL;
369 	return NULL;
370 }
371 
372 /*
373  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
374  */
375 
376 struct ipmr_vif_iter {	/* seq_file private data for the vif table dump */
377 	struct seq_net_private p;
378 	struct mr6_table *mrt;	/* table being dumped, set in ip6mr_vif_seq_start() */
379 	int ct;	/* current index into mrt->vif6_table */
380 };
381 
382 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
383 					    struct ipmr_vif_iter *iter,
384 					    loff_t pos)
385 {
386 	struct mr6_table *mrt = iter->mrt;
387 
388 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
389 		if (!MIF_EXISTS(mrt, iter->ct))
390 			continue;
391 		if (pos-- == 0)
392 			return &mrt->vif6_table[iter->ct];
393 	}
394 	return NULL;
395 }
396 
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398 	__acquires(mrt_lock)
399 {
400 	struct ipmr_vif_iter *iter = seq->private;
401 	struct net *net = seq_file_net(seq);
402 	struct mr6_table *mrt;
403 
404 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
405 	if (mrt == NULL)
406 		return ERR_PTR(-ENOENT);
407 
408 	iter->mrt = mrt;
409 
410 	read_lock(&mrt_lock);
411 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
412 		: SEQ_START_TOKEN;
413 }
414 
415 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
416 {
417 	struct ipmr_vif_iter *iter = seq->private;
418 	struct net *net = seq_file_net(seq);
419 	struct mr6_table *mrt = iter->mrt;
420 
421 	++*pos;
422 	if (v == SEQ_START_TOKEN)
423 		return ip6mr_vif_seq_idx(net, iter, 0);
424 
425 	while (++iter->ct < mrt->maxvif) {
426 		if (!MIF_EXISTS(mrt, iter->ct))
427 			continue;
428 		return &mrt->vif6_table[iter->ct];
429 	}
430 	return NULL;
431 }
432 
433 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
434 	__releases(mrt_lock)
435 {
436 	read_unlock(&mrt_lock);
437 }
438 
439 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
440 {
441 	struct ipmr_vif_iter *iter = seq->private;
442 	struct mr6_table *mrt = iter->mrt;
443 
444 	if (v == SEQ_START_TOKEN) {
445 		seq_puts(seq,
446 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
447 	} else {
448 		const struct mif_device *vif = v;
449 		const char *name = vif->dev ? vif->dev->name : "none";
450 
451 		seq_printf(seq,
452 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
453 			   vif - mrt->vif6_table,
454 			   name, vif->bytes_in, vif->pkt_in,
455 			   vif->bytes_out, vif->pkt_out,
456 			   vif->flags);
457 	}
458 	return 0;
459 }
460 
/* seq_file iterator for the vif table dump; mrt_lock is read-held between .start and .stop. */
461 static const struct seq_operations ip6mr_vif_seq_ops = {
462 	.start = ip6mr_vif_seq_start,
463 	.next  = ip6mr_vif_seq_next,
464 	.stop  = ip6mr_vif_seq_stop,
465 	.show  = ip6mr_vif_seq_show,
466 };
467 
468 static int ip6mr_vif_open(struct inode *inode, struct file *file)
469 {
470 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
471 			    sizeof(struct ipmr_vif_iter));
472 }
473 
/* File operations of the vif proc file: generic seq_file handlers around ip6mr_vif_open(). */
474 static const struct file_operations ip6mr_vif_fops = {
475 	.owner	 = THIS_MODULE,
476 	.open    = ip6mr_vif_open,
477 	.read    = seq_read,
478 	.llseek  = seq_lseek,
479 	.release = seq_release_net,
480 };
481 
482 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
483 {
484 	struct ipmr_mfc_iter *it = seq->private;
485 	struct net *net = seq_file_net(seq);
486 	struct mr6_table *mrt;
487 
488 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
489 	if (mrt == NULL)
490 		return ERR_PTR(-ENOENT);
491 
492 	it->mrt = mrt;
493 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
494 		: SEQ_START_TOKEN;
495 }
496 
/*
 * seq_file ->next() for the MFC cache dump.
 *
 * Advances *pos and returns the entry following @v: first the rest of the
 * list currently being walked (it->cache), then the remaining hash lines of
 * the resolved cache, and finally the unresolved queue. When an entry is
 * returned, the lock that protects the list it->cache points to is held.
 * Returns NULL at the end of the dump with it->cache cleared.
 */
497 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
498 {
499 	struct mfc6_cache *mfc = v;
500 	struct ipmr_mfc_iter *it = seq->private;
501 	struct net *net = seq_file_net(seq);
502 	struct mr6_table *mrt = it->mrt;
503 
504 	++*pos;
505 
506 	if (v == SEQ_START_TOKEN)	/* header was just shown: locate the very first entry */
507 		return ipmr_mfc_seq_idx(net, seq->private, 0);
508 
509 	if (mfc->list.next != it->cache)	/* more entries on the current list */
510 		return list_entry(mfc->list.next, struct mfc6_cache, list);
511 
512 	if (it->cache == &mrt->mfc6_unres_queue)	/* the unresolved queue is the last list */
513 		goto end_of_list;
514 
515 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
516 
517 	while (++it->ct < MFC6_LINES) {	/* find the next non-empty hash line */
518 		it->cache = &mrt->mfc6_cache_array[it->ct];
519 		if (list_empty(it->cache))
520 			continue;
521 		return list_first_entry(it->cache, struct mfc6_cache, list);
522 	}
523 
524 	/* exhausted cache_array, show unresolved */
525 	read_unlock(&mrt_lock);	/* hand over from mrt_lock to mfc_unres_lock */
526 	it->cache = &mrt->mfc6_unres_queue;
527 	it->ct = 0;
528 
529 	spin_lock_bh(&mfc_unres_lock);
530 	if (!list_empty(it->cache))
531 		return list_first_entry(it->cache, struct mfc6_cache, list);
532 
533  end_of_list:
534 	spin_unlock_bh(&mfc_unres_lock);	/* both paths reaching here hold mfc_unres_lock */
535 	it->cache = NULL;	/* tells ipmr_mfc_seq_stop() that nothing is locked */
536 
537 	return NULL;
538 }
539 
540 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
541 {
542 	struct ipmr_mfc_iter *it = seq->private;
543 	struct mr6_table *mrt = it->mrt;
544 
545 	if (it->cache == &mrt->mfc6_unres_queue)
546 		spin_unlock_bh(&mfc_unres_lock);
547 	else if (it->cache == mrt->mfc6_cache_array)
548 		read_unlock(&mrt_lock);
549 }
550 
551 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
552 {
553 	int n;
554 
555 	if (v == SEQ_START_TOKEN) {
556 		seq_puts(seq,
557 			 "Group                            "
558 			 "Origin                           "
559 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
560 	} else {
561 		const struct mfc6_cache *mfc = v;
562 		const struct ipmr_mfc_iter *it = seq->private;
563 		struct mr6_table *mrt = it->mrt;
564 
565 		seq_printf(seq, "%pI6 %pI6 %-3hd",
566 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
567 			   mfc->mf6c_parent);
568 
569 		if (it->cache != &mrt->mfc6_unres_queue) {
570 			seq_printf(seq, " %8lu %8lu %8lu",
571 				   mfc->mfc_un.res.pkt,
572 				   mfc->mfc_un.res.bytes,
573 				   mfc->mfc_un.res.wrong_if);
574 			for (n = mfc->mfc_un.res.minvif;
575 			     n < mfc->mfc_un.res.maxvif; n++) {
576 				if (MIF_EXISTS(mrt, n) &&
577 				    mfc->mfc_un.res.ttls[n] < 255)
578 					seq_printf(seq,
579 						   " %2d:%-3d",
580 						   n, mfc->mfc_un.res.ttls[n]);
581 			}
582 		} else {
583 			/* unresolved mfc_caches don't contain
584 			 * pkt, bytes and wrong_if values
585 			 */
586 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
587 		}
588 		seq_putc(seq, '\n');
589 	}
590 	return 0;
591 }
592 
/* seq_file iterator for the MFC cache dump; locking is tracked through ipmr_mfc_iter.cache. */
593 static const struct seq_operations ipmr_mfc_seq_ops = {
594 	.start = ipmr_mfc_seq_start,
595 	.next  = ipmr_mfc_seq_next,
596 	.stop  = ipmr_mfc_seq_stop,
597 	.show  = ipmr_mfc_seq_show,
598 };
599 
600 static int ipmr_mfc_open(struct inode *inode, struct file *file)
601 {
602 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
603 			    sizeof(struct ipmr_mfc_iter));
604 }
605 
/* File operations of the MFC cache proc file: generic seq_file handlers around ipmr_mfc_open(). */
606 static const struct file_operations ip6mr_mfc_fops = {
607 	.owner	 = THIS_MODULE,
608 	.open    = ipmr_mfc_open,
609 	.read    = seq_read,
610 	.llseek  = seq_lseek,
611 	.release = seq_release_net,
612 };
613 #endif
614 
615 #ifdef CONFIG_IPV6_PIMSM_V2
616 
/*
 * Receive handler for PIM packets (installed through pim6_protocol).
 *
 * Accepts only PIM register messages that are not null-registers, pass the
 * checksum test and encapsulate an IPv6 packet addressed to a multicast
 * group. The outer headers are stripped and the inner packet is fed back
 * into the stack with netif_rx() as if it had arrived on the register vif
 * device of the table selected for the flow. Everything else is dropped.
 *
 * Consumes @skb on every path and always returns 0.
 */
617 static int pim6_rcv(struct sk_buff *skb)
618 {
619 	struct pimreghdr *pim;
620 	struct ipv6hdr   *encap;
621 	struct net_device  *reg_dev = NULL;
622 	struct net *net = dev_net(skb->dev);
623 	struct mr6_table *mrt;
624 	struct flowi6 fl6 = {
625 		.flowi6_iif	= skb->dev->ifindex,
626 		.flowi6_mark	= skb->mark,
627 	};
628 	int reg_vif_num;
629 
630 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))	/* need PIM header plus inner IPv6 header in the linear area */
631 		goto drop;
632 
633 	pim = (struct pimreghdr *)skb_transport_header(skb);
634 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
635 	    (pim->flags & PIM_NULL_REGISTER) ||
636 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,	/* checksum over the PIM header only ... */
637 			     sizeof(*pim), IPPROTO_PIM,
638 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
639 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))	/* ... or over the whole packet; drop only if both fail */
640 		goto drop;
641 
642 	/* check if the inner packet is destined to mcast group */
643 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
644 				   sizeof(*pim));
645 
646 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
647 	    encap->payload_len == 0 ||
648 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
649 		goto drop;
650 
651 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
652 		goto drop;
653 	reg_vif_num = mrt->mroute_reg_vif_num;	/* -1 when the table has no register vif */
654 
655 	read_lock(&mrt_lock);
656 	if (reg_vif_num >= 0)
657 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
658 	if (reg_dev)
659 		dev_hold(reg_dev);	/* keep the device alive after the lock is dropped */
660 	read_unlock(&mrt_lock);
661 
662 	if (reg_dev == NULL)
663 		goto drop;
664 
665 	skb->mac_header = skb->network_header;
666 	skb_pull(skb, (u8 *)encap - skb->data);	/* strip everything in front of the inner IPv6 header */
667 	skb_reset_network_header(skb);
668 	skb->protocol = htons(ETH_P_IPV6);
669 	skb->ip_summed = CHECKSUM_NONE;
670 	skb->pkt_type = PACKET_HOST;
671 
672 	skb_tunnel_rx(skb, reg_dev);
673 
674 	netif_rx(skb);
675 
676 	dev_put(reg_dev);
677 	return 0;
678  drop:
679 	kfree_skb(skb);
680 	return 0;
681 }
682 
/* inet6 protocol descriptor that routes received PIM packets to pim6_rcv(). */
683 static const struct inet6_protocol pim6_protocol = {
684 	.handler	=	pim6_rcv,
685 };
686 
687 /* Service routines creating virtual interfaces: PIMREG */
688 
689 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
690 				      struct net_device *dev)
691 {
692 	struct net *net = dev_net(dev);
693 	struct mr6_table *mrt;
694 	struct flowi6 fl6 = {
695 		.flowi6_oif	= dev->ifindex,
696 		.flowi6_iif	= skb->skb_iif,
697 		.flowi6_mark	= skb->mark,
698 	};
699 	int err;
700 
701 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
702 	if (err < 0) {
703 		kfree_skb(skb);
704 		return err;
705 	}
706 
707 	read_lock(&mrt_lock);
708 	dev->stats.tx_bytes += skb->len;
709 	dev->stats.tx_packets++;
710 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
711 	read_unlock(&mrt_lock);
712 	kfree_skb(skb);
713 	return NETDEV_TX_OK;
714 }
715 
/* Device operations of the PIM register device: transmit only. */
716 static const struct net_device_ops reg_vif_netdev_ops = {
717 	.ndo_start_xmit	= reg_vif_xmit,
718 };
719 
720 static void reg_vif_setup(struct net_device *dev)
721 {
722 	dev->type		= ARPHRD_PIMREG;
723 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
724 	dev->flags		= IFF_NOARP;
725 	dev->netdev_ops		= &reg_vif_netdev_ops;
726 	dev->destructor		= free_netdev;
727 	dev->features		|= NETIF_F_NETNS_LOCAL;
728 }
729 
730 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
731 {
732 	struct net_device *dev;
733 	char name[IFNAMSIZ];
734 
735 	if (mrt->id == RT6_TABLE_DFLT)
736 		sprintf(name, "pim6reg");
737 	else
738 		sprintf(name, "pim6reg%u", mrt->id);
739 
740 	dev = alloc_netdev(0, name, reg_vif_setup);
741 	if (dev == NULL)
742 		return NULL;
743 
744 	dev_net_set(dev, net);
745 
746 	if (register_netdevice(dev)) {
747 		free_netdev(dev);
748 		return NULL;
749 	}
750 	dev->iflink = 0;
751 
752 	if (dev_open(dev))
753 		goto failure;
754 
755 	dev_hold(dev);
756 	return dev;
757 
758 failure:
759 	/* allow the register to be completed before unregistering. */
760 	rtnl_unlock();
761 	rtnl_lock();
762 
763 	unregister_netdevice(dev);
764 	return NULL;
765 }
766 #endif
767 
768 /*
769  *	Delete a VIF entry
770  */
771 
772 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
773 {
774 	struct mif_device *v;
775 	struct net_device *dev;
776 	struct inet6_dev *in6_dev;
777 
778 	if (vifi < 0 || vifi >= mrt->maxvif)
779 		return -EADDRNOTAVAIL;
780 
781 	v = &mrt->vif6_table[vifi];
782 
783 	write_lock_bh(&mrt_lock);
784 	dev = v->dev;
785 	v->dev = NULL;
786 
787 	if (!dev) {
788 		write_unlock_bh(&mrt_lock);
789 		return -EADDRNOTAVAIL;
790 	}
791 
792 #ifdef CONFIG_IPV6_PIMSM_V2
793 	if (vifi == mrt->mroute_reg_vif_num)
794 		mrt->mroute_reg_vif_num = -1;
795 #endif
796 
797 	if (vifi + 1 == mrt->maxvif) {
798 		int tmp;
799 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
800 			if (MIF_EXISTS(mrt, tmp))
801 				break;
802 		}
803 		mrt->maxvif = tmp + 1;
804 	}
805 
806 	write_unlock_bh(&mrt_lock);
807 
808 	dev_set_allmulti(dev, -1);
809 
810 	in6_dev = __in6_dev_get(dev);
811 	if (in6_dev) {
812 		in6_dev->cnf.mc_forwarding--;
813 		inet6_netconf_notify_devconf(dev_net(dev),
814 					     NETCONFA_MC_FORWARDING,
815 					     dev->ifindex, &in6_dev->cnf);
816 	}
817 
818 	if (v->flags & MIFF_REGISTER)
819 		unregister_netdevice_queue(dev, head);
820 
821 	dev_put(dev);
822 	return 0;
823 }
824 
825 static inline void ip6mr_cache_free(struct mfc6_cache *c)
826 {
827 	kmem_cache_free(mrt_cachep, c);
828 }
829 
830 /* Destroy an unresolved cache entry, killing queued skbs
831    and reporting error to netlink readers.
832  */
833 
834 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
835 {
836 	struct net *net = read_pnet(&mrt->net);
837 	struct sk_buff *skb;
838 
839 	atomic_dec(&mrt->cache_resolve_queue_len);
840 
841 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
842 		if (ipv6_hdr(skb)->version == 0) {
843 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
844 			nlh->nlmsg_type = NLMSG_ERROR;
845 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
846 			skb_trim(skb, nlh->nlmsg_len);
847 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
848 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
849 		} else
850 			kfree_skb(skb);
851 	}
852 
853 	ip6mr_cache_free(c);
854 }
855 
856 
857 /* Timer process for all the unresolved queue. */
858 
859 static void ipmr_do_expire_process(struct mr6_table *mrt)
860 {
861 	unsigned long now = jiffies;
862 	unsigned long expires = 10 * HZ;
863 	struct mfc6_cache *c, *next;
864 
865 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
866 		if (time_after(c->mfc_un.unres.expires, now)) {
867 			/* not yet... */
868 			unsigned long interval = c->mfc_un.unres.expires - now;
869 			if (interval < expires)
870 				expires = interval;
871 			continue;
872 		}
873 
874 		list_del(&c->list);
875 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
876 		ip6mr_destroy_unres(mrt, c);
877 	}
878 
879 	if (!list_empty(&mrt->mfc6_unres_queue))
880 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
881 }
882 
883 static void ipmr_expire_process(unsigned long arg)
884 {
885 	struct mr6_table *mrt = (struct mr6_table *)arg;
886 
887 	if (!spin_trylock(&mfc_unres_lock)) {
888 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
889 		return;
890 	}
891 
892 	if (!list_empty(&mrt->mfc6_unres_queue))
893 		ipmr_do_expire_process(mrt);
894 
895 	spin_unlock(&mfc_unres_lock);
896 }
897 
898 /* Fill oifs list. It is called under write locked mrt_lock. */
899 
900 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
901 				    unsigned char *ttls)
902 {
903 	int vifi;
904 
905 	cache->mfc_un.res.minvif = MAXMIFS;
906 	cache->mfc_un.res.maxvif = 0;
907 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
908 
909 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
910 		if (MIF_EXISTS(mrt, vifi) &&
911 		    ttls[vifi] && ttls[vifi] < 255) {
912 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
913 			if (cache->mfc_un.res.minvif > vifi)
914 				cache->mfc_un.res.minvif = vifi;
915 			if (cache->mfc_un.res.maxvif <= vifi)
916 				cache->mfc_un.res.maxvif = vifi + 1;
917 		}
918 	}
919 }
920 
921 static int mif6_add(struct net *net, struct mr6_table *mrt,
922 		    struct mif6ctl *vifc, int mrtsock)
923 {
924 	int vifi = vifc->mif6c_mifi;
925 	struct mif_device *v = &mrt->vif6_table[vifi];
926 	struct net_device *dev;
927 	struct inet6_dev *in6_dev;
928 	int err;
929 
930 	/* Is vif busy ? */
931 	if (MIF_EXISTS(mrt, vifi))
932 		return -EADDRINUSE;
933 
934 	switch (vifc->mif6c_flags) {
935 #ifdef CONFIG_IPV6_PIMSM_V2
936 	case MIFF_REGISTER:
937 		/*
938 		 * Special Purpose VIF in PIM
939 		 * All the packets will be sent to the daemon
940 		 */
941 		if (mrt->mroute_reg_vif_num >= 0)
942 			return -EADDRINUSE;
943 		dev = ip6mr_reg_vif(net, mrt);
944 		if (!dev)
945 			return -ENOBUFS;
946 		err = dev_set_allmulti(dev, 1);
947 		if (err) {
948 			unregister_netdevice(dev);
949 			dev_put(dev);
950 			return err;
951 		}
952 		break;
953 #endif
954 	case 0:
955 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
956 		if (!dev)
957 			return -EADDRNOTAVAIL;
958 		err = dev_set_allmulti(dev, 1);
959 		if (err) {
960 			dev_put(dev);
961 			return err;
962 		}
963 		break;
964 	default:
965 		return -EINVAL;
966 	}
967 
968 	in6_dev = __in6_dev_get(dev);
969 	if (in6_dev) {
970 		in6_dev->cnf.mc_forwarding++;
971 		inet6_netconf_notify_devconf(dev_net(dev),
972 					     NETCONFA_MC_FORWARDING,
973 					     dev->ifindex, &in6_dev->cnf);
974 	}
975 
976 	/*
977 	 *	Fill in the VIF structures
978 	 */
979 	v->rate_limit = vifc->vifc_rate_limit;
980 	v->flags = vifc->mif6c_flags;
981 	if (!mrtsock)
982 		v->flags |= VIFF_STATIC;
983 	v->threshold = vifc->vifc_threshold;
984 	v->bytes_in = 0;
985 	v->bytes_out = 0;
986 	v->pkt_in = 0;
987 	v->pkt_out = 0;
988 	v->link = dev->ifindex;
989 	if (v->flags & MIFF_REGISTER)
990 		v->link = dev->iflink;
991 
992 	/* And finish update writing critical data */
993 	write_lock_bh(&mrt_lock);
994 	v->dev = dev;
995 #ifdef CONFIG_IPV6_PIMSM_V2
996 	if (v->flags & MIFF_REGISTER)
997 		mrt->mroute_reg_vif_num = vifi;
998 #endif
999 	if (vifi + 1 > mrt->maxvif)
1000 		mrt->maxvif = vifi + 1;
1001 	write_unlock_bh(&mrt_lock);
1002 	return 0;
1003 }
1004 
1005 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1006 					   const struct in6_addr *origin,
1007 					   const struct in6_addr *mcastgrp)
1008 {
1009 	int line = MFC6_HASH(mcastgrp, origin);
1010 	struct mfc6_cache *c;
1011 
1012 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1013 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1014 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1015 			return c;
1016 	}
1017 	return NULL;
1018 }
1019 
1020 /*
1021  *	Allocate a multicast cache entry
1022  */
1023 static struct mfc6_cache *ip6mr_cache_alloc(void)
1024 {
1025 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1026 	if (c == NULL)
1027 		return NULL;
1028 	c->mfc_un.res.minvif = MAXMIFS;
1029 	return c;
1030 }
1031 
1032 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1033 {
1034 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1035 	if (c == NULL)
1036 		return NULL;
1037 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1038 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1039 	return c;
1040 }
1041 
1042 /*
1043  *	A cache entry has gone into a resolved state from queued
1044  */
1045 
1046 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1047 				struct mfc6_cache *uc, struct mfc6_cache *c)
1048 {
1049 	struct sk_buff *skb;
1050 
1051 	/*
1052 	 *	Play the pending entries through our router
1053 	 */
1054 
1055 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1056 		if (ipv6_hdr(skb)->version == 0) {
1057 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1058 
1059 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1060 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1061 			} else {
1062 				nlh->nlmsg_type = NLMSG_ERROR;
1063 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1064 				skb_trim(skb, nlh->nlmsg_len);
1065 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1066 			}
1067 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1068 		} else
1069 			ip6_mr_forward(net, mrt, skb, c);
1070 	}
1071 }
1072 
1073 /*
1074  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1075  *	expects the following bizarre scheme.
1076  *
1077  *	Called under mrt_lock.
1078  *
 *	Builds a struct mrt6msg report about @pkt and queues it on the
 *	table's mroute socket. For MRT6MSG_WHOLEPKT (PIM builds) the report
 *	is a copy of the whole packet with the mrt6msg header pushed in
 *	front; for every other @assert value it is a fresh skb holding a
 *	copy of the IPv6 header followed by the mrt6msg header.
 *
 *	Returns -ENOBUFS when no skb could be allocated, -EINVAL when no
 *	mroute socket is attached, otherwise the result of
 *	sock_queue_rcv_skb(). @pkt itself is never consumed.
 */
1079 
1080 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1081 			      mifi_t mifi, int assert)
1082 {
1083 	struct sk_buff *skb;
1084 	struct mrt6msg *msg;
1085 	int ret;
1086 
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 	if (assert == MRT6MSG_WHOLEPKT)
1089 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)	/* copy of pkt with room for the headers pushed below */
1090 						+sizeof(*msg));
1091 	else
1092 #endif
1093 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1094 
1095 	if (!skb)
1096 		return -ENOBUFS;
1097 
1098 	/* I suppose that internal messages
1099 	 * do not require checksums */
1100 
1101 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1102 
1103 #ifdef CONFIG_IPV6_PIMSM_V2
1104 	if (assert == MRT6MSG_WHOLEPKT) {
1105 		/* Ugly, but we have no choice with this interface.
1106 		   Duplicate old header, fix length etc.
1107 		   And all this only to mangle msg->im6_msgtype and
1108 		   to set msg->im6_mbz to "mbz" :-)
1109 		 */
1110 		skb_push(skb, -skb_network_offset(pkt));
1111 
1112 		skb_push(skb, sizeof(*msg));
1113 		skb_reset_transport_header(skb);
1114 		msg = (struct mrt6msg *)skb_transport_header(skb);
1115 		msg->im6_mbz = 0;
1116 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1117 		msg->im6_mif = mrt->mroute_reg_vif_num;	/* the register vif, not the mifi argument */
1118 		msg->im6_pad = 0;
1119 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1120 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1121 
1122 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1123 	} else
1124 #endif
1125 	{
1126 	/*
1127 	 *	Copy the IP header
1128 	 */
1129 
1130 	skb_put(skb, sizeof(struct ipv6hdr));
1131 	skb_reset_network_header(skb);
1132 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1133 
1134 	/*
1135 	 *	Add our header
1136 	 */
1137 	skb_put(skb, sizeof(*msg));
1138 	skb_reset_transport_header(skb);
1139 	msg = (struct mrt6msg *)skb_transport_header(skb);
1140 
1141 	msg->im6_mbz = 0;
1142 	msg->im6_msgtype = assert;
1143 	msg->im6_mif = mifi;
1144 	msg->im6_pad = 0;
1145 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1146 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1147 
1148 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1149 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1150 	}
1151 
1152 	if (mrt->mroute6_sk == NULL) {	/* no daemon socket attached to this table */
1153 		kfree_skb(skb);
1154 		return -EINVAL;
1155 	}
1156 
1157 	/*
1158 	 *	Deliver to user space multicast routing algorithms
1159 	 */
1160 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1161 	if (ret < 0) {
1162 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1163 		kfree_skb(skb);
1164 	}
1165 
1166 	return ret;
1167 }
1168 
1169 /*
1170  *	Queue a packet for resolution. It gets locked cache entry!
1171  */
1172 
1173 static int
1174 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1175 {
1176 	bool found = false;
1177 	int err;
1178 	struct mfc6_cache *c;
1179 
1180 	spin_lock_bh(&mfc_unres_lock);
1181 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1182 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1183 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1184 			found = true;
1185 			break;
1186 		}
1187 	}
1188 
1189 	if (!found) {
1190 		/*
1191 		 *	Create a new entry if allowable
1192 		 */
1193 
1194 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1195 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1196 			spin_unlock_bh(&mfc_unres_lock);
1197 
1198 			kfree_skb(skb);
1199 			return -ENOBUFS;
1200 		}
1201 
1202 		/*
1203 		 *	Fill in the new cache entry
1204 		 */
1205 		c->mf6c_parent = -1;
1206 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1207 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1208 
1209 		/*
1210 		 *	Reflect first query at pim6sd
1211 		 */
1212 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1213 		if (err < 0) {
1214 			/* If the report failed throw the cache entry
1215 			   out - Brad Parker
1216 			 */
1217 			spin_unlock_bh(&mfc_unres_lock);
1218 
1219 			ip6mr_cache_free(c);
1220 			kfree_skb(skb);
1221 			return err;
1222 		}
1223 
1224 		atomic_inc(&mrt->cache_resolve_queue_len);
1225 		list_add(&c->list, &mrt->mfc6_unres_queue);
1226 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1227 
1228 		ipmr_do_expire_process(mrt);
1229 	}
1230 
1231 	/*
1232 	 *	See if we can append the packet
1233 	 */
1234 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1235 		kfree_skb(skb);
1236 		err = -ENOBUFS;
1237 	} else {
1238 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1239 		err = 0;
1240 	}
1241 
1242 	spin_unlock_bh(&mfc_unres_lock);
1243 	return err;
1244 }
1245 
1246 /*
1247  *	MFC6 cache manipulation by user space
1248  */
1249 
1250 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1251 {
1252 	int line;
1253 	struct mfc6_cache *c, *next;
1254 
1255 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1256 
1257 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1258 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1259 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1260 			write_lock_bh(&mrt_lock);
1261 			list_del(&c->list);
1262 			write_unlock_bh(&mrt_lock);
1263 
1264 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1265 			ip6mr_cache_free(c);
1266 			return 0;
1267 		}
1268 	}
1269 	return -ENOENT;
1270 }
1271 
1272 static int ip6mr_device_event(struct notifier_block *this,
1273 			      unsigned long event, void *ptr)
1274 {
1275 	struct net_device *dev = ptr;
1276 	struct net *net = dev_net(dev);
1277 	struct mr6_table *mrt;
1278 	struct mif_device *v;
1279 	int ct;
1280 	LIST_HEAD(list);
1281 
1282 	if (event != NETDEV_UNREGISTER)
1283 		return NOTIFY_DONE;
1284 
1285 	ip6mr_for_each_table(mrt, net) {
1286 		v = &mrt->vif6_table[0];
1287 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1288 			if (v->dev == dev)
1289 				mif6_delete(mrt, ct, &list);
1290 		}
1291 	}
1292 	unregister_netdevice_many(&list);
1293 
1294 	return NOTIFY_DONE;
1295 }
1296 
/* Netdevice notifier block; registered in ip6_mr_init(), removed in ip6_mr_cleanup() */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1300 
1301 /*
1302  *	Setup for IP multicast routing
1303  */
1304 
1305 static int __net_init ip6mr_net_init(struct net *net)
1306 {
1307 	int err;
1308 
1309 	err = ip6mr_rules_init(net);
1310 	if (err < 0)
1311 		goto fail;
1312 
1313 #ifdef CONFIG_PROC_FS
1314 	err = -ENOMEM;
1315 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1316 		goto proc_vif_fail;
1317 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1318 		goto proc_cache_fail;
1319 #endif
1320 
1321 	return 0;
1322 
1323 #ifdef CONFIG_PROC_FS
1324 proc_cache_fail:
1325 	proc_net_remove(net, "ip6_mr_vif");
1326 proc_vif_fail:
1327 	ip6mr_rules_exit(net);
1328 #endif
1329 fail:
1330 	return err;
1331 }
1332 
/*
 *	Per-namespace teardown: mirror of ip6mr_net_init().  The proc
 *	entries (when procfs is configured) are removed first, then the
 *	routing rules are torn down.
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}
1341 
/* Per-namespace init/exit hooks; registered in ip6_mr_init() */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1346 
1347 int __init ip6_mr_init(void)
1348 {
1349 	int err;
1350 
1351 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1352 				       sizeof(struct mfc6_cache),
1353 				       0, SLAB_HWCACHE_ALIGN,
1354 				       NULL);
1355 	if (!mrt_cachep)
1356 		return -ENOMEM;
1357 
1358 	err = register_pernet_subsys(&ip6mr_net_ops);
1359 	if (err)
1360 		goto reg_pernet_fail;
1361 
1362 	err = register_netdevice_notifier(&ip6_mr_notifier);
1363 	if (err)
1364 		goto reg_notif_fail;
1365 #ifdef CONFIG_IPV6_PIMSM_V2
1366 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1367 		pr_err("%s: can't add PIM protocol\n", __func__);
1368 		err = -EAGAIN;
1369 		goto add_proto_fail;
1370 	}
1371 #endif
1372 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1373 		      ip6mr_rtm_dumproute, NULL);
1374 	return 0;
1375 #ifdef CONFIG_IPV6_PIMSM_V2
1376 add_proto_fail:
1377 	unregister_netdevice_notifier(&ip6_mr_notifier);
1378 #endif
1379 reg_notif_fail:
1380 	unregister_pernet_subsys(&ip6mr_net_ops);
1381 reg_pernet_fail:
1382 	kmem_cache_destroy(mrt_cachep);
1383 	return err;
1384 }
1385 
1386 void ip6_mr_cleanup(void)
1387 {
1388 	unregister_netdevice_notifier(&ip6_mr_notifier);
1389 	unregister_pernet_subsys(&ip6mr_net_ops);
1390 	kmem_cache_destroy(mrt_cachep);
1391 }
1392 
1393 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1394 			 struct mf6cctl *mfc, int mrtsock)
1395 {
1396 	bool found = false;
1397 	int line;
1398 	struct mfc6_cache *uc, *c;
1399 	unsigned char ttls[MAXMIFS];
1400 	int i;
1401 
1402 	if (mfc->mf6cc_parent >= MAXMIFS)
1403 		return -ENFILE;
1404 
1405 	memset(ttls, 255, MAXMIFS);
1406 	for (i = 0; i < MAXMIFS; i++) {
1407 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1408 			ttls[i] = 1;
1409 
1410 	}
1411 
1412 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1413 
1414 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1415 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1416 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1417 			found = true;
1418 			break;
1419 		}
1420 	}
1421 
1422 	if (found) {
1423 		write_lock_bh(&mrt_lock);
1424 		c->mf6c_parent = mfc->mf6cc_parent;
1425 		ip6mr_update_thresholds(mrt, c, ttls);
1426 		if (!mrtsock)
1427 			c->mfc_flags |= MFC_STATIC;
1428 		write_unlock_bh(&mrt_lock);
1429 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1430 		return 0;
1431 	}
1432 
1433 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1434 		return -EINVAL;
1435 
1436 	c = ip6mr_cache_alloc();
1437 	if (c == NULL)
1438 		return -ENOMEM;
1439 
1440 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1441 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1442 	c->mf6c_parent = mfc->mf6cc_parent;
1443 	ip6mr_update_thresholds(mrt, c, ttls);
1444 	if (!mrtsock)
1445 		c->mfc_flags |= MFC_STATIC;
1446 
1447 	write_lock_bh(&mrt_lock);
1448 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1449 	write_unlock_bh(&mrt_lock);
1450 
1451 	/*
1452 	 *	Check to see if we resolved a queued list. If so we
1453 	 *	need to send on the frames and tidy up.
1454 	 */
1455 	found = false;
1456 	spin_lock_bh(&mfc_unres_lock);
1457 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1458 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1459 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1460 			list_del(&uc->list);
1461 			atomic_dec(&mrt->cache_resolve_queue_len);
1462 			found = true;
1463 			break;
1464 		}
1465 	}
1466 	if (list_empty(&mrt->mfc6_unres_queue))
1467 		del_timer(&mrt->ipmr_expire_timer);
1468 	spin_unlock_bh(&mfc_unres_lock);
1469 
1470 	if (found) {
1471 		ip6mr_cache_resolve(net, mrt, uc, c);
1472 		ip6mr_cache_free(uc);
1473 	}
1474 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1475 	return 0;
1476 }
1477 
1478 /*
1479  *	Close the multicast socket, and clear the vif tables etc
1480  */
1481 
1482 static void mroute_clean_tables(struct mr6_table *mrt)
1483 {
1484 	int i;
1485 	LIST_HEAD(list);
1486 	struct mfc6_cache *c, *next;
1487 
1488 	/*
1489 	 *	Shut down all active vif entries
1490 	 */
1491 	for (i = 0; i < mrt->maxvif; i++) {
1492 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1493 			mif6_delete(mrt, i, &list);
1494 	}
1495 	unregister_netdevice_many(&list);
1496 
1497 	/*
1498 	 *	Wipe the cache
1499 	 */
1500 	for (i = 0; i < MFC6_LINES; i++) {
1501 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1502 			if (c->mfc_flags & MFC_STATIC)
1503 				continue;
1504 			write_lock_bh(&mrt_lock);
1505 			list_del(&c->list);
1506 			write_unlock_bh(&mrt_lock);
1507 
1508 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1509 			ip6mr_cache_free(c);
1510 		}
1511 	}
1512 
1513 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1514 		spin_lock_bh(&mfc_unres_lock);
1515 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1516 			list_del(&c->list);
1517 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1518 			ip6mr_destroy_unres(mrt, c);
1519 		}
1520 		spin_unlock_bh(&mfc_unres_lock);
1521 	}
1522 }
1523 
1524 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1525 {
1526 	int err = 0;
1527 	struct net *net = sock_net(sk);
1528 
1529 	rtnl_lock();
1530 	write_lock_bh(&mrt_lock);
1531 	if (likely(mrt->mroute6_sk == NULL)) {
1532 		mrt->mroute6_sk = sk;
1533 		net->ipv6.devconf_all->mc_forwarding++;
1534 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1535 					     NETCONFA_IFINDEX_ALL,
1536 					     net->ipv6.devconf_all);
1537 	}
1538 	else
1539 		err = -EADDRINUSE;
1540 	write_unlock_bh(&mrt_lock);
1541 
1542 	rtnl_unlock();
1543 
1544 	return err;
1545 }
1546 
1547 int ip6mr_sk_done(struct sock *sk)
1548 {
1549 	int err = -EACCES;
1550 	struct net *net = sock_net(sk);
1551 	struct mr6_table *mrt;
1552 
1553 	rtnl_lock();
1554 	ip6mr_for_each_table(mrt, net) {
1555 		if (sk == mrt->mroute6_sk) {
1556 			write_lock_bh(&mrt_lock);
1557 			mrt->mroute6_sk = NULL;
1558 			net->ipv6.devconf_all->mc_forwarding--;
1559 			inet6_netconf_notify_devconf(net,
1560 						     NETCONFA_MC_FORWARDING,
1561 						     NETCONFA_IFINDEX_ALL,
1562 						     net->ipv6.devconf_all);
1563 			write_unlock_bh(&mrt_lock);
1564 
1565 			mroute_clean_tables(mrt);
1566 			err = 0;
1567 			break;
1568 		}
1569 	}
1570 	rtnl_unlock();
1571 
1572 	return err;
1573 }
1574 
1575 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1576 {
1577 	struct mr6_table *mrt;
1578 	struct flowi6 fl6 = {
1579 		.flowi6_iif	= skb->skb_iif,
1580 		.flowi6_oif	= skb->dev->ifindex,
1581 		.flowi6_mark	= skb->mark,
1582 	};
1583 
1584 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1585 		return NULL;
1586 
1587 	return mrt->mroute6_sk;
1588 }
1589 
1590 /*
1591  *	Socket options and virtual interface manipulation. The whole
1592  *	virtual interface system is a complete heap, but unfortunately
1593  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1594  *	MOSPF/PIM router set up we can clean this up.
1595  */
1596 
1597 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1598 {
1599 	int ret;
1600 	struct mif6ctl vif;
1601 	struct mf6cctl mfc;
1602 	mifi_t mifi;
1603 	struct net *net = sock_net(sk);
1604 	struct mr6_table *mrt;
1605 
1606 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1607 	if (mrt == NULL)
1608 		return -ENOENT;
1609 
1610 	if (optname != MRT6_INIT) {
1611 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1612 			return -EACCES;
1613 	}
1614 
1615 	switch (optname) {
1616 	case MRT6_INIT:
1617 		if (sk->sk_type != SOCK_RAW ||
1618 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 			return -EOPNOTSUPP;
1620 		if (optlen < sizeof(int))
1621 			return -EINVAL;
1622 
1623 		return ip6mr_sk_init(mrt, sk);
1624 
1625 	case MRT6_DONE:
1626 		return ip6mr_sk_done(sk);
1627 
1628 	case MRT6_ADD_MIF:
1629 		if (optlen < sizeof(vif))
1630 			return -EINVAL;
1631 		if (copy_from_user(&vif, optval, sizeof(vif)))
1632 			return -EFAULT;
1633 		if (vif.mif6c_mifi >= MAXMIFS)
1634 			return -ENFILE;
1635 		rtnl_lock();
1636 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1637 		rtnl_unlock();
1638 		return ret;
1639 
1640 	case MRT6_DEL_MIF:
1641 		if (optlen < sizeof(mifi_t))
1642 			return -EINVAL;
1643 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1644 			return -EFAULT;
1645 		rtnl_lock();
1646 		ret = mif6_delete(mrt, mifi, NULL);
1647 		rtnl_unlock();
1648 		return ret;
1649 
1650 	/*
1651 	 *	Manipulate the forwarding caches. These live
1652 	 *	in a sort of kernel/user symbiosis.
1653 	 */
1654 	case MRT6_ADD_MFC:
1655 	case MRT6_DEL_MFC:
1656 		if (optlen < sizeof(mfc))
1657 			return -EINVAL;
1658 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1659 			return -EFAULT;
1660 		rtnl_lock();
1661 		if (optname == MRT6_DEL_MFC)
1662 			ret = ip6mr_mfc_delete(mrt, &mfc);
1663 		else
1664 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1665 		rtnl_unlock();
1666 		return ret;
1667 
1668 	/*
1669 	 *	Control PIM assert (to activate pim will activate assert)
1670 	 */
1671 	case MRT6_ASSERT:
1672 	{
1673 		int v;
1674 
1675 		if (optlen != sizeof(v))
1676 			return -EINVAL;
1677 		if (get_user(v, (int __user *)optval))
1678 			return -EFAULT;
1679 		mrt->mroute_do_assert = v;
1680 		return 0;
1681 	}
1682 
1683 #ifdef CONFIG_IPV6_PIMSM_V2
1684 	case MRT6_PIM:
1685 	{
1686 		int v;
1687 
1688 		if (optlen != sizeof(v))
1689 			return -EINVAL;
1690 		if (get_user(v, (int __user *)optval))
1691 			return -EFAULT;
1692 		v = !!v;
1693 		rtnl_lock();
1694 		ret = 0;
1695 		if (v != mrt->mroute_do_pim) {
1696 			mrt->mroute_do_pim = v;
1697 			mrt->mroute_do_assert = v;
1698 		}
1699 		rtnl_unlock();
1700 		return ret;
1701 	}
1702 
1703 #endif
1704 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1705 	case MRT6_TABLE:
1706 	{
1707 		u32 v;
1708 
1709 		if (optlen != sizeof(u32))
1710 			return -EINVAL;
1711 		if (get_user(v, (u32 __user *)optval))
1712 			return -EFAULT;
1713 		if (sk == mrt->mroute6_sk)
1714 			return -EBUSY;
1715 
1716 		rtnl_lock();
1717 		ret = 0;
1718 		if (!ip6mr_new_table(net, v))
1719 			ret = -ENOMEM;
1720 		raw6_sk(sk)->ip6mr_table = v;
1721 		rtnl_unlock();
1722 		return ret;
1723 	}
1724 #endif
1725 	/*
1726 	 *	Spurious command, or MRT6_VERSION which you cannot
1727 	 *	set.
1728 	 */
1729 	default:
1730 		return -ENOPROTOOPT;
1731 	}
1732 }
1733 
1734 /*
1735  *	Getsock opt support for the multicast routing system.
1736  */
1737 
1738 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1739 			  int __user *optlen)
1740 {
1741 	int olr;
1742 	int val;
1743 	struct net *net = sock_net(sk);
1744 	struct mr6_table *mrt;
1745 
1746 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1747 	if (mrt == NULL)
1748 		return -ENOENT;
1749 
1750 	switch (optname) {
1751 	case MRT6_VERSION:
1752 		val = 0x0305;
1753 		break;
1754 #ifdef CONFIG_IPV6_PIMSM_V2
1755 	case MRT6_PIM:
1756 		val = mrt->mroute_do_pim;
1757 		break;
1758 #endif
1759 	case MRT6_ASSERT:
1760 		val = mrt->mroute_do_assert;
1761 		break;
1762 	default:
1763 		return -ENOPROTOOPT;
1764 	}
1765 
1766 	if (get_user(olr, optlen))
1767 		return -EFAULT;
1768 
1769 	olr = min_t(int, olr, sizeof(int));
1770 	if (olr < 0)
1771 		return -EINVAL;
1772 
1773 	if (put_user(olr, optlen))
1774 		return -EFAULT;
1775 	if (copy_to_user(optval, &val, olr))
1776 		return -EFAULT;
1777 	return 0;
1778 }
1779 
1780 /*
1781  *	The IP multicast ioctl support routines.
1782  */
1783 
1784 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1785 {
1786 	struct sioc_sg_req6 sr;
1787 	struct sioc_mif_req6 vr;
1788 	struct mif_device *vif;
1789 	struct mfc6_cache *c;
1790 	struct net *net = sock_net(sk);
1791 	struct mr6_table *mrt;
1792 
1793 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1794 	if (mrt == NULL)
1795 		return -ENOENT;
1796 
1797 	switch (cmd) {
1798 	case SIOCGETMIFCNT_IN6:
1799 		if (copy_from_user(&vr, arg, sizeof(vr)))
1800 			return -EFAULT;
1801 		if (vr.mifi >= mrt->maxvif)
1802 			return -EINVAL;
1803 		read_lock(&mrt_lock);
1804 		vif = &mrt->vif6_table[vr.mifi];
1805 		if (MIF_EXISTS(mrt, vr.mifi)) {
1806 			vr.icount = vif->pkt_in;
1807 			vr.ocount = vif->pkt_out;
1808 			vr.ibytes = vif->bytes_in;
1809 			vr.obytes = vif->bytes_out;
1810 			read_unlock(&mrt_lock);
1811 
1812 			if (copy_to_user(arg, &vr, sizeof(vr)))
1813 				return -EFAULT;
1814 			return 0;
1815 		}
1816 		read_unlock(&mrt_lock);
1817 		return -EADDRNOTAVAIL;
1818 	case SIOCGETSGCNT_IN6:
1819 		if (copy_from_user(&sr, arg, sizeof(sr)))
1820 			return -EFAULT;
1821 
1822 		read_lock(&mrt_lock);
1823 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1824 		if (c) {
1825 			sr.pktcnt = c->mfc_un.res.pkt;
1826 			sr.bytecnt = c->mfc_un.res.bytes;
1827 			sr.wrong_if = c->mfc_un.res.wrong_if;
1828 			read_unlock(&mrt_lock);
1829 
1830 			if (copy_to_user(arg, &sr, sizeof(sr)))
1831 				return -EFAULT;
1832 			return 0;
1833 		}
1834 		read_unlock(&mrt_lock);
1835 		return -EADDRNOTAVAIL;
1836 	default:
1837 		return -ENOIOCTLCMD;
1838 	}
1839 }
1840 
1841 #ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6:
 * the same fields ip6mr_ioctl() uses, with the counters declared as
 * compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;	/* source address to look up */
	struct sockaddr_in6 grp;	/* group address to look up */
	compat_ulong_t pktcnt;		/* out: packet count of the entry */
	compat_ulong_t bytecnt;		/* out: byte count of the entry */
	compat_ulong_t wrong_if;	/* out: wrong-interface count */
};
1849 
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6:
 * the same fields ip6mr_ioctl() uses, with the counters declared as
 * compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;			/* interface index to query */
	compat_ulong_t icount;		/* out: packets received */
	compat_ulong_t ocount;		/* out: packets sent */
	compat_ulong_t ibytes;		/* out: bytes received */
	compat_ulong_t obytes;		/* out: bytes sent */
};
1857 
1858 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1859 {
1860 	struct compat_sioc_sg_req6 sr;
1861 	struct compat_sioc_mif_req6 vr;
1862 	struct mif_device *vif;
1863 	struct mfc6_cache *c;
1864 	struct net *net = sock_net(sk);
1865 	struct mr6_table *mrt;
1866 
1867 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1868 	if (mrt == NULL)
1869 		return -ENOENT;
1870 
1871 	switch (cmd) {
1872 	case SIOCGETMIFCNT_IN6:
1873 		if (copy_from_user(&vr, arg, sizeof(vr)))
1874 			return -EFAULT;
1875 		if (vr.mifi >= mrt->maxvif)
1876 			return -EINVAL;
1877 		read_lock(&mrt_lock);
1878 		vif = &mrt->vif6_table[vr.mifi];
1879 		if (MIF_EXISTS(mrt, vr.mifi)) {
1880 			vr.icount = vif->pkt_in;
1881 			vr.ocount = vif->pkt_out;
1882 			vr.ibytes = vif->bytes_in;
1883 			vr.obytes = vif->bytes_out;
1884 			read_unlock(&mrt_lock);
1885 
1886 			if (copy_to_user(arg, &vr, sizeof(vr)))
1887 				return -EFAULT;
1888 			return 0;
1889 		}
1890 		read_unlock(&mrt_lock);
1891 		return -EADDRNOTAVAIL;
1892 	case SIOCGETSGCNT_IN6:
1893 		if (copy_from_user(&sr, arg, sizeof(sr)))
1894 			return -EFAULT;
1895 
1896 		read_lock(&mrt_lock);
1897 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1898 		if (c) {
1899 			sr.pktcnt = c->mfc_un.res.pkt;
1900 			sr.bytecnt = c->mfc_un.res.bytes;
1901 			sr.wrong_if = c->mfc_un.res.wrong_if;
1902 			read_unlock(&mrt_lock);
1903 
1904 			if (copy_to_user(arg, &sr, sizeof(sr)))
1905 				return -EFAULT;
1906 			return 0;
1907 		}
1908 		read_unlock(&mrt_lock);
1909 		return -EADDRNOTAVAIL;
1910 	default:
1911 		return -ENOIOCTLCMD;
1912 	}
1913 }
1914 #endif
1915 
1916 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1917 {
1918 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1919 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1920 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1921 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1922 	return dst_output(skb);
1923 }
1924 
1925 /*
1926  *	Processing handlers for ip6mr_forward
1927  */
1928 
1929 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1930 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1931 {
1932 	struct ipv6hdr *ipv6h;
1933 	struct mif_device *vif = &mrt->vif6_table[vifi];
1934 	struct net_device *dev;
1935 	struct dst_entry *dst;
1936 	struct flowi6 fl6;
1937 
1938 	if (vif->dev == NULL)
1939 		goto out_free;
1940 
1941 #ifdef CONFIG_IPV6_PIMSM_V2
1942 	if (vif->flags & MIFF_REGISTER) {
1943 		vif->pkt_out++;
1944 		vif->bytes_out += skb->len;
1945 		vif->dev->stats.tx_bytes += skb->len;
1946 		vif->dev->stats.tx_packets++;
1947 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1948 		goto out_free;
1949 	}
1950 #endif
1951 
1952 	ipv6h = ipv6_hdr(skb);
1953 
1954 	fl6 = (struct flowi6) {
1955 		.flowi6_oif = vif->link,
1956 		.daddr = ipv6h->daddr,
1957 	};
1958 
1959 	dst = ip6_route_output(net, NULL, &fl6);
1960 	if (dst->error) {
1961 		dst_release(dst);
1962 		goto out_free;
1963 	}
1964 
1965 	skb_dst_drop(skb);
1966 	skb_dst_set(skb, dst);
1967 
1968 	/*
1969 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1970 	 * not only before forwarding, but after forwarding on all output
1971 	 * interfaces. It is clear, if mrouter runs a multicasting
1972 	 * program, it should receive packets not depending to what interface
1973 	 * program is joined.
1974 	 * If we will not make it, the program will have to join on all
1975 	 * interfaces. On the other hand, multihoming host (or router, but
1976 	 * not mrouter) cannot join to more than one interface - it will
1977 	 * result in receiving multiple packets.
1978 	 */
1979 	dev = vif->dev;
1980 	skb->dev = dev;
1981 	vif->pkt_out++;
1982 	vif->bytes_out += skb->len;
1983 
1984 	/* We are about to write */
1985 	/* XXX: extension headers? */
1986 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1987 		goto out_free;
1988 
1989 	ipv6h = ipv6_hdr(skb);
1990 	ipv6h->hop_limit--;
1991 
1992 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1993 
1994 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1995 		       ip6mr_forward2_finish);
1996 
1997 out_free:
1998 	kfree_skb(skb);
1999 	return 0;
2000 }
2001 
2002 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2003 {
2004 	int ct;
2005 
2006 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2007 		if (mrt->vif6_table[ct].dev == dev)
2008 			break;
2009 	}
2010 	return ct;
2011 }
2012 
2013 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2014 			  struct sk_buff *skb, struct mfc6_cache *cache)
2015 {
2016 	int psend = -1;
2017 	int vif, ct;
2018 
2019 	vif = cache->mf6c_parent;
2020 	cache->mfc_un.res.pkt++;
2021 	cache->mfc_un.res.bytes += skb->len;
2022 
2023 	/*
2024 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2025 	 */
2026 	if (mrt->vif6_table[vif].dev != skb->dev) {
2027 		int true_vifi;
2028 
2029 		cache->mfc_un.res.wrong_if++;
2030 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
2031 
2032 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2033 		    /* pimsm uses asserts, when switching from RPT to SPT,
2034 		       so that we cannot check that packet arrived on an oif.
2035 		       It is bad, but otherwise we would need to move pretty
2036 		       large chunk of pimd to kernel. Ough... --ANK
2037 		     */
2038 		    (mrt->mroute_do_pim ||
2039 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2040 		    time_after(jiffies,
2041 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2042 			cache->mfc_un.res.last_assert = jiffies;
2043 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2044 		}
2045 		goto dont_forward;
2046 	}
2047 
2048 	mrt->vif6_table[vif].pkt_in++;
2049 	mrt->vif6_table[vif].bytes_in += skb->len;
2050 
2051 	/*
2052 	 *	Forward the frame
2053 	 */
2054 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2055 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2056 			if (psend != -1) {
2057 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2058 				if (skb2)
2059 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2060 			}
2061 			psend = ct;
2062 		}
2063 	}
2064 	if (psend != -1) {
2065 		ip6mr_forward2(net, mrt, skb, cache, psend);
2066 		return 0;
2067 	}
2068 
2069 dont_forward:
2070 	kfree_skb(skb);
2071 	return 0;
2072 }
2073 
2074 
2075 /*
2076  *	Multicast packets for forwarding arrive here
2077  */
2078 
2079 int ip6_mr_input(struct sk_buff *skb)
2080 {
2081 	struct mfc6_cache *cache;
2082 	struct net *net = dev_net(skb->dev);
2083 	struct mr6_table *mrt;
2084 	struct flowi6 fl6 = {
2085 		.flowi6_iif	= skb->dev->ifindex,
2086 		.flowi6_mark	= skb->mark,
2087 	};
2088 	int err;
2089 
2090 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2091 	if (err < 0) {
2092 		kfree_skb(skb);
2093 		return err;
2094 	}
2095 
2096 	read_lock(&mrt_lock);
2097 	cache = ip6mr_cache_find(mrt,
2098 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2099 
2100 	/*
2101 	 *	No usable cache entry
2102 	 */
2103 	if (cache == NULL) {
2104 		int vif;
2105 
2106 		vif = ip6mr_find_vif(mrt, skb->dev);
2107 		if (vif >= 0) {
2108 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2109 			read_unlock(&mrt_lock);
2110 
2111 			return err;
2112 		}
2113 		read_unlock(&mrt_lock);
2114 		kfree_skb(skb);
2115 		return -ENODEV;
2116 	}
2117 
2118 	ip6_mr_forward(net, mrt, skb, cache);
2119 
2120 	read_unlock(&mrt_lock);
2121 
2122 	return 0;
2123 }
2124 
2125 
2126 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2127 			       struct mfc6_cache *c, struct rtmsg *rtm)
2128 {
2129 	int ct;
2130 	struct rtnexthop *nhp;
2131 	struct nlattr *mp_attr;
2132 	struct rta_mfc_stats mfcs;
2133 
2134 	/* If cache is unresolved, don't try to parse IIF and OIF */
2135 	if (c->mf6c_parent >= MAXMIFS)
2136 		return -ENOENT;
2137 
2138 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2139 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2140 		return -EMSGSIZE;
2141 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2142 	if (mp_attr == NULL)
2143 		return -EMSGSIZE;
2144 
2145 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2146 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2147 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2148 			if (nhp == NULL) {
2149 				nla_nest_cancel(skb, mp_attr);
2150 				return -EMSGSIZE;
2151 			}
2152 
2153 			nhp->rtnh_flags = 0;
2154 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2155 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2156 			nhp->rtnh_len = sizeof(*nhp);
2157 		}
2158 	}
2159 
2160 	nla_nest_end(skb, mp_attr);
2161 
2162 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2163 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2164 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2165 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2166 		return -EMSGSIZE;
2167 
2168 	rtm->rtm_type = RTN_MULTICAST;
2169 	return 1;
2170 }
2171 
2172 int ip6mr_get_route(struct net *net,
2173 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2174 {
2175 	int err;
2176 	struct mr6_table *mrt;
2177 	struct mfc6_cache *cache;
2178 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2179 
2180 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2181 	if (mrt == NULL)
2182 		return -ENOENT;
2183 
2184 	read_lock(&mrt_lock);
2185 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2186 
2187 	if (!cache) {
2188 		struct sk_buff *skb2;
2189 		struct ipv6hdr *iph;
2190 		struct net_device *dev;
2191 		int vif;
2192 
2193 		if (nowait) {
2194 			read_unlock(&mrt_lock);
2195 			return -EAGAIN;
2196 		}
2197 
2198 		dev = skb->dev;
2199 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2200 			read_unlock(&mrt_lock);
2201 			return -ENODEV;
2202 		}
2203 
2204 		/* really correct? */
2205 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2206 		if (!skb2) {
2207 			read_unlock(&mrt_lock);
2208 			return -ENOMEM;
2209 		}
2210 
2211 		skb_reset_transport_header(skb2);
2212 
2213 		skb_put(skb2, sizeof(struct ipv6hdr));
2214 		skb_reset_network_header(skb2);
2215 
2216 		iph = ipv6_hdr(skb2);
2217 		iph->version = 0;
2218 		iph->priority = 0;
2219 		iph->flow_lbl[0] = 0;
2220 		iph->flow_lbl[1] = 0;
2221 		iph->flow_lbl[2] = 0;
2222 		iph->payload_len = 0;
2223 		iph->nexthdr = IPPROTO_NONE;
2224 		iph->hop_limit = 0;
2225 		iph->saddr = rt->rt6i_src.addr;
2226 		iph->daddr = rt->rt6i_dst.addr;
2227 
2228 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2229 		read_unlock(&mrt_lock);
2230 
2231 		return err;
2232 	}
2233 
2234 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2235 		cache->mfc_flags |= MFC_NOTIFY;
2236 
2237 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2238 	read_unlock(&mrt_lock);
2239 	return err;
2240 }
2241 
2242 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2243 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
2244 {
2245 	struct nlmsghdr *nlh;
2246 	struct rtmsg *rtm;
2247 	int err;
2248 
2249 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2250 	if (nlh == NULL)
2251 		return -EMSGSIZE;
2252 
2253 	rtm = nlmsg_data(nlh);
2254 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2255 	rtm->rtm_dst_len  = 128;
2256 	rtm->rtm_src_len  = 128;
2257 	rtm->rtm_tos      = 0;
2258 	rtm->rtm_table    = mrt->id;
2259 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2260 		goto nla_put_failure;
2261 	rtm->rtm_type = RTN_MULTICAST;
2262 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2263 	if (c->mfc_flags & MFC_STATIC)
2264 		rtm->rtm_protocol = RTPROT_STATIC;
2265 	else
2266 		rtm->rtm_protocol = RTPROT_MROUTED;
2267 	rtm->rtm_flags    = 0;
2268 
2269 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2270 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2271 		goto nla_put_failure;
2272 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2273 	/* do not break the dump if cache is unresolved */
2274 	if (err < 0 && err != -ENOENT)
2275 		goto nla_put_failure;
2276 
2277 	return nlmsg_end(skb, nlh);
2278 
2279 nla_put_failure:
2280 	nlmsg_cancel(skb, nlh);
2281 	return -EMSGSIZE;
2282 }
2283 
2284 static int mr6_msgsize(bool unresolved, int maxvif)
2285 {
2286 	size_t len =
2287 		NLMSG_ALIGN(sizeof(struct rtmsg))
2288 		+ nla_total_size(4)	/* RTA_TABLE */
2289 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2290 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2291 		;
2292 
2293 	if (!unresolved)
2294 		len = len
2295 		      + nla_total_size(4)	/* RTA_IIF */
2296 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2297 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2298 						/* RTA_MFC_STATS */
2299 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2300 		;
2301 
2302 	return len;
2303 }
2304 
2305 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2306 			      int cmd)
2307 {
2308 	struct net *net = read_pnet(&mrt->net);
2309 	struct sk_buff *skb;
2310 	int err = -ENOBUFS;
2311 
2312 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2313 			GFP_ATOMIC);
2314 	if (skb == NULL)
2315 		goto errout;
2316 
2317 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2318 	if (err < 0)
2319 		goto errout;
2320 
2321 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2322 	return;
2323 
2324 errout:
2325 	kfree_skb(skb);
2326 	if (err < 0)
2327 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2328 }
2329 
/*
 *	Netlink dump callback for RTM_GETROUTE (registered in
 *	ip6_mr_init()).  Emits one RTM_NEWROUTE message per cache entry:
 *	for each table, first the resolved entries of every hash line,
 *	then the entries on the unresolved queue.
 *
 *	The dump is resumable.  cb->args[0..2] hold the table index, the
 *	hash line and the entry index at which the previous call stopped;
 *	they are read into s_t/s_h/s_e on entry and written back from
 *	t/h/e on exit.  Entries before the saved position are skipped
 *	without being emitted.
 *
 *	Returns skb->len, i.e. the amount of data placed in @skb so far.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	/* Position reached by the previous invocation (all zero at first) */
	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		/* Tables before the saved one were already dumped in full */
		if (t < s_t)
			goto next_table;
		/* Past the saved table: start from its first hash line */
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				/* Stop as soon as an entry does not fit */
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE) < 0)
					goto done;
next_entry:
				e++;
			}
			/* The saved entry index only applies to the first line visited */
			e = s_e = 0;
		}
		/* Unresolved entries; here h == MFC6_LINES and e counts from 0 */
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		/* Table finished: reset the per-table resume state */
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save the position for the next invocation */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
2392