xref: /linux/net/ipv6/ip6mr.c (revision b3b77c8caef1750ebeea1054e39e358550ea9f55)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <net/ip6_checksum.h>
54 
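/* Per-namespace multicast routing table.  Each table has its own
 * MIF array, a hash of resolved (S,G) cache entries and a queue of
 * entries still awaiting resolution by the user-space daemon.  More
 * than one table exists only with CONFIG_IPV6_MROUTE_MULTIPLE_TABLES.
 */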
55 struct mr6_table {
56 	struct list_head	list;
57 #ifdef CONFIG_NET_NS
58 	struct net		*net;
59 #endif
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct mif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	int			mroute_do_assert;
69 	int			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
83 /* Big lock, protecting vif table, mrt cache and mroute socket state.
84    Note that all changes are serialized by rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We return to Alan's original scheme. The hash table of resolved
99    entries is changed only in process context and protected
100    with the weak lock mrt_lock. The queue of unresolved entries is
101    protected with the strong spinlock mfc_unres_lock.
102 
103    This way the data path is entirely free of exclusive locks.
104  */
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			  struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
118 			       struct netlink_callback *cb);
119 static void mroute_clean_tables(struct mr6_table *mrt);
120 static void ipmr_expire_process(unsigned long arg);
121 
122 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123 #define ip6mr_for_each_table(mrt, net) \
124 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125 
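/* Look up a table by id.  The table list is RCU-protected; callers
 * are expected to run under rcu_read_lock() or RTNL.
 */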
126 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
127 {
128 	struct mr6_table *mrt;
129 
130 	ip6mr_for_each_table(mrt, net) {
131 		if (mrt->id == id)
132 			return mrt;
133 	}
134 	return NULL;
135 }
136 
137 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
138 			    struct mr6_table **mrt)
139 {
140 	struct ip6mr_result res;
141 	struct fib_lookup_arg arg = { .result = &res, };
142 	int err;
143 
144 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
145 	if (err < 0)
146 		return err;
147 	*mrt = res.mrt;
148 	return 0;
149 }
150 
151 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
152 			     int flags, struct fib_lookup_arg *arg)
153 {
154 	struct ip6mr_result *res = arg->result;
155 	struct mr6_table *mrt;
156 
157 	switch (rule->action) {
158 	case FR_ACT_TO_TBL:
159 		break;
160 	case FR_ACT_UNREACHABLE:
161 		return -ENETUNREACH;
162 	case FR_ACT_PROHIBIT:
163 		return -EACCES;
164 	case FR_ACT_BLACKHOLE:
165 	default:
166 		return -EINVAL;
167 	}
168 
169 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
170 	if (mrt == NULL)
171 		return -EAGAIN;
172 	res->mrt = mrt;
173 	return 0;
174 }
175 
176 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
177 {
178 	return 1;
179 }
180 
181 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
182 	FRA_GENERIC_POLICY,
183 };
184 
185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 				struct fib_rule_hdr *frh, struct nlattr **tb)
187 {
188 	return 0;
189 }
190 
191 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
192 			      struct nlattr **tb)
193 {
194 	return 1;
195 }
196 
197 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
198 			   struct fib_rule_hdr *frh)
199 {
200 	frh->dst_len = 0;
201 	frh->src_len = 0;
202 	frh->tos     = 0;
203 	return 0;
204 }
205 
206 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
207 	.family		= RTNL_FAMILY_IP6MR,
208 	.rule_size	= sizeof(struct ip6mr_rule),
209 	.addr_size	= sizeof(struct in6_addr),
210 	.action		= ip6mr_rule_action,
211 	.match		= ip6mr_rule_match,
212 	.configure	= ip6mr_rule_configure,
213 	.compare	= ip6mr_rule_compare,
214 	.default_pref	= fib_default_rule_pref,
215 	.fill		= ip6mr_rule_fill,
216 	.nlgroup	= RTNLGRP_IPV6_RULE,
217 	.policy		= ip6mr_rule_policy,
218 	.owner		= THIS_MODULE,
219 };
220 
221 static int __net_init ip6mr_rules_init(struct net *net)
222 {
223 	struct fib_rules_ops *ops;
224 	struct mr6_table *mrt;
225 	int err;
226 
227 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
228 	if (IS_ERR(ops))
229 		return PTR_ERR(ops);
230 
231 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
232 
233 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
234 	if (mrt == NULL) {
235 		err = -ENOMEM;
236 		goto err1;
237 	}
238 
239 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
240 	if (err < 0)
241 		goto err2;
242 
243 	net->ipv6.mr6_rules_ops = ops;
244 	return 0;
245 
246 err2:
247 	kfree(mrt);
248 err1:
249 	fib_rules_unregister(ops);
250 	return err;
251 }
252 
253 static void __net_exit ip6mr_rules_exit(struct net *net)
254 {
255 	struct mr6_table *mrt, *next;
256 
257 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list)
258 		ip6mr_free_table(mrt);
259 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
260 }
261 #else
262 #define ip6mr_for_each_table(mrt, net) \
263 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
264 
265 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
266 {
267 	return net->ipv6.mrt6;
268 }
269 
270 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
271 			    struct mr6_table **mrt)
272 {
273 	*mrt = net->ipv6.mrt6;
274 	return 0;
275 }
276 
277 static int __net_init ip6mr_rules_init(struct net *net)
278 {
279 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
280 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
281 }
282 
283 static void __net_exit ip6mr_rules_exit(struct net *net)
284 {
285 	ip6mr_free_table(net->ipv6.mrt6);
286 }
287 #endif
288 
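/* Return the table with the given id, creating it on first use and
 * initialising its cache lines, unresolved queue and expire timer.
 */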
289 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
290 {
291 	struct mr6_table *mrt;
292 	unsigned int i;
293 
294 	mrt = ip6mr_get_table(net, id);
295 	if (mrt != NULL)
296 		return mrt;
297 
298 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
299 	if (mrt == NULL)
300 		return NULL;
301 	mrt->id = id;
302 	write_pnet(&mrt->net, net);
303 
304 	/* Forwarding cache */
305 	for (i = 0; i < MFC6_LINES; i++)
306 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
307 
308 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
309 
310 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
311 		    (unsigned long)mrt);
312 
313 #ifdef CONFIG_IPV6_PIMSM_V2
314 	mrt->mroute_reg_vif_num = -1;
315 #endif
316 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
317 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
318 #endif
319 	return mrt;
320 }
321 
322 static void ip6mr_free_table(struct mr6_table *mrt)
323 {
324 	del_timer(&mrt->ipmr_expire_timer);
325 	mroute_clean_tables(mrt);
326 	kfree(mrt);
327 }
328 
329 #ifdef CONFIG_PROC_FS
330 
331 struct ipmr_mfc_iter {
332 	struct seq_net_private p;
333 	struct mr6_table *mrt;
334 	struct list_head *cache;
335 	int ct;
336 };
337 
338 
339 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
340 					   struct ipmr_mfc_iter *it, loff_t pos)
341 {
342 	struct mr6_table *mrt = it->mrt;
343 	struct mfc6_cache *mfc;
344 
345 	read_lock(&mrt_lock);
346 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
347 		it->cache = &mrt->mfc6_cache_array[it->ct];
348 		list_for_each_entry(mfc, it->cache, list)
349 			if (pos-- == 0)
350 				return mfc;
351 	}
352 	read_unlock(&mrt_lock);
353 
354 	spin_lock_bh(&mfc_unres_lock);
355 	it->cache = &mrt->mfc6_unres_queue;
356 	list_for_each_entry(mfc, it->cache, list)
357 		if (pos-- == 0)
358 			return mfc;
359 	spin_unlock_bh(&mfc_unres_lock);
360 
361 	it->cache = NULL;
362 	return NULL;
363 }
364 
365 /*
366  *	The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif
367  */
368 
369 struct ipmr_vif_iter {
370 	struct seq_net_private p;
371 	struct mr6_table *mrt;
372 	int ct;
373 };
374 
375 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
376 					    struct ipmr_vif_iter *iter,
377 					    loff_t pos)
378 {
379 	struct mr6_table *mrt = iter->mrt;
380 
381 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
382 		if (!MIF_EXISTS(mrt, iter->ct))
383 			continue;
384 		if (pos-- == 0)
385 			return &mrt->vif6_table[iter->ct];
386 	}
387 	return NULL;
388 }
389 
390 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
391 	__acquires(mrt_lock)
392 {
393 	struct ipmr_vif_iter *iter = seq->private;
394 	struct net *net = seq_file_net(seq);
395 	struct mr6_table *mrt;
396 
397 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
398 	if (mrt == NULL)
399 		return ERR_PTR(-ENOENT);
400 
401 	iter->mrt = mrt;
402 
403 	read_lock(&mrt_lock);
404 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
405 		: SEQ_START_TOKEN;
406 }
407 
408 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
409 {
410 	struct ipmr_vif_iter *iter = seq->private;
411 	struct net *net = seq_file_net(seq);
412 	struct mr6_table *mrt = iter->mrt;
413 
414 	++*pos;
415 	if (v == SEQ_START_TOKEN)
416 		return ip6mr_vif_seq_idx(net, iter, 0);
417 
418 	while (++iter->ct < mrt->maxvif) {
419 		if (!MIF_EXISTS(mrt, iter->ct))
420 			continue;
421 		return &mrt->vif6_table[iter->ct];
422 	}
423 	return NULL;
424 }
425 
426 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
427 	__releases(mrt_lock)
428 {
429 	read_unlock(&mrt_lock);
430 }
431 
432 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
433 {
434 	struct ipmr_vif_iter *iter = seq->private;
435 	struct mr6_table *mrt = iter->mrt;
436 
437 	if (v == SEQ_START_TOKEN) {
438 		seq_puts(seq,
439 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
440 	} else {
441 		const struct mif_device *vif = v;
442 		const char *name = vif->dev ? vif->dev->name : "none";
443 
444 		seq_printf(seq,
445 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
446 			   vif - mrt->vif6_table,
447 			   name, vif->bytes_in, vif->pkt_in,
448 			   vif->bytes_out, vif->pkt_out,
449 			   vif->flags);
450 	}
451 	return 0;
452 }
453 
454 static const struct seq_operations ip6mr_vif_seq_ops = {
455 	.start = ip6mr_vif_seq_start,
456 	.next  = ip6mr_vif_seq_next,
457 	.stop  = ip6mr_vif_seq_stop,
458 	.show  = ip6mr_vif_seq_show,
459 };
460 
461 static int ip6mr_vif_open(struct inode *inode, struct file *file)
462 {
463 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
464 			    sizeof(struct ipmr_vif_iter));
465 }
466 
467 static const struct file_operations ip6mr_vif_fops = {
468 	.owner	 = THIS_MODULE,
469 	.open    = ip6mr_vif_open,
470 	.read    = seq_read,
471 	.llseek  = seq_lseek,
472 	.release = seq_release_net,
473 };
474 
475 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
476 {
477 	struct ipmr_mfc_iter *it = seq->private;
478 	struct net *net = seq_file_net(seq);
479 	struct mr6_table *mrt;
480 
481 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
482 	if (mrt == NULL)
483 		return ERR_PTR(-ENOENT);
484 
485 	it->mrt = mrt;
486 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
487 		: SEQ_START_TOKEN;
488 }
489 
490 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
491 {
492 	struct mfc6_cache *mfc = v;
493 	struct ipmr_mfc_iter *it = seq->private;
494 	struct net *net = seq_file_net(seq);
495 	struct mr6_table *mrt = it->mrt;
496 
497 	++*pos;
498 
499 	if (v == SEQ_START_TOKEN)
500 		return ipmr_mfc_seq_idx(net, seq->private, 0);
501 
502 	if (mfc->list.next != it->cache)
503 		return list_entry(mfc->list.next, struct mfc6_cache, list);
504 
505 	if (it->cache == &mrt->mfc6_unres_queue)
506 		goto end_of_list;
507 
508 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
509 
510 	while (++it->ct < MFC6_LINES) {
511 		it->cache = &mrt->mfc6_cache_array[it->ct];
512 		if (list_empty(it->cache))
513 			continue;
514 		return list_first_entry(it->cache, struct mfc6_cache, list);
515 	}
516 
517 	/* exhausted cache_array, show unresolved */
518 	read_unlock(&mrt_lock);
519 	it->cache = &mrt->mfc6_unres_queue;
520 	it->ct = 0;
521 
522 	spin_lock_bh(&mfc_unres_lock);
523 	if (!list_empty(it->cache))
524 		return list_first_entry(it->cache, struct mfc6_cache, list);
525 
526  end_of_list:
527 	spin_unlock_bh(&mfc_unres_lock);
528 	it->cache = NULL;
529 
530 	return NULL;
531 }
532 
533 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
534 {
535 	struct ipmr_mfc_iter *it = seq->private;
536 	struct mr6_table *mrt = it->mrt;
537 
538 	if (it->cache == &mrt->mfc6_unres_queue)
539 		spin_unlock_bh(&mfc_unres_lock);
540 	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
541 		read_unlock(&mrt_lock);
542 }
543 
544 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
545 {
546 	int n;
547 
548 	if (v == SEQ_START_TOKEN) {
549 		seq_puts(seq,
550 			 "Group                            "
551 			 "Origin                           "
552 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
553 	} else {
554 		const struct mfc6_cache *mfc = v;
555 		const struct ipmr_mfc_iter *it = seq->private;
556 		struct mr6_table *mrt = it->mrt;
557 
558 		seq_printf(seq, "%pI6 %pI6 %-3hd",
559 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
560 			   mfc->mf6c_parent);
561 
562 		if (it->cache != &mrt->mfc6_unres_queue) {
563 			seq_printf(seq, " %8lu %8lu %8lu",
564 				   mfc->mfc_un.res.pkt,
565 				   mfc->mfc_un.res.bytes,
566 				   mfc->mfc_un.res.wrong_if);
567 			for (n = mfc->mfc_un.res.minvif;
568 			     n < mfc->mfc_un.res.maxvif; n++) {
569 				if (MIF_EXISTS(mrt, n) &&
570 				    mfc->mfc_un.res.ttls[n] < 255)
571 					seq_printf(seq,
572 						   " %2d:%-3d",
573 						   n, mfc->mfc_un.res.ttls[n]);
574 			}
575 		} else {
576 			/* unresolved mfc_caches don't contain
577 			 * pkt, bytes and wrong_if values
578 			 */
579 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
580 		}
581 		seq_putc(seq, '\n');
582 	}
583 	return 0;
584 }
585 
586 static const struct seq_operations ipmr_mfc_seq_ops = {
587 	.start = ipmr_mfc_seq_start,
588 	.next  = ipmr_mfc_seq_next,
589 	.stop  = ipmr_mfc_seq_stop,
590 	.show  = ipmr_mfc_seq_show,
591 };
592 
593 static int ipmr_mfc_open(struct inode *inode, struct file *file)
594 {
595 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
596 			    sizeof(struct ipmr_mfc_iter));
597 }
598 
599 static const struct file_operations ip6mr_mfc_fops = {
600 	.owner	 = THIS_MODULE,
601 	.open    = ipmr_mfc_open,
602 	.read    = seq_read,
603 	.llseek  = seq_lseek,
604 	.release = seq_release_net,
605 };
606 #endif
607 
608 #ifdef CONFIG_IPV6_PIMSM_V2
609 
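/* Receive a PIM register: validate the header and checksum, strip
 * the outer IPv6/PIM headers and re-inject the encapsulated
 * multicast packet as if it had arrived on the register device.
 */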
610 static int pim6_rcv(struct sk_buff *skb)
611 {
612 	struct pimreghdr *pim;
613 	struct ipv6hdr   *encap;
614 	struct net_device  *reg_dev = NULL;
615 	struct net *net = dev_net(skb->dev);
616 	struct mr6_table *mrt;
617 	struct flowi fl = {
618 		.iif	= skb->dev->ifindex,
619 		.mark	= skb->mark,
620 	};
621 	int reg_vif_num;
622 
623 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
624 		goto drop;
625 
626 	pim = (struct pimreghdr *)skb_transport_header(skb);
627 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
628 	    (pim->flags & PIM_NULL_REGISTER) ||
629 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
630 			     sizeof(*pim), IPPROTO_PIM,
631 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
632 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
633 		goto drop;
634 
635 	/* check if the inner packet is destined to mcast group */
636 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
637 				   sizeof(*pim));
638 
639 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
640 	    encap->payload_len == 0 ||
641 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
642 		goto drop;
643 
644 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
645 		goto drop;
646 	reg_vif_num = mrt->mroute_reg_vif_num;
647 
648 	read_lock(&mrt_lock);
649 	if (reg_vif_num >= 0)
650 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
651 	if (reg_dev)
652 		dev_hold(reg_dev);
653 	read_unlock(&mrt_lock);
654 
655 	if (reg_dev == NULL)
656 		goto drop;
657 
658 	skb->mac_header = skb->network_header;
659 	skb_pull(skb, (u8 *)encap - skb->data);
660 	skb_reset_network_header(skb);
661 	skb->protocol = htons(ETH_P_IPV6);
662 	skb->ip_summed = CHECKSUM_NONE;
663 	skb->pkt_type = PACKET_HOST;
664 
665 	skb_tunnel_rx(skb, reg_dev);
666 
667 	netif_rx(skb);
668 	dev_put(reg_dev);
669 	return 0;
670  drop:
671 	kfree_skb(skb);
672 	return 0;
673 }
674 
675 static const struct inet6_protocol pim6_protocol = {
676 	.handler	=	pim6_rcv,
677 };
678 
679 /* Service routines creating virtual interfaces: PIMREG */
680 
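/* Transmits on the register device never reach the wire: the packet
 * is bounced to the daemon as an MRT6MSG_WHOLEPKT report and freed.
 */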
681 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
682 				      struct net_device *dev)
683 {
684 	struct net *net = dev_net(dev);
685 	struct mr6_table *mrt;
686 	struct flowi fl = {
687 		.oif		= dev->ifindex,
688 		.iif		= skb->skb_iif,
689 		.mark		= skb->mark,
690 	};
691 	int err;
692 
693 	err = ip6mr_fib_lookup(net, &fl, &mrt);
694 	if (err < 0)
695 		return err;
696 
697 	read_lock(&mrt_lock);
698 	dev->stats.tx_bytes += skb->len;
699 	dev->stats.tx_packets++;
700 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
701 	read_unlock(&mrt_lock);
702 	kfree_skb(skb);
703 	return NETDEV_TX_OK;
704 }
705 
706 static const struct net_device_ops reg_vif_netdev_ops = {
707 	.ndo_start_xmit	= reg_vif_xmit,
708 };
709 
710 static void reg_vif_setup(struct net_device *dev)
711 {
712 	dev->type		= ARPHRD_PIMREG;
713 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
714 	dev->flags		= IFF_NOARP;
715 	dev->netdev_ops		= &reg_vif_netdev_ops;
716 	dev->destructor		= free_netdev;
717 	dev->features		|= NETIF_F_NETNS_LOCAL;
718 }
719 
720 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
721 {
722 	struct net_device *dev;
723 	char name[IFNAMSIZ];
724 
725 	if (mrt->id == RT6_TABLE_DFLT)
726 		sprintf(name, "pim6reg");
727 	else
728 		sprintf(name, "pim6reg%u", mrt->id);
729 
730 	dev = alloc_netdev(0, name, reg_vif_setup);
731 	if (dev == NULL)
732 		return NULL;
733 
734 	dev_net_set(dev, net);
735 
736 	if (register_netdevice(dev)) {
737 		free_netdev(dev);
738 		return NULL;
739 	}
740 	dev->iflink = 0;
741 
742 	if (dev_open(dev))
743 		goto failure;
744 
745 	dev_hold(dev);
746 	return dev;
747 
748 failure:
749 	/* allow the register to be completed before unregistering. */
750 	rtnl_unlock();
751 	rtnl_lock();
752 
753 	unregister_netdevice(dev);
754 	return NULL;
755 }
756 #endif
757 
758 /*
759  *	Delete a VIF entry
760  */
761 
762 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
763 {
764 	struct mif_device *v;
765 	struct net_device *dev;
766 	struct inet6_dev *in6_dev;
767 
768 	if (vifi < 0 || vifi >= mrt->maxvif)
769 		return -EADDRNOTAVAIL;
770 
771 	v = &mrt->vif6_table[vifi];
772 
773 	write_lock_bh(&mrt_lock);
774 	dev = v->dev;
775 	v->dev = NULL;
776 
777 	if (!dev) {
778 		write_unlock_bh(&mrt_lock);
779 		return -EADDRNOTAVAIL;
780 	}
781 
782 #ifdef CONFIG_IPV6_PIMSM_V2
783 	if (vifi == mrt->mroute_reg_vif_num)
784 		mrt->mroute_reg_vif_num = -1;
785 #endif
786 
787 	if (vifi + 1 == mrt->maxvif) {
788 		int tmp;
789 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
790 			if (MIF_EXISTS(mrt, tmp))
791 				break;
792 		}
793 		mrt->maxvif = tmp + 1;
794 	}
795 
796 	write_unlock_bh(&mrt_lock);
797 
798 	dev_set_allmulti(dev, -1);
799 
800 	in6_dev = __in6_dev_get(dev);
801 	if (in6_dev)
802 		in6_dev->cnf.mc_forwarding--;
803 
804 	if (v->flags & MIFF_REGISTER)
805 		unregister_netdevice_queue(dev, head);
806 
807 	dev_put(dev);
808 	return 0;
809 }
810 
811 static inline void ip6mr_cache_free(struct mfc6_cache *c)
812 {
813 	kmem_cache_free(mrt_cachep, c);
814 }
815 
816 /* Destroy an unresolved cache entry, killing queued skbs
817    and reporting error to netlink readers.
818  */
819 
820 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
821 {
822 	struct net *net = read_pnet(&mrt->net);
823 	struct sk_buff *skb;
824 
825 	atomic_dec(&mrt->cache_resolve_queue_len);
826 
827 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
828 		if (ipv6_hdr(skb)->version == 0) {
829 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
830 			nlh->nlmsg_type = NLMSG_ERROR;
831 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
832 			skb_trim(skb, nlh->nlmsg_len);
833 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
834 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
835 		} else
836 			kfree_skb(skb);
837 	}
838 
839 	ip6mr_cache_free(c);
840 }
841 
842 
843 /* Timer process for all the unresolved queue. */
844 
845 static void ipmr_do_expire_process(struct mr6_table *mrt)
846 {
847 	unsigned long now = jiffies;
848 	unsigned long expires = 10 * HZ;
849 	struct mfc6_cache *c, *next;
850 
851 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
852 		if (time_after(c->mfc_un.unres.expires, now)) {
853 			/* not yet... */
854 			unsigned long interval = c->mfc_un.unres.expires - now;
855 			if (interval < expires)
856 				expires = interval;
857 			continue;
858 		}
859 
860 		list_del(&c->list);
861 		ip6mr_destroy_unres(mrt, c);
862 	}
863 
864 	if (!list_empty(&mrt->mfc6_unres_queue))
865 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
866 }
867 
868 static void ipmr_expire_process(unsigned long arg)
869 {
870 	struct mr6_table *mrt = (struct mr6_table *)arg;
871 
872 	if (!spin_trylock(&mfc_unres_lock)) {
873 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
874 		return;
875 	}
876 
877 	if (!list_empty(&mrt->mfc6_unres_queue))
878 		ipmr_do_expire_process(mrt);
879 
880 	spin_unlock(&mfc_unres_lock);
881 }
882 
883 /* Fill oifs list. It is called under write locked mrt_lock. */
884 
885 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
886 				    unsigned char *ttls)
887 {
888 	int vifi;
889 
890 	cache->mfc_un.res.minvif = MAXMIFS;
891 	cache->mfc_un.res.maxvif = 0;
892 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
893 
894 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
895 		if (MIF_EXISTS(mrt, vifi) &&
896 		    ttls[vifi] && ttls[vifi] < 255) {
897 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
898 			if (cache->mfc_un.res.minvif > vifi)
899 				cache->mfc_un.res.minvif = vifi;
900 			if (cache->mfc_un.res.maxvif <= vifi)
901 				cache->mfc_un.res.maxvif = vifi + 1;
902 		}
903 	}
904 }
905 
906 static int mif6_add(struct net *net, struct mr6_table *mrt,
907 		    struct mif6ctl *vifc, int mrtsock)
908 {
909 	int vifi = vifc->mif6c_mifi;
910 	struct mif_device *v = &mrt->vif6_table[vifi];
911 	struct net_device *dev;
912 	struct inet6_dev *in6_dev;
913 	int err;
914 
915 	/* Is vif busy? */
916 	if (MIF_EXISTS(mrt, vifi))
917 		return -EADDRINUSE;
918 
919 	switch (vifc->mif6c_flags) {
920 #ifdef CONFIG_IPV6_PIMSM_V2
921 	case MIFF_REGISTER:
922 		/*
923 		 * Special Purpose VIF in PIM
924 		 * All the packets will be sent to the daemon
925 		 */
926 		if (mrt->mroute_reg_vif_num >= 0)
927 			return -EADDRINUSE;
928 		dev = ip6mr_reg_vif(net, mrt);
929 		if (!dev)
930 			return -ENOBUFS;
931 		err = dev_set_allmulti(dev, 1);
932 		if (err) {
933 			unregister_netdevice(dev);
934 			dev_put(dev);
935 			return err;
936 		}
937 		break;
938 #endif
939 	case 0:
940 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
941 		if (!dev)
942 			return -EADDRNOTAVAIL;
943 		err = dev_set_allmulti(dev, 1);
944 		if (err) {
945 			dev_put(dev);
946 			return err;
947 		}
948 		break;
949 	default:
950 		return -EINVAL;
951 	}
952 
953 	in6_dev = __in6_dev_get(dev);
954 	if (in6_dev)
955 		in6_dev->cnf.mc_forwarding++;
956 
957 	/*
958 	 *	Fill in the VIF structures
959 	 */
960 	v->rate_limit = vifc->vifc_rate_limit;
961 	v->flags = vifc->mif6c_flags;
962 	if (!mrtsock)
963 		v->flags |= VIFF_STATIC;
964 	v->threshold = vifc->vifc_threshold;
965 	v->bytes_in = 0;
966 	v->bytes_out = 0;
967 	v->pkt_in = 0;
968 	v->pkt_out = 0;
969 	v->link = dev->ifindex;
970 	if (v->flags & MIFF_REGISTER)
971 		v->link = dev->iflink;
972 
973 	/* And finish update writing critical data */
974 	write_lock_bh(&mrt_lock);
975 	v->dev = dev;
976 #ifdef CONFIG_IPV6_PIMSM_V2
977 	if (v->flags & MIFF_REGISTER)
978 		mrt->mroute_reg_vif_num = vifi;
979 #endif
980 	if (vifi + 1 > mrt->maxvif)
981 		mrt->maxvif = vifi + 1;
982 	write_unlock_bh(&mrt_lock);
983 	return 0;
984 }
985 
986 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
987 					   struct in6_addr *origin,
988 					   struct in6_addr *mcastgrp)
989 {
990 	int line = MFC6_HASH(mcastgrp, origin);
991 	struct mfc6_cache *c;
992 
993 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
994 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
995 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
996 			return c;
997 	}
998 	return NULL;
999 }
1000 
1001 /*
1002  *	Allocate a multicast cache entry
1003  */
1004 static struct mfc6_cache *ip6mr_cache_alloc(void)
1005 {
1006 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1007 	if (c == NULL)
1008 		return NULL;
1009 	c->mfc_un.res.minvif = MAXMIFS;
1010 	return c;
1011 }
1012 
1013 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1014 {
1015 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1016 	if (c == NULL)
1017 		return NULL;
1018 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1019 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1020 	return c;
1021 }
1022 
1023 /*
1024  *	A cache entry has gone into a resolved state from queued
1025  */
1026 
1027 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1028 				struct mfc6_cache *uc, struct mfc6_cache *c)
1029 {
1030 	struct sk_buff *skb;
1031 
1032 	/*
1033 	 *	Play the pending entries through our router
1034 	 */
1035 
1036 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1037 		if (ipv6_hdr(skb)->version == 0) {
1038 			int err;
1039 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1040 
1041 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1042 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1043 			} else {
1044 				nlh->nlmsg_type = NLMSG_ERROR;
1045 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1046 				skb_trim(skb, nlh->nlmsg_len);
1047 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1048 			}
1049 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1050 		} else
1051 			ip6_mr_forward(net, mrt, skb, c);
1052 	}
1053 }
1054 
1055 /*
1056  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1057  *	expects the following bizarre scheme.
1058  *
1059  *	Called under mrt_lock.
1060  */
1061 
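/* assert selects the message type: MRT6MSG_NOCACHE when no route
 * exists yet, MRT6MSG_WRONGMIF when a packet arrives on an
 * unexpected interface (candidate PIM assert), or MRT6MSG_WHOLEPKT
 * when the complete packet must reach the daemon for PIM register
 * processing.
 */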
1062 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1063 			      mifi_t mifi, int assert)
1064 {
1065 	struct sk_buff *skb;
1066 	struct mrt6msg *msg;
1067 	int ret;
1068 
1069 #ifdef CONFIG_IPV6_PIMSM_V2
1070 	if (assert == MRT6MSG_WHOLEPKT)
1071 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1072 						+sizeof(*msg));
1073 	else
1074 #endif
1075 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1076 
1077 	if (!skb)
1078 		return -ENOBUFS;
1079 
1080 	/* I suppose that internal messages
1081 	 * do not require checksums */
1082 
1083 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1084 
1085 #ifdef CONFIG_IPV6_PIMSM_V2
1086 	if (assert == MRT6MSG_WHOLEPKT) {
1087 		/* Ugly, but we have no choice with this interface.
1088 		   Duplicate old header, fix length etc.
1089 		   And all this only to mangle msg->im6_msgtype and
1090 		   to set msg->im6_mbz to "mbz" :-)
1091 		 */
1092 		skb_push(skb, -skb_network_offset(pkt));
1093 
1094 		skb_push(skb, sizeof(*msg));
1095 		skb_reset_transport_header(skb);
1096 		msg = (struct mrt6msg *)skb_transport_header(skb);
1097 		msg->im6_mbz = 0;
1098 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1099 		msg->im6_mif = mrt->mroute_reg_vif_num;
1100 		msg->im6_pad = 0;
1101 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1102 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1103 
1104 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1105 	} else
1106 #endif
1107 	{
1108 	/*
1109 	 *	Copy the IP header
1110 	 */
1111 
1112 	skb_put(skb, sizeof(struct ipv6hdr));
1113 	skb_reset_network_header(skb);
1114 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1115 
1116 	/*
1117 	 *	Add our header
1118 	 */
1119 	skb_put(skb, sizeof(*msg));
1120 	skb_reset_transport_header(skb);
1121 	msg = (struct mrt6msg *)skb_transport_header(skb);
1122 
1123 	msg->im6_mbz = 0;
1124 	msg->im6_msgtype = assert;
1125 	msg->im6_mif = mifi;
1126 	msg->im6_pad = 0;
1127 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1128 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1129 
1130 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1131 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1132 	}
1133 
1134 	if (mrt->mroute6_sk == NULL) {
1135 		kfree_skb(skb);
1136 		return -EINVAL;
1137 	}
1138 
1139 	/*
1140 	 *	Deliver to user space multicast routing algorithms
1141 	 */
1142 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1143 	if (ret < 0) {
1144 		if (net_ratelimit())
1145 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1146 		kfree_skb(skb);
1147 	}
1148 
1149 	return ret;
1150 }
1151 
1152 /*
1153  *	Queue a packet for resolution. The unresolved cache entry is created and used under mfc_unres_lock.
1154  */
1155 
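/* At most ten unresolved entries are kept per table, and each entry
 * buffers no more than four packets while the daemon resolves it.
 */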
1156 static int
1157 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1158 {
1159 	bool found = false;
1160 	int err;
1161 	struct mfc6_cache *c;
1162 
1163 	spin_lock_bh(&mfc_unres_lock);
1164 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1165 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1166 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1167 			found = true;
1168 			break;
1169 		}
1170 	}
1171 
1172 	if (!found) {
1173 		/*
1174 		 *	Create a new entry if allowable
1175 		 */
1176 
1177 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1178 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1179 			spin_unlock_bh(&mfc_unres_lock);
1180 
1181 			kfree_skb(skb);
1182 			return -ENOBUFS;
1183 		}
1184 
1185 		/*
1186 		 *	Fill in the new cache entry
1187 		 */
1188 		c->mf6c_parent = -1;
1189 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1190 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1191 
1192 		/*
1193 		 *	Reflect first query at pim6sd
1194 		 */
1195 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1196 		if (err < 0) {
1197 			/* If the report failed throw the cache entry
1198 			   out - Brad Parker
1199 			 */
1200 			spin_unlock_bh(&mfc_unres_lock);
1201 
1202 			ip6mr_cache_free(c);
1203 			kfree_skb(skb);
1204 			return err;
1205 		}
1206 
1207 		atomic_inc(&mrt->cache_resolve_queue_len);
1208 		list_add(&c->list, &mrt->mfc6_unres_queue);
1209 
1210 		ipmr_do_expire_process(mrt);
1211 	}
1212 
1213 	/*
1214 	 *	See if we can append the packet
1215 	 */
1216 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1217 		kfree_skb(skb);
1218 		err = -ENOBUFS;
1219 	} else {
1220 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1221 		err = 0;
1222 	}
1223 
1224 	spin_unlock_bh(&mfc_unres_lock);
1225 	return err;
1226 }
1227 
1228 /*
1229  *	MFC6 cache manipulation by user space
1230  */
1231 
1232 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1233 {
1234 	int line;
1235 	struct mfc6_cache *c, *next;
1236 
1237 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1238 
1239 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1240 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1241 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1242 			write_lock_bh(&mrt_lock);
1243 			list_del(&c->list);
1244 			write_unlock_bh(&mrt_lock);
1245 
1246 			ip6mr_cache_free(c);
1247 			return 0;
1248 		}
1249 	}
1250 	return -ENOENT;
1251 }
1252 
1253 static int ip6mr_device_event(struct notifier_block *this,
1254 			      unsigned long event, void *ptr)
1255 {
1256 	struct net_device *dev = ptr;
1257 	struct net *net = dev_net(dev);
1258 	struct mr6_table *mrt;
1259 	struct mif_device *v;
1260 	int ct;
1261 	LIST_HEAD(list);
1262 
1263 	if (event != NETDEV_UNREGISTER)
1264 		return NOTIFY_DONE;
1265 
1266 	ip6mr_for_each_table(mrt, net) {
1267 		v = &mrt->vif6_table[0];
1268 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1269 			if (v->dev == dev)
1270 				mif6_delete(mrt, ct, &list);
1271 		}
1272 	}
1273 	unregister_netdevice_many(&list);
1274 
1275 	return NOTIFY_DONE;
1276 }
1277 
1278 static struct notifier_block ip6_mr_notifier = {
1279 	.notifier_call = ip6mr_device_event
1280 };
1281 
1282 /*
1283  *	Setup for IP multicast routing
1284  */
1285 
1286 static int __net_init ip6mr_net_init(struct net *net)
1287 {
1288 	int err;
1289 
1290 	err = ip6mr_rules_init(net);
1291 	if (err < 0)
1292 		goto fail;
1293 
1294 #ifdef CONFIG_PROC_FS
1295 	err = -ENOMEM;
1296 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1297 		goto proc_vif_fail;
1298 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1299 		goto proc_cache_fail;
1300 #endif
1301 
1302 	return 0;
1303 
1304 #ifdef CONFIG_PROC_FS
1305 proc_cache_fail:
1306 	proc_net_remove(net, "ip6_mr_vif");
1307 proc_vif_fail:
1308 	ip6mr_rules_exit(net);
1309 #endif
1310 fail:
1311 	return err;
1312 }
1313 
1314 static void __net_exit ip6mr_net_exit(struct net *net)
1315 {
1316 #ifdef CONFIG_PROC_FS
1317 	proc_net_remove(net, "ip6_mr_cache");
1318 	proc_net_remove(net, "ip6_mr_vif");
1319 #endif
1320 	ip6mr_rules_exit(net);
1321 }
1322 
1323 static struct pernet_operations ip6mr_net_ops = {
1324 	.init = ip6mr_net_init,
1325 	.exit = ip6mr_net_exit,
1326 };
1327 
1328 int __init ip6_mr_init(void)
1329 {
1330 	int err;
1331 
1332 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1333 				       sizeof(struct mfc6_cache),
1334 				       0, SLAB_HWCACHE_ALIGN,
1335 				       NULL);
1336 	if (!mrt_cachep)
1337 		return -ENOMEM;
1338 
1339 	err = register_pernet_subsys(&ip6mr_net_ops);
1340 	if (err)
1341 		goto reg_pernet_fail;
1342 
1343 	err = register_netdevice_notifier(&ip6_mr_notifier);
1344 	if (err)
1345 		goto reg_notif_fail;
1346 #ifdef CONFIG_IPV6_PIMSM_V2
1347 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1348 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1349 		err = -EAGAIN;
1350 		goto add_proto_fail;
1351 	}
1352 #endif
1353 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1354 	return 0;
1355 #ifdef CONFIG_IPV6_PIMSM_V2
1356 add_proto_fail:
1357 	unregister_netdevice_notifier(&ip6_mr_notifier);
1358 #endif
1359 reg_notif_fail:
1360 	unregister_pernet_subsys(&ip6mr_net_ops);
1361 reg_pernet_fail:
1362 	kmem_cache_destroy(mrt_cachep);
1363 	return err;
1364 }
1365 
1366 void ip6_mr_cleanup(void)
1367 {
1368 	unregister_netdevice_notifier(&ip6_mr_notifier);
1369 	unregister_pernet_subsys(&ip6mr_net_ops);
1370 	kmem_cache_destroy(mrt_cachep);
1371 }
1372 
1373 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1374 			 struct mf6cctl *mfc, int mrtsock)
1375 {
1376 	bool found = false;
1377 	int line;
1378 	struct mfc6_cache *uc, *c;
1379 	unsigned char ttls[MAXMIFS];
1380 	int i;
1381 
1382 	if (mfc->mf6cc_parent >= MAXMIFS)
1383 		return -ENFILE;
1384 
1385 	memset(ttls, 255, MAXMIFS);
1386 	for (i = 0; i < MAXMIFS; i++) {
1387 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1388 			ttls[i] = 1;
1390 	}
1391 
1392 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1393 
1394 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1395 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1396 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1397 			found = true;
1398 			break;
1399 		}
1400 	}
1401 
1402 	if (found) {
1403 		write_lock_bh(&mrt_lock);
1404 		c->mf6c_parent = mfc->mf6cc_parent;
1405 		ip6mr_update_thresholds(mrt, c, ttls);
1406 		if (!mrtsock)
1407 			c->mfc_flags |= MFC_STATIC;
1408 		write_unlock_bh(&mrt_lock);
1409 		return 0;
1410 	}
1411 
1412 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1413 		return -EINVAL;
1414 
1415 	c = ip6mr_cache_alloc();
1416 	if (c == NULL)
1417 		return -ENOMEM;
1418 
1419 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1420 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1421 	c->mf6c_parent = mfc->mf6cc_parent;
1422 	ip6mr_update_thresholds(mrt, c, ttls);
1423 	if (!mrtsock)
1424 		c->mfc_flags |= MFC_STATIC;
1425 
1426 	write_lock_bh(&mrt_lock);
1427 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1428 	write_unlock_bh(&mrt_lock);
1429 
1430 	/*
1431 	 *	Check to see if we resolved a queued list. If so we
1432 	 *	need to send on the frames and tidy up.
1433 	 */
1434 	found = false;
1435 	spin_lock_bh(&mfc_unres_lock);
1436 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1437 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1438 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1439 			list_del(&uc->list);
1440 			atomic_dec(&mrt->cache_resolve_queue_len);
1441 			found = true;
1442 			break;
1443 		}
1444 	}
1445 	if (list_empty(&mrt->mfc6_unres_queue))
1446 		del_timer(&mrt->ipmr_expire_timer);
1447 	spin_unlock_bh(&mfc_unres_lock);
1448 
1449 	if (found) {
1450 		ip6mr_cache_resolve(net, mrt, uc, c);
1451 		ip6mr_cache_free(uc);
1452 	}
1453 	return 0;
1454 }
1455 
1456 /*
1457  *	Close the multicast socket, and clear the vif tables etc
1458  */
1459 
1460 static void mroute_clean_tables(struct mr6_table *mrt)
1461 {
1462 	int i;
1463 	LIST_HEAD(list);
1464 	struct mfc6_cache *c, *next;
1465 
1466 	/*
1467 	 *	Shut down all active vif entries
1468 	 */
1469 	for (i = 0; i < mrt->maxvif; i++) {
1470 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1471 			mif6_delete(mrt, i, &list);
1472 	}
1473 	unregister_netdevice_many(&list);
1474 
1475 	/*
1476 	 *	Wipe the cache
1477 	 */
1478 	for (i = 0; i < MFC6_LINES; i++) {
1479 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1480 			if (c->mfc_flags & MFC_STATIC)
1481 				continue;
1482 			write_lock_bh(&mrt_lock);
1483 			list_del(&c->list);
1484 			write_unlock_bh(&mrt_lock);
1485 
1486 			ip6mr_cache_free(c);
1487 		}
1488 	}
1489 
1490 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1491 		spin_lock_bh(&mfc_unres_lock);
1492 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1493 			list_del(&c->list);
1494 			ip6mr_destroy_unres(mrt, c);
1495 		}
1496 		spin_unlock_bh(&mfc_unres_lock);
1497 	}
1498 }
1499 
1500 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1501 {
1502 	int err = 0;
1503 	struct net *net = sock_net(sk);
1504 
1505 	rtnl_lock();
1506 	write_lock_bh(&mrt_lock);
1507 	if (likely(mrt->mroute6_sk == NULL)) {
1508 		mrt->mroute6_sk = sk;
1509 		net->ipv6.devconf_all->mc_forwarding++;
1510 	} else
1512 		err = -EADDRINUSE;
1513 	write_unlock_bh(&mrt_lock);
1514 
1515 	rtnl_unlock();
1516 
1517 	return err;
1518 }
1519 
1520 int ip6mr_sk_done(struct sock *sk)
1521 {
1522 	int err = -EACCES;
1523 	struct net *net = sock_net(sk);
1524 	struct mr6_table *mrt;
1525 
1526 	rtnl_lock();
1527 	ip6mr_for_each_table(mrt, net) {
1528 		if (sk == mrt->mroute6_sk) {
1529 			write_lock_bh(&mrt_lock);
1530 			mrt->mroute6_sk = NULL;
1531 			net->ipv6.devconf_all->mc_forwarding--;
1532 			write_unlock_bh(&mrt_lock);
1533 
1534 			mroute_clean_tables(mrt);
1535 			err = 0;
1536 			break;
1537 		}
1538 	}
1539 	rtnl_unlock();
1540 
1541 	return err;
1542 }
1543 
1544 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1545 {
1546 	struct mr6_table *mrt;
1547 	struct flowi fl = {
1548 		.iif	= skb->skb_iif,
1549 		.oif	= skb->dev->ifindex,
1550 		.mark	= skb->mark,
1551 	};
1552 
1553 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1554 		return NULL;
1555 
1556 	return mrt->mroute6_sk;
1557 }
1558 
1559 /*
1560  *	Socket options and virtual interface manipulation. The whole
1561  *	virtual interface system is a complete heap, but unfortunately
1562  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1563  *	MOSPF/PIM router set up we can clean this up.
1564  */
1565 
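/* A minimal user-space sketch (illustrative only, error handling
 * omitted): the daemon opens an ICMPv6 raw socket, claims the
 * mroute socket with MRT6_INIT and registers one interface:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mc = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 */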
1566 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1567 {
1568 	int ret;
1569 	struct mif6ctl vif;
1570 	struct mf6cctl mfc;
1571 	mifi_t mifi;
1572 	struct net *net = sock_net(sk);
1573 	struct mr6_table *mrt;
1574 
1575 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1576 	if (mrt == NULL)
1577 		return -ENOENT;
1578 
1579 	if (optname != MRT6_INIT) {
1580 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1581 			return -EACCES;
1582 	}
1583 
1584 	switch (optname) {
1585 	case MRT6_INIT:
1586 		if (sk->sk_type != SOCK_RAW ||
1587 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1588 			return -EOPNOTSUPP;
1589 		if (optlen < sizeof(int))
1590 			return -EINVAL;
1591 
1592 		return ip6mr_sk_init(mrt, sk);
1593 
1594 	case MRT6_DONE:
1595 		return ip6mr_sk_done(sk);
1596 
1597 	case MRT6_ADD_MIF:
1598 		if (optlen < sizeof(vif))
1599 			return -EINVAL;
1600 		if (copy_from_user(&vif, optval, sizeof(vif)))
1601 			return -EFAULT;
1602 		if (vif.mif6c_mifi >= MAXMIFS)
1603 			return -ENFILE;
1604 		rtnl_lock();
1605 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1606 		rtnl_unlock();
1607 		return ret;
1608 
1609 	case MRT6_DEL_MIF:
1610 		if (optlen < sizeof(mifi_t))
1611 			return -EINVAL;
1612 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1613 			return -EFAULT;
1614 		rtnl_lock();
1615 		ret = mif6_delete(mrt, mifi, NULL);
1616 		rtnl_unlock();
1617 		return ret;
1618 
1619 	/*
1620 	 *	Manipulate the forwarding caches. These live
1621 	 *	in a sort of kernel/user symbiosis.
1622 	 */
1623 	case MRT6_ADD_MFC:
1624 	case MRT6_DEL_MFC:
1625 		if (optlen < sizeof(mfc))
1626 			return -EINVAL;
1627 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1628 			return -EFAULT;
1629 		rtnl_lock();
1630 		if (optname == MRT6_DEL_MFC)
1631 			ret = ip6mr_mfc_delete(mrt, &mfc);
1632 		else
1633 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1634 		rtnl_unlock();
1635 		return ret;
1636 
1637 	/*
1638 	 *	Control PIM assert (to activate pim will activate assert)
1639 	 */
1640 	case MRT6_ASSERT:
1641 	{
1642 		int v;
1643 		if (get_user(v, (int __user *)optval))
1644 			return -EFAULT;
1645 		mrt->mroute_do_assert = !!v;
1646 		return 0;
1647 	}
1648 
1649 #ifdef CONFIG_IPV6_PIMSM_V2
1650 	case MRT6_PIM:
1651 	{
1652 		int v;
1653 		if (get_user(v, (int __user *)optval))
1654 			return -EFAULT;
1655 		v = !!v;
1656 		rtnl_lock();
1657 		ret = 0;
1658 		if (v != mrt->mroute_do_pim) {
1659 			mrt->mroute_do_pim = v;
1660 			mrt->mroute_do_assert = v;
1661 		}
1662 		rtnl_unlock();
1663 		return ret;
1664 	}
1665 
1666 #endif
1667 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1668 	case MRT6_TABLE:
1669 	{
1670 		u32 v;
1671 
1672 		if (optlen != sizeof(u32))
1673 			return -EINVAL;
1674 		if (get_user(v, (u32 __user *)optval))
1675 			return -EFAULT;
1676 		if (sk == mrt->mroute6_sk)
1677 			return -EBUSY;
1678 
1679 		rtnl_lock();
1680 		ret = 0;
1681 		if (!ip6mr_new_table(net, v))
1682 			ret = -ENOMEM;
1683 		raw6_sk(sk)->ip6mr_table = v;
1684 		rtnl_unlock();
1685 		return ret;
1686 	}
1687 #endif
1688 	/*
1689 	 *	Spurious command, or MRT6_VERSION which you cannot
1690 	 *	set.
1691 	 */
1692 	default:
1693 		return -ENOPROTOOPT;
1694 	}
1695 }
1696 
1697 /*
1698  *	Getsock opt support for the multicast routing system.
1699  */
1700 
1701 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1702 			  int __user *optlen)
1703 {
1704 	int olr;
1705 	int val;
1706 	struct net *net = sock_net(sk);
1707 	struct mr6_table *mrt;
1708 
1709 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1710 	if (mrt == NULL)
1711 		return -ENOENT;
1712 
1713 	switch (optname) {
1714 	case MRT6_VERSION:
1715 		val = 0x0305;
1716 		break;
1717 #ifdef CONFIG_IPV6_PIMSM_V2
1718 	case MRT6_PIM:
1719 		val = mrt->mroute_do_pim;
1720 		break;
1721 #endif
1722 	case MRT6_ASSERT:
1723 		val = mrt->mroute_do_assert;
1724 		break;
1725 	default:
1726 		return -ENOPROTOOPT;
1727 	}
1728 
1729 	if (get_user(olr, optlen))
1730 		return -EFAULT;
1731 
1732 	olr = min_t(int, olr, sizeof(int));
1733 	if (olr < 0)
1734 		return -EINVAL;
1735 
1736 	if (put_user(olr, optlen))
1737 		return -EFAULT;
1738 	if (copy_to_user(optval, &val, olr))
1739 		return -EFAULT;
1740 	return 0;
1741 }
1742 
1743 /*
1744  *	The IP multicast ioctl support routines.
1745  */
1746 
1747 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1748 {
1749 	struct sioc_sg_req6 sr;
1750 	struct sioc_mif_req6 vr;
1751 	struct mif_device *vif;
1752 	struct mfc6_cache *c;
1753 	struct net *net = sock_net(sk);
1754 	struct mr6_table *mrt;
1755 
1756 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1757 	if (mrt == NULL)
1758 		return -ENOENT;
1759 
1760 	switch (cmd) {
1761 	case SIOCGETMIFCNT_IN6:
1762 		if (copy_from_user(&vr, arg, sizeof(vr)))
1763 			return -EFAULT;
1764 		if (vr.mifi >= mrt->maxvif)
1765 			return -EINVAL;
1766 		read_lock(&mrt_lock);
1767 		vif = &mrt->vif6_table[vr.mifi];
1768 		if (MIF_EXISTS(mrt, vr.mifi)) {
1769 			vr.icount = vif->pkt_in;
1770 			vr.ocount = vif->pkt_out;
1771 			vr.ibytes = vif->bytes_in;
1772 			vr.obytes = vif->bytes_out;
1773 			read_unlock(&mrt_lock);
1774 
1775 			if (copy_to_user(arg, &vr, sizeof(vr)))
1776 				return -EFAULT;
1777 			return 0;
1778 		}
1779 		read_unlock(&mrt_lock);
1780 		return -EADDRNOTAVAIL;
1781 	case SIOCGETSGCNT_IN6:
1782 		if (copy_from_user(&sr, arg, sizeof(sr)))
1783 			return -EFAULT;
1784 
1785 		read_lock(&mrt_lock);
1786 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1787 		if (c) {
1788 			sr.pktcnt = c->mfc_un.res.pkt;
1789 			sr.bytecnt = c->mfc_un.res.bytes;
1790 			sr.wrong_if = c->mfc_un.res.wrong_if;
1791 			read_unlock(&mrt_lock);
1792 
1793 			if (copy_to_user(arg, &sr, sizeof(sr)))
1794 				return -EFAULT;
1795 			return 0;
1796 		}
1797 		read_unlock(&mrt_lock);
1798 		return -EADDRNOTAVAIL;
1799 	default:
1800 		return -ENOIOCTLCMD;
1801 	}
1802 }
1803 
1804 
1805 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1806 {
1807 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1808 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1809 	return dst_output(skb);
1810 }
1811 
1812 /*
1813  *	Processing handlers for ip6mr_forward
1814  */
1815 
1816 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1817 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1818 {
1819 	struct ipv6hdr *ipv6h;
1820 	struct mif_device *vif = &mrt->vif6_table[vifi];
1821 	struct net_device *dev;
1822 	struct dst_entry *dst;
1823 	struct flowi fl;
1824 
1825 	if (vif->dev == NULL)
1826 		goto out_free;
1827 
1828 #ifdef CONFIG_IPV6_PIMSM_V2
1829 	if (vif->flags & MIFF_REGISTER) {
1830 		vif->pkt_out++;
1831 		vif->bytes_out += skb->len;
1832 		vif->dev->stats.tx_bytes += skb->len;
1833 		vif->dev->stats.tx_packets++;
1834 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1835 		goto out_free;
1836 	}
1837 #endif
1838 
1839 	ipv6h = ipv6_hdr(skb);
1840 
1841 	fl = (struct flowi) {
1842 		.oif = vif->link,
1843 		.nl_u = { .ip6_u =
1844 				{ .daddr = ipv6h->daddr, }
1845 		}
1846 	};
1847 
1848 	dst = ip6_route_output(net, NULL, &fl);
1849 	if (!dst)
1850 		goto out_free;
1851 
1852 	skb_dst_drop(skb);
1853 	skb_dst_set(skb, dst);
1854 
1855 	/*
1856 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1857 	 * not only before forwarding, but also after forwarding on all output
1858 	 * interfaces. Clearly, if the mrouter runs a multicast
1859 	 * program, that program should receive packets regardless of the
1860 	 * interface on which it joined.
1861 	 * If we did not do this, the program would have to join on all
1862 	 * interfaces. On the other hand, a multihomed host (or router, but
1863 	 * not an mrouter) cannot join on more than one interface - it would
1864 	 * receive multiple copies of each packet.
1865 	 */
1866 	dev = vif->dev;
1867 	skb->dev = dev;
1868 	vif->pkt_out++;
1869 	vif->bytes_out += skb->len;
1870 
1871 	/* We are about to write */
1872 	/* XXX: extension headers? */
1873 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1874 		goto out_free;
1875 
1876 	ipv6h = ipv6_hdr(skb);
1877 	ipv6h->hop_limit--;
1878 
1879 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1880 
1881 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1882 		       ip6mr_forward2_finish);
1883 
1884 out_free:
1885 	kfree_skb(skb);
1886 	return 0;
1887 }
1888 
1889 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1890 {
1891 	int ct;
1892 
1893 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1894 		if (mrt->vif6_table[ct].dev == dev)
1895 			break;
1896 	}
1897 	return ct;
1898 }
1899 
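/* Forward a resolved packet: check it arrived on the expected parent
 * interface (RPF), then copy it to every outgoing interface whose
 * TTL threshold is below the packet's hop limit.
 */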
1900 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1901 			  struct sk_buff *skb, struct mfc6_cache *cache)
1902 {
1903 	int psend = -1;
1904 	int vif, ct;
1905 
1906 	vif = cache->mf6c_parent;
1907 	cache->mfc_un.res.pkt++;
1908 	cache->mfc_un.res.bytes += skb->len;
1909 
1910 	/*
1911 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1912 	 */
1913 	if (mrt->vif6_table[vif].dev != skb->dev) {
1914 		int true_vifi;
1915 
1916 		cache->mfc_un.res.wrong_if++;
1917 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1918 
1919 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1920 		    /* pimsm uses asserts when switching from RPT to SPT,
1921 		       so we cannot check that the packet arrived on an oif.
1922 		       It is bad, but otherwise we would need to move a pretty
1923 		       large chunk of pimd into the kernel. Ough... --ANK
1924 		     */
1925 		    (mrt->mroute_do_pim ||
1926 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1927 		    time_after(jiffies,
1928 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1929 			cache->mfc_un.res.last_assert = jiffies;
1930 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1931 		}
1932 		goto dont_forward;
1933 	}
1934 
1935 	mrt->vif6_table[vif].pkt_in++;
1936 	mrt->vif6_table[vif].bytes_in += skb->len;
1937 
1938 	/*
1939 	 *	Forward the frame
1940 	 */
1941 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1942 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1943 			if (psend != -1) {
1944 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1945 				if (skb2)
1946 					ip6mr_forward2(net, mrt, skb2, cache, psend);
1947 			}
1948 			psend = ct;
1949 		}
1950 	}
1951 	if (psend != -1) {
1952 		ip6mr_forward2(net, mrt, skb, cache, psend);
1953 		return 0;
1954 	}
1955 
1956 dont_forward:
1957 	kfree_skb(skb);
1958 	return 0;
1959 }
1960 
1961 
1962 /*
1963  *	Multicast packets for forwarding arrive here
1964  */
1965 
1966 int ip6_mr_input(struct sk_buff *skb)
1967 {
1968 	struct mfc6_cache *cache;
1969 	struct net *net = dev_net(skb->dev);
1970 	struct mr6_table *mrt;
1971 	struct flowi fl = {
1972 		.iif	= skb->dev->ifindex,
1973 		.mark	= skb->mark,
1974 	};
1975 	int err;
1976 
1977 	err = ip6mr_fib_lookup(net, &fl, &mrt);
1978 	if (err < 0)
1979 		return err;
1980 
1981 	read_lock(&mrt_lock);
1982 	cache = ip6mr_cache_find(mrt,
1983 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1984 
1985 	/*
1986 	 *	No usable cache entry
1987 	 */
1988 	if (cache == NULL) {
1989 		int vif;
1990 
1991 		vif = ip6mr_find_vif(mrt, skb->dev);
1992 		if (vif >= 0) {
1993 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
1994 			read_unlock(&mrt_lock);
1995 
1996 			return err;
1997 		}
1998 		read_unlock(&mrt_lock);
1999 		kfree_skb(skb);
2000 		return -ENODEV;
2001 	}
2002 
2003 	ip6_mr_forward(net, mrt, skb, cache);
2004 
2005 	read_unlock(&mrt_lock);
2006 
2007 	return 0;
2008 }
2009 
2010 
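/* Encode one cache entry for netlink: an RTA_IIF attribute for the
 * parent interface (when present) plus an RTA_MULTIPATH nest holding
 * one rtnexthop per outgoing interface.
 */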
2011 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2012 			       struct mfc6_cache *c, struct rtmsg *rtm)
2013 {
2014 	int ct;
2015 	struct rtnexthop *nhp;
2016 	u8 *b = skb_tail_pointer(skb);
2017 	struct rtattr *mp_head;
2018 
2019 	/* If cache is unresolved, don't try to parse IIF and OIF */
2020 	if (c->mf6c_parent > MAXMIFS)
2021 		return -ENOENT;
2022 
2023 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2024 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2025 
2026 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2027 
2028 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2029 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2030 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2031 				goto rtattr_failure;
2032 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2033 			nhp->rtnh_flags = 0;
2034 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2035 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2036 			nhp->rtnh_len = sizeof(*nhp);
2037 		}
2038 	}
2039 	mp_head->rta_type = RTA_MULTIPATH;
2040 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2041 	rtm->rtm_type = RTN_MULTICAST;
2042 	return 1;
2043 
2044 rtattr_failure:
2045 	nlmsg_trim(skb, b);
2046 	return -EMSGSIZE;
2047 }
2048 
2049 int ip6mr_get_route(struct net *net,
2050 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2051 {
2052 	int err;
2053 	struct mr6_table *mrt;
2054 	struct mfc6_cache *cache;
2055 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2056 
2057 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2058 	if (mrt == NULL)
2059 		return -ENOENT;
2060 
2061 	read_lock(&mrt_lock);
2062 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2063 
2064 	if (!cache) {
2065 		struct sk_buff *skb2;
2066 		struct ipv6hdr *iph;
2067 		struct net_device *dev;
2068 		int vif;
2069 
2070 		if (nowait) {
2071 			read_unlock(&mrt_lock);
2072 			return -EAGAIN;
2073 		}
2074 
2075 		dev = skb->dev;
2076 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2077 			read_unlock(&mrt_lock);
2078 			return -ENODEV;
2079 		}
2080 
2081 		/* really correct? */
2082 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2083 		if (!skb2) {
2084 			read_unlock(&mrt_lock);
2085 			return -ENOMEM;
2086 		}
2087 
2088 		skb_reset_transport_header(skb2);
2089 
2090 		skb_put(skb2, sizeof(struct ipv6hdr));
2091 		skb_reset_network_header(skb2);
2092 
2093 		iph = ipv6_hdr(skb2);
2094 		iph->version = 0;
2095 		iph->priority = 0;
2096 		iph->flow_lbl[0] = 0;
2097 		iph->flow_lbl[1] = 0;
2098 		iph->flow_lbl[2] = 0;
2099 		iph->payload_len = 0;
2100 		iph->nexthdr = IPPROTO_NONE;
2101 		iph->hop_limit = 0;
2102 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2103 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2104 
2105 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2106 		read_unlock(&mrt_lock);
2107 
2108 		return err;
2109 	}
2110 
2111 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2112 		cache->mfc_flags |= MFC_NOTIFY;
2113 
2114 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2115 	read_unlock(&mrt_lock);
2116 	return err;
2117 }
2118 
2119 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2120 			     u32 pid, u32 seq, struct mfc6_cache *c)
2121 {
2122 	struct nlmsghdr *nlh;
2123 	struct rtmsg *rtm;
2124 
2125 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2126 	if (nlh == NULL)
2127 		return -EMSGSIZE;
2128 
2129 	rtm = nlmsg_data(nlh);
2130 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2131 	rtm->rtm_dst_len  = 128;
2132 	rtm->rtm_src_len  = 128;
2133 	rtm->rtm_tos      = 0;
2134 	rtm->rtm_table    = mrt->id;
2135 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2136 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2137 	rtm->rtm_protocol = RTPROT_UNSPEC;
2138 	rtm->rtm_flags    = 0;
2139 
2140 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2141 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2142 
2143 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2144 		goto nla_put_failure;
2145 
2146 	return nlmsg_end(skb, nlh);
2147 
2148 nla_put_failure:
2149 	nlmsg_cancel(skb, nlh);
2150 	return -EMSGSIZE;
2151 }
2152 
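/* Netlink dump callback: walk the tables, hash lines and entries,
 * using cb->args[] to resume where the previous dump left off.
 */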
2153 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2154 {
2155 	struct net *net = sock_net(skb->sk);
2156 	struct mr6_table *mrt;
2157 	struct mfc6_cache *mfc;
2158 	unsigned int t = 0, s_t;
2159 	unsigned int h = 0, s_h;
2160 	unsigned int e = 0, s_e;
2161 
2162 	s_t = cb->args[0];
2163 	s_h = cb->args[1];
2164 	s_e = cb->args[2];
2165 
2166 	read_lock(&mrt_lock);
2167 	ip6mr_for_each_table(mrt, net) {
2168 		if (t < s_t)
2169 			goto next_table;
2170 		if (t > s_t)
2171 			s_h = 0;
2172 		for (h = s_h; h < MFC6_LINES; h++) {
2173 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2174 				if (e < s_e)
2175 					goto next_entry;
2176 				if (ip6mr_fill_mroute(mrt, skb,
2177 						      NETLINK_CB(cb->skb).pid,
2178 						      cb->nlh->nlmsg_seq,
2179 						      mfc) < 0)
2180 					goto done;
2181 next_entry:
2182 				e++;
2183 			}
2184 			e = s_e = 0;
2185 		}
2186 		s_h = 0;
2187 next_table:
2188 		t++;
2189 	}
2190 done:
2191 	read_unlock(&mrt_lock);
2192 
2193 	cb->args[2] = e;
2194 	cb->args[1] = h;
2195 	cb->args[0] = t;
2196 
2197 	return skb->len;
2198 }
2199