xref: /linux/net/ipv6/ip6mr.c (revision 2cd1f0ddbb5667f61e69089964209e8f716e9009)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <net/protocol.h>
36 #include <linux/skbuff.h>
37 #include <net/raw.h>
38 #include <linux/notifier.h>
39 #include <linux/if_arp.h>
40 #include <net/checksum.h>
41 #include <net/netlink.h>
42 #include <net/fib_rules.h>
43 
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #include <linux/mroute6.h>
47 #include <linux/pim.h>
48 #include <net/addrconf.h>
49 #include <linux/netfilter_ipv6.h>
50 #include <linux/export.h>
51 #include <net/ip6_checksum.h>
52 #include <linux/netconf.h>
53 
/* Per-table fib rule; carries no private state beyond the generic rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Result slot filled by ip6mr_rule_action() during a fib-rules lookup. */
struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache for struct mfc6_cache entries (resolved and unresolved). */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for helpers referenced before their definitions. */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
97 
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table of @net (RCU list traversal). */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Return the table following @mrt in the per-netns list, or the first
 * table when @mrt is NULL; NULL once the list is exhausted.
 * NOTE(review): uses _rcu list accessors, so callers presumably hold
 * rcu_read_lock() or RTNL -- confirm at call sites.
 */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	/* Reached the list head again: no more tables. */
	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}
118 
/* Find the multicast routing table with identifier @id, or NULL if it
 * does not exist in this netns.
 */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
129 
/* Resolve @flp6 to a multicast routing table via the fib-rules engine.
 * On success, *mrt is set and 0 returned; otherwise the negative errno
 * produced by the matched rule action (e.g. -ENETUNREACH) propagates.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		/* NOREF: do not take a reference on the matched rule */
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
147 
/* fib_rules .action callback: map a matched rule onto its mr table.
 * Returns 0 with res->mrt set, -EAGAIN to keep walking the rule list
 * when the referenced table does not exist, or a hard error for the
 * unreachable/prohibit/blackhole actions.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
172 
/* fib_rules .match callback: ip6mr rules have no selectors beyond the
 * generic ones, so every rule matches.
 */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

/* Netlink attribute policy: only the generic FRA_* attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* .configure callback: nothing to set up beyond the generic rule. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* .compare callback: no private fields, so any two rules compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* .fill callback for dumps: ip6mr rules carry no src/dst/tos selectors. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
202 
/* fib_rules ops template for the RTNL_FAMILY_IP6MR family; cloned into
 * each netns by fib_rules_register().
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
216 
/* Per-netns init (multi-table build): register the ip6mr fib-rules ops,
 * create the default table and install the catch-all rule pointing at
 * it.  Unwinds in reverse order on failure.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* 0x7fff: lowest priority, so explicit user rules take precedence */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
248 
/* Per-netns teardown (multi-table build): free every table, then drop
 * the rules ops.  Runs under RTNL, which table/VIF teardown requires.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
261 
/* Replay all ip6mr fib rules to @nb (FIB notifier bootstrap). */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

/* Rule-change sequence counter, used to detect churn during a dump. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

/* True if @rule is the kernel-installed catch-all default rule. */
bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
278 #else
/* Single-table build: the one and only table lives in net->ipv6.mrt6. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

/* Iteration yields the lone table exactly once. */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

/* @id is ignored: there is only one table. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

/* Every flow maps to the lone table; cannot fail. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

/* Per-netns init: create the single default table. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

/* Per-netns teardown: free the single table under RTNL. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

/* No fib rules in single-table builds: nothing to dump. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

/* No fib rules: the sequence counter never changes. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
325 #endif
326 
/* rhashtable compare callback: memcmp-like semantics, i.e. return
 * nonzero (mismatch) unless both group and origin are equal.
 */
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

/* MFC hash table parameters: key is the (origin, mcastgrp) pair
 * embedded in struct mfc6_cache.
 */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};
346 
/* Hook invoked by mr_table_alloc(): link the new table into the
 * per-netns list (multi-table builds only; no-op otherwise).
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

/* All-zero (*,*) key used by the generic mr code for (*,G) lookups. */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

/* Ops handed to the generic mr_table code. */
static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};
364 
/* Return the table with id @id, creating it if it does not yet exist.
 * Returns NULL on allocation failure.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

/* Destroy @mrt: stop the expiry timer, flush all VIF and MFC entries,
 * and release the hash table.  Callers hold RTNL.
 */
static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}
384 
385 #ifdef CONFIG_PROC_FS
386 /* The /proc interfaces to multicast routing
387  * /proc/ip6_mr_cache /proc/ip6_mr_vif
388  */
389 
/* seq_file start for /proc/net/ip6_mr_vif: resolve the default table
 * and hold mrt_lock (read side) for the duration of the walk.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

/* seq_file stop: drop the lock taken in ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

/* Emit one VIF line, or the column header for SEQ_START_TOKEN. */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
441 
/* seq_file start for /proc/net/ip6_mr_cache: walk the default table's
 * resolved entries, then the unresolved queue (mfc_unres_lock).
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* Emit one MFC cache line, or the column header for SEQ_START_TOKEN. */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			/* Resolved entry: real counters and oif TTLs. */
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
502 #endif
503 
504 #ifdef CONFIG_IPV6_PIMSM_V2
505 
/* Receive handler for IPPROTO_PIM: validate a PIM REGISTER message,
 * strip the outer headers and re-inject the encapsulated multicast
 * packet on the per-table "pim6reg" device.  Always consumes @skb.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Must be a versioned REGISTER with a valid checksum; accept
	 * either the header-only or the full-packet checksum form.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	/* NOTE(review): mroute_reg_vif_num is sampled before mrt_lock is
	 * taken below; a concurrent MIF delete could change it -- confirm
	 * whether this read should be under the lock.
	 */
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* Re-parent the skb onto the register device as a freshly
	 * received IPv6 packet.
	 */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
570 
/* Registered for IPPROTO_PIM to receive PIM REGISTER messages. */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
574 
575 /* Service routines creating virtual interfaces: PIMREG */
576 
/* Transmit handler of the "pim6reg" device: instead of sending, bounce
 * the whole packet to the userspace daemon as an MRT6MSG_WHOLEPKT
 * report, then free it.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		/* NOTE(review): returns a negative errno where callers
		 * expect a netdev_tx_t -- confirm intended.
		 */
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
603 
/* The register device has no underlying link. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

/* alloc_netdev() setup callback for the "pim6reg" device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* Leave room for the outer IPv6 header plus PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
623 
624 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
625 {
626 	struct net_device *dev;
627 	char name[IFNAMSIZ];
628 
629 	if (mrt->id == RT6_TABLE_DFLT)
630 		sprintf(name, "pim6reg");
631 	else
632 		sprintf(name, "pim6reg%u", mrt->id);
633 
634 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
635 	if (!dev)
636 		return NULL;
637 
638 	dev_net_set(dev, net);
639 
640 	if (register_netdevice(dev)) {
641 		free_netdev(dev);
642 		return NULL;
643 	}
644 
645 	if (dev_open(dev))
646 		goto failure;
647 
648 	dev_hold(dev);
649 	return dev;
650 
651 failure:
652 	unregister_netdevice(dev);
653 	return NULL;
654 }
655 #endif
656 
/* Fire the FIB notifier chain for a VIF add/delete on table @tb_id. */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

/* Fire the FIB notifier chain for an MFC entry add/delete. */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
674 
/* Delete a VIF entry */
/* Remove VIF @vifi from @mrt.  @notify is nonzero when called from the
 * netdevice-unregister path (the device is already going away, so its
 * register device must not be queued again); @head collects devices to
 * unregister in batch.  Returns 0 or -EADDRNOTAVAIL.
 */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	/* Clear the slot under the writer lock so the data path never
	 * sees a half-torn-down VIF.
	 */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* Shrink maxvif if we just removed the highest-numbered VIF. */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	/* Update and announce the device's mc_forwarding state. */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
734 
/* RCU callback: return an MFC entry to the slab cache. */
static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

/* Free @c after an RCU grace period (readers may still hold it). */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
746 
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		/* version == 0 marks an skb queued by a netlink route
		 * query rather than a real data packet: answer it with
		 * an -ETIMEDOUT netlink error instead of dropping it.
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
774 
775 /* Timer process for all the unresolved queue. */
776 
777 static void ipmr_do_expire_process(struct mr_table *mrt)
778 {
779 	unsigned long now = jiffies;
780 	unsigned long expires = 10 * HZ;
781 	struct mr_mfc *c, *next;
782 
783 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
784 		if (time_after(c->mfc_un.unres.expires, now)) {
785 			/* not yet... */
786 			unsigned long interval = c->mfc_un.unres.expires - now;
787 			if (interval < expires)
788 				expires = interval;
789 			continue;
790 		}
791 
792 		list_del(&c->list);
793 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
794 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
795 	}
796 
797 	if (!list_empty(&mrt->mfc_unres_queue))
798 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
799 }
800 
/* Expiry timer callback: run the sweep if the lock is free, otherwise
 * back off one jiffy (timer context must not spin on the lock).
 */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
815 
/* Fill oifs list. It is called under write locked mrt_lock. */

/* Rebuild @cache's output state from the per-VIF TTL vector @ttls:
 * a TTL of 0 or 255 disables forwarding on that VIF; minvif/maxvif
 * bracket the enabled range for the forwarding loop.
 */
static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}
840 
/* Install VIF @vifc->mif6c_mifi in @mrt.  @mrtsock is nonzero when the
 * request came from the mroute socket (entry dies with the socket);
 * otherwise the VIF is marked VIFF_STATIC.  Returns 0 or -errno.
 * NOTE(review): vifi indexes vif_table unchecked here -- presumably the
 * setsockopt caller has validated mif6c_mifi < MAXMIFS; confirm.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* Ordinary VIF bound to an existing interface. */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* Announce the device's new mc_forwarding state. */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}
915 
/* Exact (S,G) lookup in @mrt's MFC hash. */
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
/* @mifi must be a valid output interface of the matched entry;
 * an all-zero @mcastgrp requests the (*,*) wildcard entry instead.
 */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}
957 
/* Allocate a multicast cache entry */
/* Resolved-entry allocator (process context, may sleep). */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	/* Back-date last_assert so the first wrong-iif triggers a report */
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

/* Unresolved-entry allocator (softirq context, atomic); entry expires
 * in 10 seconds unless resolved.
 */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
980 
/*
 *	A cache entry has gone into a resolved state from queued
 */

/* Flush the packets that were parked on unresolved entry @uc through the
 * freshly resolved entry @c: data packets are forwarded, netlink route
 * queries (marked by version == 0) are answered.
 */
static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				/* Could not fit the route: report -EMSGSIZE */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1013 
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

/* Build an mrt6msg of type @assert for packet @pkt arriving on @mifi
 * and queue it on the mroute socket; also raises the corresponding
 * netlink event.  Returns 0 or -errno (-EINVAL if no daemon socket,
 * -ENOBUFS on allocation failure or a full socket queue).
 */
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* WHOLEPKT reports carry the entire packet, so size the copy
	 * for the full payload plus our message header.
	 */
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* Deliver only while a daemon holds the mroute socket. */
	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1113 
/* Queue a packet for resolution. It gets locked cache entry! */
/* Park @skb on the unresolved entry for its (S,G), creating the entry
 * and reporting MRT6MSG_NOCACHE to the daemon on first sight.  At most
 * 10 unresolved entries per table and 4 skbs per entry are kept.
 * Consumes @skb on failure; returns 0 or -errno.
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* Arm the expiry timer for the new entry. */
		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1183 
1184 /*
1185  *	MFC6 cache manipulation by user space
1186  */
1187 
/* Remove one MFC entry matching (origin, group, parent); returns -ENOENT
 * when no such entry exists.  Caller holds RTNL, which serializes all
 * add/delete operations; RCU readers may still hold references, hence
 * list_del_rcu() + mr_cache_put() rather than an immediate free.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	/* Tell fib-offload listeners and netlink about the removal */
	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}
1209 
/* Netdevice notifier: on NETDEV_UNREGISTER, drop every mif that is bound
 * to the vanishing device, in every mr_table of the device's netns.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			/* notify=1: the device is going away anyway */
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}
1232 
/* Combined sequence counter (table changes + rule changes) used by the
 * fib notifier infrastructure to detect a stale dump.  Requires RTNL.
 */
static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}
1239 
/* Replay all existing IP6MR state (rules + MFC entries) to a newly
 * registered fib notifier block.
 */
static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}
1245 
/* Hooks netdevice unregister events into mif cleanup */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1249 
/* Template for the per-netns fib notifier ops; registered by
 * ip6mr_notifier_init() for each network namespace.
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};
1256 
/* Per-netns setup of the IP6MR fib notifier ops; resets the change
 * sequence counter and stores the registered ops in the netns.
 */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}
1271 
/* Per-netns teardown counterpart of ip6mr_notifier_init() */
static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}
1277 
1278 /* Setup for IP multicast routing */
1279 static int __net_init ip6mr_net_init(struct net *net)
1280 {
1281 	int err;
1282 
1283 	err = ip6mr_notifier_init(net);
1284 	if (err)
1285 		return err;
1286 
1287 	err = ip6mr_rules_init(net);
1288 	if (err < 0)
1289 		goto ip6mr_rules_fail;
1290 
1291 #ifdef CONFIG_PROC_FS
1292 	err = -ENOMEM;
1293 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1294 			sizeof(struct mr_vif_iter)))
1295 		goto proc_vif_fail;
1296 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1297 			sizeof(struct mr_mfc_iter)))
1298 		goto proc_cache_fail;
1299 #endif
1300 
1301 	return 0;
1302 
1303 #ifdef CONFIG_PROC_FS
1304 proc_cache_fail:
1305 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1306 proc_vif_fail:
1307 	ip6mr_rules_exit(net);
1308 #endif
1309 ip6mr_rules_fail:
1310 	ip6mr_notifier_exit(net);
1311 	return err;
1312 }
1313 
/* Per-netns teardown: reverse of ip6mr_net_init() */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}
1323 
/* Registered in ip6_mr_init(); runs init/exit per network namespace */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1328 
/* Module/boot-time init: slab cache for MFC entries, pernet ops,
 * netdevice notifier, optional PIM protocol handler, and the
 * RTM_GETROUTE dump handler.  Unwinds in reverse on failure.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1370 
/* Teardown counterpart of ip6_mr_init(), in strict reverse order */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1381 
/* Add or update an MFC entry from user space (MRT6_ADD_MFC*).
 * If an entry already exists it is updated in place; otherwise a new
 * one is allocated, inserted, and any packets parked on a matching
 * unresolved entry are replayed through it.  Caller holds RTNL.
 * @mrtsock: non-zero when the request came from the mroute socket
 *           itself (entry is then dynamic, not MFC_STATIC).
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Selected mifs get TTL threshold 1, all others 255 (never) */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update thresholds/parent in place */
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* Group must be multicast, except the (*,*) wildcard */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: expiry timer is no longer needed */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush the parked skbs through the resolved entry */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1470 
1471 /*
1472  *	Close the multicast socket, and clear the vif tables etc
1473  */
1474 
/* Tear down a table's vifs and MFC entries.
 * @all: true clears everything (table destruction); false preserves
 *       entries/mifs flagged static (user closed the mroute socket).
 */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	/* Flush any still-unresolved entries and their queued skbs */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1514 
/* Attach @sk as the table's mroute control socket (MRT6_INIT).
 * Only one socket may own a table; a second attempt gets -EADDRINUSE.
 * Also flips the all-devices mc_forwarding knob and notifies netconf
 * listeners (outside mrt_lock, still under RTNL).
 */
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
1540 
/* Detach @sk from whichever table it controls (MRT6_DONE or socket
 * close).  Clears non-static state from that table and drops the
 * mc_forwarding count.  Returns -EACCES when @sk controls no table.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	/* Only an ICMPv6 raw socket can be an mroute socket */
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			/* Keep static mifs/entries; clear the rest */
			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1576 
1577 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1578 {
1579 	struct mr_table *mrt;
1580 	struct flowi6 fl6 = {
1581 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1582 		.flowi6_oif	= skb->dev->ifindex,
1583 		.flowi6_mark	= skb->mark,
1584 	};
1585 
1586 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1587 		return NULL;
1588 
1589 	return rcu_access_pointer(mrt->mroute_sk);
1590 }
1591 EXPORT_SYMBOL(mroute6_is_socket);
1592 
1593 /*
1594  *	Socket options and virtual interface manipulation. The whole
1595  *	virtual interface system is a complete heap, but unfortunately
1596  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1597  *	MOSPF/PIM router set up we can clean this up.
1598  */
1599 
/* setsockopt() handler for the MRT6_* option range.
 * Only an ICMPv6 raw socket may use these; any option other than
 * MRT6_INIT additionally requires the caller to be the table's mroute
 * socket or to hold CAP_NET_ADMIN in the netns.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		/* parent == -1 means "match any parent" for plain MFC ops */
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Enabling PIM implies enabling asserts as well */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* Cannot retarget a socket that already owns a table */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1751 
1752 /*
1753  *	Getsock opt support for the multicast routing system.
1754  */
1755 
/* getsockopt() handler for MRT6_VERSION / MRT6_PIM / MRT6_ASSERT.
 * Copies back at most sizeof(int) bytes, honoring the caller's smaller
 * *optlen if it asked for less.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp to sizeof(int); a negative user length is rejected
	 * (min_t keeps negative values negative here).
	 */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1801 
1802 /*
1803  *	The IP multicast ioctl support routines.
1804  */
1805 
/* ioctl() handler: per-mif counters (SIOCGETMIFCNT_IN6) and per-(S,G)
 * counters (SIOCGETSGCNT_IN6).  mif counters are read under mrt_lock,
 * MFC counters under RCU.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			/* copy_to_user only after dropping the lock */
			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1862 
1863 #ifdef CONFIG_COMPAT
/* 32-bit layout of struct sioc_sg_req6 for compat ioctl */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1871 
/* 32-bit layout of struct sioc_mif_req6 for compat ioctl */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1879 
/* 32-bit compat twin of ip6mr_ioctl(): identical logic, but uses the
 * compat_* request layouts above so field offsets match 32-bit userland.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1936 #endif
1937 
/* NF_INET_FORWARD okfn: bump forwarding stats, then hand the skb to
 * the output path (dst_output).
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}
1946 
1947 /*
1948  *	Processing handlers for ip6mr_forward
1949  */
1950 
/* Transmit one copy of @skb out mif @vifi.  Consumes the skb on every
 * path (forwarded, reported via register mif, or dropped).
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* A register mif does not transmit: the whole packet is sent to
	 * the PIM daemon instead (MRT6MSG_WHOLEPKT).
	 */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2024 
2025 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2026 {
2027 	int ct;
2028 
2029 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2030 		if (mrt->vif_table[ct].dev == dev)
2031 			break;
2032 	}
2033 	return ct;
2034 }
2035 
/* Forward @skb according to MFC entry @c: account the packet, run the
 * wrong-interface / PIM-assert check, then clone out to every mif whose
 * TTL threshold passes.  Always consumes the skb.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			/* Rate-limited WRONGMIF report to the daemon */
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* Clone for all but the last eligible mif; the original skb is
	 * sent on the final one (psend) to avoid one extra copy.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2132 
2133 
2134 /*
2135  *	Multicast packets for forwarding arrive here
2136  */
2137 
2138 int ip6_mr_input(struct sk_buff *skb)
2139 {
2140 	struct mfc6_cache *cache;
2141 	struct net *net = dev_net(skb->dev);
2142 	struct mr_table *mrt;
2143 	struct flowi6 fl6 = {
2144 		.flowi6_iif	= skb->dev->ifindex,
2145 		.flowi6_mark	= skb->mark,
2146 	};
2147 	int err;
2148 
2149 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2150 	if (err < 0) {
2151 		kfree_skb(skb);
2152 		return err;
2153 	}
2154 
2155 	read_lock(&mrt_lock);
2156 	cache = ip6mr_cache_find(mrt,
2157 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2158 	if (!cache) {
2159 		int vif = ip6mr_find_vif(mrt, skb->dev);
2160 
2161 		if (vif >= 0)
2162 			cache = ip6mr_cache_find_any(mrt,
2163 						     &ipv6_hdr(skb)->daddr,
2164 						     vif);
2165 	}
2166 
2167 	/*
2168 	 *	No usable cache entry
2169 	 */
2170 	if (!cache) {
2171 		int vif;
2172 
2173 		vif = ip6mr_find_vif(mrt, skb->dev);
2174 		if (vif >= 0) {
2175 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2176 			read_unlock(&mrt_lock);
2177 
2178 			return err;
2179 		}
2180 		read_unlock(&mrt_lock);
2181 		kfree_skb(skb);
2182 		return -ENODEV;
2183 	}
2184 
2185 	ip6_mr_forward(net, mrt, skb, cache);
2186 
2187 	read_unlock(&mrt_lock);
2188 
2189 	return 0;
2190 }
2191 
/* Fill @rtm with multicast route info for the (src, dst) of skb's rt6
 * dst.  When no MFC entry exists, a minimal synthetic IPv6 header is
 * built in a fresh skb and queued as unresolved so the daemon can
 * resolve it; the netlink portid is stashed so the answer can be
 * routed back to the requester.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Synthetic header: only saddr/daddr matter to the
		 * unresolved-queue lookup; everything else is zeroed.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2261 
/* Build one RTM_{NEW,DEL,GET}ROUTE netlink message for MFC entry @c.
 * Returns 0 on success, -EMSGSIZE when @skb ran out of tailroom (the
 * partial message is cancelled).
 */
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2305 
2306 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2307 			      u32 portid, u32 seq, struct mr_mfc *c,
2308 			      int cmd, int flags)
2309 {
2310 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2311 				 cmd, flags);
2312 }
2313 
2314 static int mr6_msgsize(bool unresolved, int maxvif)
2315 {
2316 	size_t len =
2317 		NLMSG_ALIGN(sizeof(struct rtmsg))
2318 		+ nla_total_size(4)	/* RTA_TABLE */
2319 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2320 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2321 		;
2322 
2323 	if (!unresolved)
2324 		len = len
2325 		      + nla_total_size(4)	/* RTA_IIF */
2326 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2327 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2328 						/* RTA_MFC_STATS */
2329 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2330 		;
2331 
2332 	return len;
2333 }
2334 
/* mr6_netlink_event - broadcast an RTM_NEWROUTE/RTM_DELROUTE (@cmd)
 * notification for cache entry @mfc to RTNLGRP_IPV6_MROUTE listeners.
 *
 * The skb is sized by mr6_msgsize(); an entry whose parent vif is out
 * of range (>= MAXMIFS) is treated as unresolved and gets the smaller
 * layout.  On any failure, listeners are notified of the error through
 * rtnl_set_sk_err() instead of the event.
 */
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	/* kfree_skb(NULL) is a no-op, so this path is safe for both the
	 * allocation failure (skb == NULL, err == -ENOBUFS) and the fill
	 * failure (skb allocated, err < 0 from ip6mr_fill_mroute()).
	 */
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
2359 
2360 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2361 {
2362 	size_t len =
2363 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2364 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2365 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2366 					/* IP6MRA_CREPORT_SRC_ADDR */
2367 		+ nla_total_size(sizeof(struct in6_addr))
2368 					/* IP6MRA_CREPORT_DST_ADDR */
2369 		+ nla_total_size(sizeof(struct in6_addr))
2370 					/* IP6MRA_CREPORT_PKT */
2371 		+ nla_total_size(payloadlen)
2372 		;
2373 
2374 	return len;
2375 }
2376 
/* mrt6msg_netlink_event - mirror a cache report to netlink.
 *
 * Translates the mrt6msg found at @pkt's transport header (the upcall
 * normally delivered to the pim6sd socket) into an RTM_NEWCACHEREPORT
 * message on RTNLGRP_IPV6_MROUTE_R: message type, incoming mif,
 * source/destination addresses, plus a verbatim copy of the packet
 * bytes following the mrt6msg header.  On any failure the group is
 * flagged with -ENOBUFS via rtnl_set_sk_err().
 */
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* Everything after the mrt6msg header is the original packet. */
	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	/* Reserve the attribute first, then copy the packet bytes
	 * straight into it to avoid an intermediate buffer.
	 */
	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}
2424 
/* RTM_GETROUTE dump handler for RTNL_FAMILY_IP6MR: walks all multicast
 * tables via the generic mr_rtm_dumproute() helper, formatting each
 * entry with _ip6mr_fill_mroute() and covering unresolved entries
 * under mfc_unres_lock.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}
2430