xref: /linux/net/ipv6/ip6mr.c (revision 3e9201e4fe8bd78f4601a51212562505bbb60e3a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
53 struct ip6mr_rule {
54 	struct fib_rule		common;
55 };
56 
/* Lookup result handed to the rule action: the multicast table selected. */
struct ip6mr_result {
	struct mr_table *mrt;
};
60 
/* Big lock protecting the vif table, the mrt cache and the mroute socket state.
 * Note that changes are serialized via rtnl_lock.
 */
64 
65 static DEFINE_SPINLOCK(mrt_lock);
66 
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76 
/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and is protected
 * with the weak lock mrt_lock. The queue of unresolved entries is
 * protected with the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is entirely free of exclusive locks.
 */
84 
85 static struct kmem_cache *mrt_cachep __read_mostly;
86 
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt,
89 			     struct list_head *dev_kill_list);
90 
91 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
92 			   struct net_device *dev, struct sk_buff *skb,
93 			   struct mfc6_cache *cache);
94 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
95 			      mifi_t mifi, int assert);
96 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
97 			      int cmd);
98 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
99 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
100 			      struct netlink_ext_ack *extack);
101 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
102 			       struct netlink_callback *cb);
103 static void mroute_clean_tables(struct mr_table *mrt, int flags,
104 				struct list_head *dev_kill_list);
105 static void ipmr_expire_process(struct timer_list *t);
106 
107 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table of @net. The lockdep condition accepts
 * either RTNL being held or the table list being empty.
 */
#define ip6mr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv6.mr6_tables))
112 
113 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
114 					    struct mr_table *mrt)
115 {
116 	struct mr_table *ret;
117 
118 	if (!mrt)
119 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
120 				     struct mr_table, list);
121 	else
122 		ret = list_entry_rcu(mrt->list.next,
123 				     struct mr_table, list);
124 
125 	if (&ret->list == &net->ipv6.mr6_tables)
126 		return NULL;
127 	return ret;
128 }
129 
130 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr_table **mrt)
143 {
144 	int err;
145 	struct ip6mr_result res;
146 	struct fib_lookup_arg arg = {
147 		.result = &res,
148 		.flags = FIB_LOOKUP_NOREF,
149 	};
150 
151 	/* update flow if oif or iif point to device enslaved to l3mdev */
152 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
153 
154 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
155 			       flowi6_to_flowi(flp6), 0, &arg);
156 	if (err < 0)
157 		return err;
158 	*mrt = res.mrt;
159 	return 0;
160 }
161 
162 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
163 			     int flags, struct fib_lookup_arg *arg)
164 {
165 	struct ip6mr_result *res = arg->result;
166 	struct mr_table *mrt;
167 
168 	switch (rule->action) {
169 	case FR_ACT_TO_TBL:
170 		break;
171 	case FR_ACT_UNREACHABLE:
172 		return -ENETUNREACH;
173 	case FR_ACT_PROHIBIT:
174 		return -EACCES;
175 	case FR_ACT_BLACKHOLE:
176 	default:
177 		return -EINVAL;
178 	}
179 
180 	arg->table = fib_rule_get_table(rule, arg);
181 
182 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
183 	if (!mrt)
184 		return -EAGAIN;
185 	res->mrt = mrt;
186 	return 0;
187 }
188 
/* fib_rules ->match callback: unconditionally reports a match (returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	return 1;
}
193 
/* fib_rules ->configure callback: nothing to parse, always returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
200 
/* fib_rules ->compare callback: always returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
206 
207 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208 			   struct fib_rule_hdr *frh)
209 {
210 	frh->dst_len = 0;
211 	frh->src_len = 0;
212 	frh->tos     = 0;
213 	return 0;
214 }
215 
216 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
217 	.family		= RTNL_FAMILY_IP6MR,
218 	.rule_size	= sizeof(struct ip6mr_rule),
219 	.addr_size	= sizeof(struct in6_addr),
220 	.action		= ip6mr_rule_action,
221 	.match		= ip6mr_rule_match,
222 	.configure	= ip6mr_rule_configure,
223 	.compare	= ip6mr_rule_compare,
224 	.fill		= ip6mr_rule_fill,
225 	.nlgroup	= RTNLGRP_IPV6_RULE,
226 	.owner		= THIS_MODULE,
227 };
228 
229 static int __net_init ip6mr_rules_init(struct net *net)
230 {
231 	struct fib_rules_ops *ops;
232 	LIST_HEAD(dev_kill_list);
233 	struct mr_table *mrt;
234 	int err;
235 
236 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
237 	if (IS_ERR(ops))
238 		return PTR_ERR(ops);
239 
240 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
241 
242 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
243 	if (IS_ERR(mrt)) {
244 		err = PTR_ERR(mrt);
245 		goto err1;
246 	}
247 
248 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
249 	if (err < 0)
250 		goto err2;
251 
252 	net->ipv6.mr6_rules_ops = ops;
253 	return 0;
254 
255 err2:
256 	ip6mr_free_table(mrt, &dev_kill_list);
257 err1:
258 	fib_rules_unregister(ops);
259 	return err;
260 }
261 
262 static void __net_exit ip6mr_rules_exit(struct net *net)
263 {
264 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
265 }
266 
267 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
268 					     struct list_head *dev_kill_list)
269 {
270 	struct mr_table *mrt, *next;
271 
272 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
273 		list_del_rcu(&mrt->list);
274 		ip6mr_free_table(mrt, dev_kill_list);
275 	}
276 }
277 
278 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
279 			    struct netlink_ext_ack *extack)
280 {
281 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
282 }
283 
284 static unsigned int ip6mr_rules_seq_read(const struct net *net)
285 {
286 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
287 }
288 
289 bool ip6mr_rule_default(const struct fib_rule *rule)
290 {
291 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
292 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
293 }
294 EXPORT_SYMBOL(ip6mr_rule_default);
295 #else
296 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
297 					    struct mr_table *mrt)
298 {
299 	if (!mrt)
300 		return rcu_dereference(net->ipv6.mrt6);
301 	return NULL;
302 }
303 
304 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
305 {
306 	return rcu_dereference_check(net->ipv6.mrt6,
307 				     lockdep_rtnl_is_held() ||
308 				     !rcu_access_pointer(net->ipv6.mrt6));
309 }
310 
/* Single-table variant: the body runs at most once, for the only table. */
#define ip6mr_for_each_table(mrt, net)					\
	for (mrt = __ip6mr_get_table(net, 0);				\
	     mrt;							\
	     mrt = NULL)
313 
314 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
315 			    struct mr_table **mrt)
316 {
317 	*mrt = rcu_dereference(net->ipv6.mrt6);
318 	if (!*mrt)
319 		return -EAGAIN;
320 	return 0;
321 }
322 
323 static int __net_init ip6mr_rules_init(struct net *net)
324 {
325 	struct mr_table *mrt;
326 
327 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
328 	if (IS_ERR(mrt))
329 		return PTR_ERR(mrt);
330 
331 	rcu_assign_pointer(net->ipv6.mrt6, mrt);
332 	return 0;
333 }
334 
335 static void __net_exit ip6mr_rules_exit(struct net *net)
336 {
337 }
338 
339 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
340 					     struct list_head *dev_kill_list)
341 {
342 	struct mr_table *mrt = rcu_dereference_protected(net->ipv6.mrt6, 1);
343 
344 	RCU_INIT_POINTER(net->ipv6.mrt6, NULL);
345 	ip6mr_free_table(mrt, dev_kill_list);
346 }
347 
/* Single-table configuration: there are no rules to dump; returns 0. */
static int ip6mr_rules_dump(struct net *net,
			    struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}
353 
/* Single-table configuration: no rule sequence counter; returns 0. */
static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
358 #endif
359 
360 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
361 {
362 	struct mr_table *mrt;
363 
364 	rcu_read_lock();
365 	mrt = __ip6mr_get_table(net, id);
366 	rcu_read_unlock();
367 
368 	return mrt;
369 }
370 
371 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
372 			  const void *ptr)
373 {
374 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
375 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
376 
377 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
378 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
379 }
380 
381 static const struct rhashtable_params ip6mr_rht_params = {
382 	.head_offset = offsetof(struct mr_mfc, mnode),
383 	.key_offset = offsetof(struct mfc6_cache, cmparg),
384 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
385 	.nelem_hint = 3,
386 	.obj_cmpfn = ip6mr_hash_cmp,
387 	.automatic_shrinking = true,
388 };
389 
/* Callback passed to mr_table_alloc(): with multiple tables configured,
 * append @mrt to @net's table list; otherwise do nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt, struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
397 
398 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
399 	.mf6c_origin = IN6ADDR_ANY_INIT,
400 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
401 };
402 
403 static struct mr_table_ops ip6mr_mr_table_ops = {
404 	.rht_params = &ip6mr_rht_params,
405 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
406 };
407 
408 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
409 {
410 	struct mr_table *mrt;
411 
412 	mrt = __ip6mr_get_table(net, id);
413 	if (mrt)
414 		return mrt;
415 
416 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
417 			      ipmr_expire_process, ip6mr_new_table_set);
418 }
419 
420 static void ip6mr_free_table(struct mr_table *mrt,
421 			     struct list_head *dev_kill_list)
422 {
423 	struct net *net = read_pnet(&mrt->net);
424 	LIST_HEAD(ip6mr_dev_kill_list);
425 
426 	WARN_ON_ONCE(!mr_can_free_table(net));
427 
428 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
429 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
430 			    MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC,
431 			    &ip6mr_dev_kill_list);
432 
433 	mr_table_free(mrt);
434 
435 	WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ip6mr_dev_kill_list));
436 	list_splice(&ip6mr_dev_kill_list, dev_kill_list);
437 }
438 
439 #ifdef CONFIG_PROC_FS
440 /* The /proc interfaces to multicast routing
441  * /proc/ip6_mr_cache /proc/ip6_mr_vif
442  */
443 
444 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
445 	__acquires(RCU)
446 {
447 	struct mr_vif_iter *iter = seq->private;
448 	struct net *net = seq_file_net(seq);
449 	struct mr_table *mrt;
450 
451 	rcu_read_lock();
452 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
453 	if (!mrt) {
454 		rcu_read_unlock();
455 		return ERR_PTR(-ENOENT);
456 	}
457 
458 	iter->mrt = mrt;
459 
460 	return mr_vif_seq_start(seq, pos);
461 }
462 
463 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
464 	__releases(RCU)
465 {
466 	rcu_read_unlock();
467 }
468 
469 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
470 {
471 	struct mr_vif_iter *iter = seq->private;
472 	struct mr_table *mrt = iter->mrt;
473 
474 	if (v == SEQ_START_TOKEN) {
475 		seq_puts(seq,
476 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
477 	} else {
478 		const struct vif_device *vif = v;
479 		const struct net_device *vif_dev;
480 		const char *name;
481 
482 		vif_dev = vif_dev_read(vif);
483 		name = vif_dev ? vif_dev->name : "none";
484 
485 		seq_printf(seq,
486 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
487 			   vif - mrt->vif_table,
488 			   name, vif->bytes_in, vif->pkt_in,
489 			   vif->bytes_out, vif->pkt_out,
490 			   vif->flags);
491 	}
492 	return 0;
493 }
494 
495 static const struct seq_operations ip6mr_vif_seq_ops = {
496 	.start = ip6mr_vif_seq_start,
497 	.next  = mr_vif_seq_next,
498 	.stop  = ip6mr_vif_seq_stop,
499 	.show  = ip6mr_vif_seq_show,
500 };
501 
502 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
503 {
504 	struct net *net = seq_file_net(seq);
505 	struct mr_table *mrt;
506 
507 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
508 	if (!mrt)
509 		return ERR_PTR(-ENOENT);
510 
511 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
512 }
513 
514 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
515 {
516 	int n;
517 
518 	if (v == SEQ_START_TOKEN) {
519 		seq_puts(seq,
520 			 "Group                            "
521 			 "Origin                           "
522 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
523 	} else {
524 		const struct mfc6_cache *mfc = v;
525 		const struct mr_mfc_iter *it = seq->private;
526 		struct mr_table *mrt = it->mrt;
527 
528 		seq_printf(seq, "%pI6 %pI6 %-3hd",
529 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
530 			   mfc->_c.mfc_parent);
531 
532 		if (it->cache != &mrt->mfc_unres_queue) {
533 			seq_printf(seq, " %8lu %8lu %8lu",
534 				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
535 				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
536 				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
537 			for (n = mfc->_c.mfc_un.res.minvif;
538 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
539 				if (VIF_EXISTS(mrt, n) &&
540 				    mfc->_c.mfc_un.res.ttls[n] < 255)
541 					seq_printf(seq,
542 						   " %2d:%-3d", n,
543 						   mfc->_c.mfc_un.res.ttls[n]);
544 			}
545 		} else {
546 			/* unresolved mfc_caches don't contain
547 			 * pkt, bytes and wrong_if values
548 			 */
549 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
550 		}
551 		seq_putc(seq, '\n');
552 	}
553 	return 0;
554 }
555 
556 static const struct seq_operations ipmr_mfc_seq_ops = {
557 	.start = ipmr_mfc_seq_start,
558 	.next  = mr_mfc_seq_next,
559 	.stop  = mr_mfc_seq_stop,
560 	.show  = ipmr_mfc_seq_show,
561 };
562 #endif
563 
564 #ifdef CONFIG_IPV6_PIMSM_V2
565 
566 static int pim6_rcv(struct sk_buff *skb)
567 {
568 	struct pimreghdr *pim;
569 	struct ipv6hdr   *encap;
570 	struct net_device  *reg_dev = NULL;
571 	struct net *net = dev_net(skb->dev);
572 	struct mr_table *mrt;
573 	struct flowi6 fl6 = {
574 		.flowi6_iif	= skb->dev->ifindex,
575 		.flowi6_mark	= skb->mark,
576 	};
577 	int reg_vif_num;
578 
579 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
580 		goto drop;
581 
582 	pim = (struct pimreghdr *)skb_transport_header(skb);
583 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
584 	    (pim->flags & PIM_NULL_REGISTER) ||
585 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
586 			     sizeof(*pim), IPPROTO_PIM,
587 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
588 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
589 		goto drop;
590 
591 	/* check if the inner packet is destined to mcast group */
592 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
593 				   sizeof(*pim));
594 
595 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
596 	    encap->payload_len == 0 ||
597 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
598 		goto drop;
599 
600 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
601 		goto drop;
602 
603 	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
604 	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
605 	if (reg_vif_num >= 0)
606 		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
607 
608 	if (!reg_dev)
609 		goto drop;
610 
611 	skb->mac_header = skb->network_header;
612 	skb_pull(skb, (u8 *)encap - skb->data);
613 	skb_reset_network_header(skb);
614 	skb->protocol = htons(ETH_P_IPV6);
615 	skb->ip_summed = CHECKSUM_NONE;
616 
617 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
618 
619 	netif_rx(skb);
620 
621 	return 0;
622  drop:
623 	kfree_skb(skb);
624 	return 0;
625 }
626 
627 static const struct inet6_protocol pim6_protocol = {
628 	.handler	=	pim6_rcv,
629 };
630 
631 /* Service routines creating virtual interfaces: PIMREG */
632 
633 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
634 				      struct net_device *dev)
635 {
636 	struct net *net = dev_net(dev);
637 	struct mr_table *mrt;
638 	struct flowi6 fl6 = {
639 		.flowi6_oif	= dev->ifindex,
640 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
641 		.flowi6_mark	= skb->mark,
642 	};
643 
644 	if (!pskb_inet_may_pull(skb))
645 		goto tx_err;
646 
647 	rcu_read_lock();
648 
649 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
650 		goto tx_lookup_err;
651 
652 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
653 	DEV_STATS_INC(dev, tx_packets);
654 
655 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
656 			   MRT6MSG_WHOLEPKT);
657 	rcu_read_unlock();
658 	kfree_skb(skb);
659 	return NETDEV_TX_OK;
660 
661 tx_lookup_err:
662 	rcu_read_unlock();
663 tx_err:
664 	DEV_STATS_INC(dev, tx_errors);
665 	kfree_skb(skb);
666 	return NETDEV_TX_OK;
667 }
668 
/* ndo_get_iflink for the register vif device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
673 
674 static const struct net_device_ops reg_vif_netdev_ops = {
675 	.ndo_start_xmit	= reg_vif_xmit,
676 	.ndo_get_iflink = reg_vif_get_iflink,
677 };
678 
679 static void reg_vif_setup(struct net_device *dev)
680 {
681 	dev->type		= ARPHRD_PIMREG;
682 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
683 	dev->flags		= IFF_NOARP;
684 	dev->netdev_ops		= &reg_vif_netdev_ops;
685 	dev->needs_free_netdev	= true;
686 	dev->netns_immutable	= true;
687 }
688 
689 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
690 {
691 	struct net_device *dev;
692 	char name[IFNAMSIZ];
693 
694 	if (mrt->id == RT6_TABLE_DFLT)
695 		sprintf(name, "pim6reg");
696 	else
697 		sprintf(name, "pim6reg%u", mrt->id);
698 
699 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
700 	if (!dev)
701 		return NULL;
702 
703 	dev_net_set(dev, net);
704 
705 	if (register_netdevice(dev)) {
706 		free_netdev(dev);
707 		return NULL;
708 	}
709 
710 	if (dev_open(dev, NULL))
711 		goto failure;
712 
713 	dev_hold(dev);
714 	return dev;
715 
716 failure:
717 	unregister_netdevice(dev);
718 	return NULL;
719 }
720 #endif
721 
722 static int call_ip6mr_vif_entry_notifiers(struct net *net,
723 					  enum fib_event_type event_type,
724 					  struct vif_device *vif,
725 					  struct net_device *vif_dev,
726 					  mifi_t vif_index, u32 tb_id)
727 {
728 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
729 				     vif, vif_dev, vif_index, tb_id,
730 				     &net->ipv6.ipmr_seq);
731 }
732 
733 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
734 					  enum fib_event_type event_type,
735 					  struct mfc6_cache *mfc, u32 tb_id)
736 {
737 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
738 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
739 }
740 
741 /* Delete a VIF entry */
742 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
743 		       struct list_head *head)
744 {
745 	struct vif_device *v;
746 	struct net_device *dev;
747 	struct inet6_dev *in6_dev;
748 
749 	if (vifi < 0 || vifi >= mrt->maxvif)
750 		return -EADDRNOTAVAIL;
751 
752 	v = &mrt->vif_table[vifi];
753 
754 	dev = rtnl_dereference(v->dev);
755 	if (!dev)
756 		return -EADDRNOTAVAIL;
757 
758 	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
759 				       FIB_EVENT_VIF_DEL, v, dev,
760 				       vifi, mrt->id);
761 	spin_lock(&mrt_lock);
762 	RCU_INIT_POINTER(v->dev, NULL);
763 
764 #ifdef CONFIG_IPV6_PIMSM_V2
765 	if (vifi == mrt->mroute_reg_vif_num) {
766 		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
767 		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
768 	}
769 #endif
770 
771 	if (vifi + 1 == mrt->maxvif) {
772 		int tmp;
773 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
774 			if (VIF_EXISTS(mrt, tmp))
775 				break;
776 		}
777 		WRITE_ONCE(mrt->maxvif, tmp + 1);
778 	}
779 
780 	spin_unlock(&mrt_lock);
781 
782 	dev_set_allmulti(dev, -1);
783 
784 	in6_dev = __in6_dev_get(dev);
785 	if (in6_dev) {
786 		atomic_dec(&in6_dev->cnf.mc_forwarding);
787 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
788 					     NETCONFA_MC_FORWARDING,
789 					     dev->ifindex, &in6_dev->cnf);
790 	}
791 
792 	if ((v->flags & MIFF_REGISTER) && !notify)
793 		unregister_netdevice_queue(dev, head);
794 
795 	netdev_put(dev, &v->dev_tracker);
796 	return 0;
797 }
798 
799 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
800 {
801 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
802 
803 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
804 }
805 
806 static inline void ip6mr_cache_free(struct mfc6_cache *c)
807 {
808 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
809 }
810 
811 /* Destroy an unresolved cache entry, killing queued skbs
812    and reporting error to netlink readers.
813  */
814 
815 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
816 {
817 	struct net *net = read_pnet(&mrt->net);
818 	struct sk_buff *skb;
819 
820 	WRITE_ONCE(mrt->cache_resolve_queue_len,
821 		   mrt->cache_resolve_queue_len - 1);
822 
823 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
824 		if (ipv6_hdr(skb)->version == 0) {
825 			struct nlmsghdr *nlh = skb_pull(skb,
826 							sizeof(struct ipv6hdr));
827 			nlh->nlmsg_type = NLMSG_ERROR;
828 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
829 			skb_trim(skb, nlh->nlmsg_len);
830 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
831 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
832 		} else
833 			kfree_skb(skb);
834 	}
835 
836 	ip6mr_cache_free(c);
837 }
838 
839 
840 /* Timer process for all the unresolved queue. */
841 
842 static void ipmr_do_expire_process(struct mr_table *mrt)
843 {
844 	unsigned long now = jiffies;
845 	unsigned long expires = 10 * HZ;
846 	struct mr_mfc *c, *next;
847 
848 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
849 		if (time_after(c->mfc_un.unres.expires, now)) {
850 			/* not yet... */
851 			unsigned long interval = c->mfc_un.unres.expires - now;
852 			if (interval < expires)
853 				expires = interval;
854 			continue;
855 		}
856 
857 		list_del(&c->list);
858 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
859 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
860 	}
861 
862 	if (!list_empty(&mrt->mfc_unres_queue))
863 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
864 }
865 
866 static void ipmr_expire_process(struct timer_list *t)
867 {
868 	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
869 
870 	if (!spin_trylock(&mfc_unres_lock)) {
871 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
872 		return;
873 	}
874 
875 	if (!list_empty(&mrt->mfc_unres_queue))
876 		ipmr_do_expire_process(mrt);
877 
878 	spin_unlock(&mfc_unres_lock);
879 }
880 
881 /* Fill oifs list. It is called under locked mrt_lock. */
882 
883 static void ip6mr_update_thresholds(struct mr_table *mrt,
884 				    struct mr_mfc *cache,
885 				    unsigned char *ttls)
886 {
887 	int vifi;
888 
889 	cache->mfc_un.res.minvif = MAXMIFS;
890 	cache->mfc_un.res.maxvif = 0;
891 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
892 
893 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
894 		if (VIF_EXISTS(mrt, vifi) &&
895 		    ttls[vifi] && ttls[vifi] < 255) {
896 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
897 			if (cache->mfc_un.res.minvif > vifi)
898 				cache->mfc_un.res.minvif = vifi;
899 			if (cache->mfc_un.res.maxvif <= vifi)
900 				cache->mfc_un.res.maxvif = vifi + 1;
901 		}
902 	}
903 	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
904 }
905 
906 static int mif6_add(struct net *net, struct mr_table *mrt,
907 		    struct mif6ctl *vifc, int mrtsock)
908 {
909 	int vifi = vifc->mif6c_mifi;
910 	struct vif_device *v = &mrt->vif_table[vifi];
911 	struct net_device *dev;
912 	struct inet6_dev *in6_dev;
913 	int err;
914 
915 	/* Is vif busy ? */
916 	if (VIF_EXISTS(mrt, vifi))
917 		return -EADDRINUSE;
918 
919 	switch (vifc->mif6c_flags) {
920 #ifdef CONFIG_IPV6_PIMSM_V2
921 	case MIFF_REGISTER:
922 		/*
923 		 * Special Purpose VIF in PIM
924 		 * All the packets will be sent to the daemon
925 		 */
926 		if (mrt->mroute_reg_vif_num >= 0)
927 			return -EADDRINUSE;
928 		dev = ip6mr_reg_vif(net, mrt);
929 		if (!dev)
930 			return -ENOBUFS;
931 		err = dev_set_allmulti(dev, 1);
932 		if (err) {
933 			unregister_netdevice(dev);
934 			dev_put(dev);
935 			return err;
936 		}
937 		break;
938 #endif
939 	case 0:
940 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
941 		if (!dev)
942 			return -EADDRNOTAVAIL;
943 		err = dev_set_allmulti(dev, 1);
944 		if (err) {
945 			dev_put(dev);
946 			return err;
947 		}
948 		break;
949 	default:
950 		return -EINVAL;
951 	}
952 
953 	in6_dev = __in6_dev_get(dev);
954 	if (in6_dev) {
955 		atomic_inc(&in6_dev->cnf.mc_forwarding);
956 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
957 					     NETCONFA_MC_FORWARDING,
958 					     dev->ifindex, &in6_dev->cnf);
959 	}
960 
961 	/* Fill in the VIF structures */
962 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
963 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
964 			MIFF_REGISTER);
965 
966 	/* And finish update writing critical data */
967 	spin_lock(&mrt_lock);
968 	rcu_assign_pointer(v->dev, dev);
969 	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
970 #ifdef CONFIG_IPV6_PIMSM_V2
971 	if (v->flags & MIFF_REGISTER)
972 		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
973 #endif
974 	if (vifi + 1 > mrt->maxvif)
975 		WRITE_ONCE(mrt->maxvif, vifi + 1);
976 	spin_unlock(&mrt_lock);
977 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
978 				       v, dev, vifi, mrt->id);
979 	return 0;
980 }
981 
982 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
983 					   const struct in6_addr *origin,
984 					   const struct in6_addr *mcastgrp)
985 {
986 	struct mfc6_cache_cmp_arg arg = {
987 		.mf6c_origin = *origin,
988 		.mf6c_mcastgrp = *mcastgrp,
989 	};
990 
991 	return mr_mfc_find(mrt, &arg);
992 }
993 
994 /* Look for a (*,G) entry */
995 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
996 					       struct in6_addr *mcastgrp,
997 					       mifi_t mifi)
998 {
999 	struct mfc6_cache_cmp_arg arg = {
1000 		.mf6c_origin = in6addr_any,
1001 		.mf6c_mcastgrp = *mcastgrp,
1002 	};
1003 
1004 	if (ipv6_addr_any(mcastgrp))
1005 		return mr_mfc_find_any_parent(mrt, mifi);
1006 	return mr_mfc_find_any(mrt, mifi, &arg);
1007 }
1008 
1009 /* Look for a (S,G,iif) entry if parent != -1 */
1010 static struct mfc6_cache *
1011 ip6mr_cache_find_parent(struct mr_table *mrt,
1012 			const struct in6_addr *origin,
1013 			const struct in6_addr *mcastgrp,
1014 			int parent)
1015 {
1016 	struct mfc6_cache_cmp_arg arg = {
1017 		.mf6c_origin = *origin,
1018 		.mf6c_mcastgrp = *mcastgrp,
1019 	};
1020 
1021 	return mr_mfc_find_parent(mrt, &arg, parent);
1022 }
1023 
1024 /* Allocate a multicast cache entry */
1025 static struct mfc6_cache *ip6mr_cache_alloc(void)
1026 {
1027 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1028 	if (!c)
1029 		return NULL;
1030 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1031 	c->_c.mfc_un.res.minvif = MAXMIFS;
1032 	c->_c.free = ip6mr_cache_free_rcu;
1033 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1034 	return c;
1035 }
1036 
1037 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1038 {
1039 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1040 	if (!c)
1041 		return NULL;
1042 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1043 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1044 	return c;
1045 }
1046 
1047 /*
1048  *	A cache entry has gone into a resolved state from queued
1049  */
1050 
1051 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1052 				struct mfc6_cache *uc, struct mfc6_cache *c)
1053 {
1054 	struct sk_buff *skb;
1055 
1056 	/*
1057 	 *	Play the pending entries through our router
1058 	 */
1059 
1060 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1061 		if (ipv6_hdr(skb)->version == 0) {
1062 			struct nlmsghdr *nlh = skb_pull(skb,
1063 							sizeof(struct ipv6hdr));
1064 
1065 			if (mr_fill_mroute(mrt, skb, &c->_c,
1066 					   nlmsg_data(nlh)) > 0) {
1067 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1068 			} else {
1069 				nlh->nlmsg_type = NLMSG_ERROR;
1070 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1071 				skb_trim(skb, nlh->nlmsg_len);
1072 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1073 			}
1074 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1075 		} else {
1076 			rcu_read_lock();
1077 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1078 			rcu_read_unlock();
1079 		}
1080 	}
1081 }
1082 
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Builds a struct mrt6msg describing @pkt (message type @assert,
 *	interface @mifi) in a separate skb, emits a netlink event for it and
 *	queues it on the table's mroute socket.  @pkt itself is only read.
 *
 *	Returns 0 on success, -EINVAL if the table has no mroute socket,
 *	-ENOBUFS if no skb could be allocated and -ENOMEM if the socket
 *	refused the message.
 *
 *	Called under rcu_read_lock()
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	enum skb_drop_reason reason;
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;

	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk)
		return -EINVAL;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Whole-packet reports carry the packet itself, so start from @pkt
	 * with enough headroom for the mrt6msg header.
	 */
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		/* WRMIFWHOLE reports the caller's mif; WHOLEPKT reports the
		 * table's register vif.
		 */
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	/* The report shares the original packet's dst (extra reference). */
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	reason = sock_queue_rcv_skb_reason(mroute6_sk, skb);

	/* A non-zero drop reason means the socket did not take the skb. */
	if (reason) {
		sk_skb_reason_drop(mroute6_sk, skb, reason);
		return -ENOMEM;
	}

	return 0;
}
1181 
/* Queue a packet for resolution.
 *
 * Looks up (or creates) the unresolved cache entry matching the packet's
 * source and destination address and appends @skb to its pending queue,
 * all under mfc_unres_lock with bottom halves disabled.  When a new entry
 * is created the first packet is also reported to user space as
 * MRT6MSG_NOCACHE.
 *
 * Returns 0 if the skb was queued.  On any failure the skb is freed and a
 * negative errno is returned: -EINVAL if check_net() fails for the table's
 * netns, -ENOBUFS if no entry could be allocated or the entry already holds
 * more than 3 packets, or the error returned by ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc6_cache *c = NULL;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);

	if (!check_net(net)) {
		err = -EINVAL;
		goto err;
	}

	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			err = -ENOBUFS;
			goto err;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		/* The entry is not on the queue yet, so the error path may
		 * free it.
		 */
		if (err < 0)
			goto err;

		WRITE_ONCE(mrt->cache_resolve_queue_len,
			   mrt->cache_resolve_queue_len + 1);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		/* The entry is linked on the queue by now: clear the pointer
		 * so the error path only drops the skb, not the entry.
		 */
		c = NULL;
		err = -ENOBUFS;
		goto err;
	}

	if (dev) {
		skb->dev = dev;
		skb->skb_iif = dev->ifindex;
	}

	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);

	spin_unlock_bh(&mfc_unres_lock);
	return 0;

err:
	spin_unlock_bh(&mfc_unres_lock);
	/* c is non-NULL here only for a freshly allocated, unlinked entry. */
	if (c)
		ip6mr_cache_free(c);
	kfree_skb(skb);
	return err;
}
1261 
1262 /*
1263  *	MFC6 cache manipulation by user space
1264  */
1265 
1266 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1267 			    int parent)
1268 {
1269 	struct mfc6_cache *c;
1270 
1271 	rcu_read_lock();
1272 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1273 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1274 	rcu_read_unlock();
1275 	if (!c)
1276 		return -ENOENT;
1277 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1278 	list_del_rcu(&c->_c.list);
1279 
1280 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1281 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1282 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1283 	mr_cache_put(&c->_c);
1284 	return 0;
1285 }
1286 
1287 static int ip6mr_device_event(struct notifier_block *this,
1288 			      unsigned long event, void *ptr)
1289 {
1290 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1291 	struct net *net = dev_net(dev);
1292 	struct mr_table *mrt;
1293 	struct vif_device *v;
1294 	int ct;
1295 
1296 	if (event != NETDEV_UNREGISTER)
1297 		return NOTIFY_DONE;
1298 
1299 	ip6mr_for_each_table(mrt, net) {
1300 		v = &mrt->vif_table[0];
1301 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1302 			if (rcu_access_pointer(v->dev) == dev)
1303 				mif6_delete(mrt, ct, 1, NULL);
1304 		}
1305 	}
1306 
1307 	return NOTIFY_DONE;
1308 }
1309 
1310 static unsigned int ip6mr_seq_read(const struct net *net)
1311 {
1312 	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1313 }
1314 
1315 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1316 		      struct netlink_ext_ack *extack)
1317 {
1318 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1319 		       ip6mr_mr_table_iter, extack);
1320 }
1321 
1322 static struct notifier_block ip6_mr_notifier = {
1323 	.notifier_call = ip6mr_device_event
1324 };
1325 
1326 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1327 	.family		= RTNL_FAMILY_IP6MR,
1328 	.fib_seq_read	= ip6mr_seq_read,
1329 	.fib_dump	= ip6mr_dump,
1330 	.owner		= THIS_MODULE,
1331 };
1332 
1333 static int __net_init ip6mr_notifier_init(struct net *net)
1334 {
1335 	struct fib_notifier_ops *ops;
1336 
1337 	atomic_set(&net->ipv6.ipmr_seq, 0);
1338 
1339 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1340 	if (IS_ERR(ops))
1341 		return PTR_ERR(ops);
1342 
1343 	net->ipv6.ip6mr_notifier_ops = ops;
1344 
1345 	return 0;
1346 }
1347 
1348 static void __net_exit ip6mr_notifier_exit(struct net *net)
1349 {
1350 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1351 	net->ipv6.ip6mr_notifier_ops = NULL;
1352 }
1353 
/* Setup for IP multicast routing
 *
 * Per-netns initialisation: sets up the MFC mutex, the FIB notifier ops,
 * the routing rules/tables and, with CONFIG_PROC_FS, the "ip6_mr_vif" and
 * "ip6_mr_cache" proc entries.  On failure everything set up so far is
 * undone in reverse order and a negative errno is returned.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	/* Only the proc failure path below needs this list. */
	LIST_HEAD(dev_kill_list);
#endif
	int err;

	mutex_init(&net->ipv6.mfc_mutex);

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	/* proc_create_net() reports failure as NULL, so preset the errno. */
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	/* NOTE(review): dev_kill_list is filled by ip6mr_rules_exit_rtnl() but
	 * never handed to unregister_netdevice_many() here; presumably it is
	 * always empty this early in netns setup - confirm.
	 */
	ip6mr_rules_exit_rtnl(net, &dev_kill_list);
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}
1395 
/* Per-netns teardown: remove the proc entries (when built with
 * CONFIG_PROC_FS), then release the rules/tables and the FIB notifier ops,
 * i.e. the reverse of the setup order in ip6mr_net_init().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}
1405 
/* ->exit_rtnl() hook: forwards to ip6mr_rules_exit_rtnl(), passing along
 * the caller-owned @dev_kill_list.
 */
static void __net_exit ip6mr_net_exit_rtnl(struct net *net,
					   struct list_head *dev_kill_list)
{
	ip6mr_rules_exit_rtnl(net, dev_kill_list);
}
1411 
1412 static struct pernet_operations ip6mr_net_ops = {
1413 	.init = ip6mr_net_init,
1414 	.exit = ip6mr_net_exit,
1415 	.exit_rtnl = ip6mr_net_exit_rtnl,
1416 };
1417 
1418 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1419 	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1420 	 .msgtype = RTM_GETROUTE,
1421 	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute,
1422 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
1423 };
1424 
1425 int __init ip6_mr_init(void)
1426 {
1427 	int err;
1428 
1429 	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1430 	if (!mrt_cachep)
1431 		return -ENOMEM;
1432 
1433 	err = register_pernet_subsys(&ip6mr_net_ops);
1434 	if (err)
1435 		goto reg_pernet_fail;
1436 
1437 	err = register_netdevice_notifier(&ip6_mr_notifier);
1438 	if (err)
1439 		goto reg_notif_fail;
1440 #ifdef CONFIG_IPV6_PIMSM_V2
1441 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1442 		pr_err("%s: can't add PIM protocol\n", __func__);
1443 		err = -EAGAIN;
1444 		goto add_proto_fail;
1445 	}
1446 #endif
1447 	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1448 	if (!err)
1449 		return 0;
1450 
1451 #ifdef CONFIG_IPV6_PIMSM_V2
1452 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1453 add_proto_fail:
1454 	unregister_netdevice_notifier(&ip6_mr_notifier);
1455 #endif
1456 reg_notif_fail:
1457 	unregister_pernet_subsys(&ip6mr_net_ops);
1458 reg_pernet_fail:
1459 	kmem_cache_destroy(mrt_cachep);
1460 	return err;
1461 }
1462 
/* Undo ip6_mr_init(): unregister the rtnetlink handlers, the PIM protocol
 * handler (only with CONFIG_IPV6_PIMSM_V2), the netdevice notifier and the
 * pernet operations, then destroy the MFC slab cache.  The order is the
 * reverse of the registration order in ip6_mr_init().
 */
void __init ip6_mr_cleanup(void)
{
	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1473 
/* Add or update the resolved MFC entry described by @mfc in @mrt.
 *
 * If an entry for (origin, group, @parent) already exists, its parent and
 * thresholds are updated under mrt_lock and an ENTRY_REPLACE notification is
 * sent.  Otherwise a new entry is allocated, hashed and linked, any matching
 * unresolved entry is taken off the unresolved queue and its pending packets
 * are replayed, and an ENTRY_ADD notification is sent.
 *
 * @mrtsock: non-zero when the request comes from the table's own mroute
 *	     socket; when zero the entry is flagged MFC_STATIC.
 *
 * Returns 0 on success, -ENFILE if mf6cc_parent is out of range, -EINVAL if
 * a new entry's group is neither the any-address nor multicast, -ENOMEM if
 * allocation fails, or the error from the hash table insertion.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* 255 marks "not an output interface"; interfaces present in the
	 * request's ifset get threshold 1.
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place. */
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* Note: the group is only validated when a new entry is created. */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			WRITE_ONCE(mrt->cache_resolve_queue_len,
				   mrt->cache_resolve_queue_len - 1);
			found = true;
			break;
		}
	}
	/* Nothing left waiting for resolution: stop the expiry timer. */
	if (list_empty(&mrt->mfc_unres_queue))
		timer_delete(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is only meaningful when found is true. */
	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1562 
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	@flags selects what is flushed from @mrt:
 *	  MRT6_FLUSH_MIFS / MRT6_FLUSH_MIFS_STATIC - non-static / static mifs
 *	  MRT6_FLUSH_MFC  / MRT6_FLUSH_MFC_STATIC  - non-static / static
 *						     resolved MFC entries
 *	MRT6_FLUSH_MFC additionally empties the unresolved queue.
 *	@dev_kill_list is passed through to mif6_delete().
 */

static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	struct mr_mfc *c, *tmp;
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			/* Skip a mif whose static/non-static class was not
			 * requested.
			 */
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, dev_kill_list);
		}
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		mutex_lock(&net->ipv6.mfc_mutex);

		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			/* Same class filter as for the mifs above. */
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}

		mutex_unlock(&net->ipv6.mfc_mutex);
	}

	if (flags & MRT6_FLUSH_MFC) {
		/* Take the lock only if the queue length is non-zero or
		 * check_net() fails for this netns.
		 */
		if (READ_ONCE(mrt->cache_resolve_queue_len) || !check_net(net)) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}
1617 
1618 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1619 {
1620 	int err = 0;
1621 	struct net *net = sock_net(sk);
1622 
1623 	rtnl_lock();
1624 	spin_lock(&mrt_lock);
1625 	if (rtnl_dereference(mrt->mroute_sk)) {
1626 		err = -EADDRINUSE;
1627 	} else {
1628 		rcu_assign_pointer(mrt->mroute_sk, sk);
1629 		sock_set_flag(sk, SOCK_RCU_FREE);
1630 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1631 	}
1632 	spin_unlock(&mrt_lock);
1633 
1634 	if (!err)
1635 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1636 					     NETCONFA_MC_FORWARDING,
1637 					     NETCONFA_IFINDEX_ALL,
1638 					     net->ipv6.devconf_all);
1639 	rtnl_unlock();
1640 
1641 	return err;
1642 }
1643 
1644 int ip6mr_sk_done(struct sock *sk)
1645 {
1646 	struct net *net = sock_net(sk);
1647 	struct ipv6_devconf *devconf;
1648 	LIST_HEAD(dev_kill_list);
1649 	struct mr_table *mrt;
1650 	int err = -EACCES;
1651 
1652 	if (sk->sk_type != SOCK_RAW ||
1653 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1654 		return err;
1655 
1656 	devconf = net->ipv6.devconf_all;
1657 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1658 		return err;
1659 
1660 	rtnl_lock();
1661 	ip6mr_for_each_table(mrt, net) {
1662 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1663 			spin_lock(&mrt_lock);
1664 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1665 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1666 			 * so the RCU grace period before sk freeing
1667 			 * is guaranteed by sk_destruct()
1668 			 */
1669 			atomic_dec(&devconf->mc_forwarding);
1670 			spin_unlock(&mrt_lock);
1671 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1672 						     NETCONFA_MC_FORWARDING,
1673 						     NETCONFA_IFINDEX_ALL,
1674 						     net->ipv6.devconf_all);
1675 
1676 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC,
1677 					    &dev_kill_list);
1678 			err = 0;
1679 			break;
1680 		}
1681 	}
1682 	unregister_netdevice_many(&dev_kill_list);
1683 	rtnl_unlock();
1684 
1685 	return err;
1686 }
1687 
1688 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1689 {
1690 	struct mr_table *mrt;
1691 	struct flowi6 fl6 = {
1692 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1693 		.flowi6_oif	= skb->dev->ifindex,
1694 		.flowi6_mark	= skb->mark,
1695 	};
1696 
1697 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1698 		return NULL;
1699 
1700 	return rcu_access_pointer(mrt->mroute_sk);
1701 }
1702 EXPORT_SYMBOL(mroute6_is_socket);
1703 
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 *
 *	Only raw ICMPv6 sockets are accepted (-EOPNOTSUPP otherwise).  The
 *	table is the one selected on the socket, or RT6_TABLE_DFLT (-ENOENT
 *	if it does not exist).  Every option except MRT6_INIT requires the
 *	caller to be the table's mroute socket or to hold CAP_NET_ADMIN in
 *	the netns' user namespace (-EACCES otherwise).  Unknown options
 *	return -ENOPROTOOPT.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* The option value is not read; only its length is checked. */
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		/* Non-proxy variants always use parent -1. */
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		/* parent is still 0 only for the _PROXY variants, which take
		 * the parent from the request.
		 */
		if (parent == 0)
			parent = mfc.mf6cc_parent;

		mutex_lock(&net->ipv6.mfc_mutex);

		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rcu_access_pointer(mrt->mroute_sk),
					    parent);

		mutex_unlock(&net->ipv6.mfc_mutex);
		return ret;

	case MRT6_FLUSH:
	{
		LIST_HEAD(dev_kill_list);
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;

		rtnl_lock();
		mroute_clean_tables(mrt, flags, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		WRITE_ONCE(mrt->mroute_do_assert, v);
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		/* The raw value selects whole-packet wrong-mif reports; after
		 * that it is reduced to an on/off flag.
		 */
		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Nothing is written unless the PIM on/off state changes. */
		if (v != mrt->mroute_do_pim) {
			WRITE_ONCE(mrt->mroute_do_pim, v);
			WRITE_ONCE(mrt->mroute_do_assert, v);
			WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrmifwhole);
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* The table cannot be switched while this socket is the
		 * current table's mroute socket.
		 */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1889 
1890 /*
1891  *	Getsock opt support for the multicast routing system.
1892  */
1893 
1894 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1895 			  sockptr_t optlen)
1896 {
1897 	int olr;
1898 	int val;
1899 	struct net *net = sock_net(sk);
1900 	struct mr_table *mrt;
1901 
1902 	if (sk->sk_type != SOCK_RAW ||
1903 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1904 		return -EOPNOTSUPP;
1905 
1906 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1907 	if (!mrt)
1908 		return -ENOENT;
1909 
1910 	switch (optname) {
1911 	case MRT6_VERSION:
1912 		val = 0x0305;
1913 		break;
1914 #ifdef CONFIG_IPV6_PIMSM_V2
1915 	case MRT6_PIM:
1916 		val = READ_ONCE(mrt->mroute_do_pim);
1917 		break;
1918 #endif
1919 	case MRT6_ASSERT:
1920 		val = READ_ONCE(mrt->mroute_do_assert);
1921 		break;
1922 	default:
1923 		return -ENOPROTOOPT;
1924 	}
1925 
1926 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1927 		return -EFAULT;
1928 
1929 	olr = min_t(int, olr, sizeof(int));
1930 	if (olr < 0)
1931 		return -EINVAL;
1932 
1933 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1934 		return -EFAULT;
1935 	if (copy_to_sockptr(optval, &val, olr))
1936 		return -EFAULT;
1937 	return 0;
1938 }
1939 
1940 /*
1941  *	The IP multicast ioctl support routines.
1942  */
1943 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1944 {
1945 	struct sioc_sg_req6 *sr;
1946 	struct sioc_mif_req6 *vr;
1947 	struct vif_device *vif;
1948 	struct mfc6_cache *c;
1949 	struct net *net = sock_net(sk);
1950 	struct mr_table *mrt;
1951 
1952 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1953 	if (!mrt)
1954 		return -ENOENT;
1955 
1956 	switch (cmd) {
1957 	case SIOCGETMIFCNT_IN6:
1958 		vr = (struct sioc_mif_req6 *)arg;
1959 		if (vr->mifi >= mrt->maxvif)
1960 			return -EINVAL;
1961 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1962 		rcu_read_lock();
1963 		vif = &mrt->vif_table[vr->mifi];
1964 		if (VIF_EXISTS(mrt, vr->mifi)) {
1965 			vr->icount = READ_ONCE(vif->pkt_in);
1966 			vr->ocount = READ_ONCE(vif->pkt_out);
1967 			vr->ibytes = READ_ONCE(vif->bytes_in);
1968 			vr->obytes = READ_ONCE(vif->bytes_out);
1969 			rcu_read_unlock();
1970 			return 0;
1971 		}
1972 		rcu_read_unlock();
1973 		return -EADDRNOTAVAIL;
1974 	case SIOCGETSGCNT_IN6:
1975 		sr = (struct sioc_sg_req6 *)arg;
1976 
1977 		rcu_read_lock();
1978 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1979 				     &sr->grp.sin6_addr);
1980 		if (c) {
1981 			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1982 			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1983 			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1984 			rcu_read_unlock();
1985 			return 0;
1986 		}
1987 		rcu_read_unlock();
1988 		return -EADDRNOTAVAIL;
1989 	default:
1990 		return -ENOIOCTLCMD;
1991 	}
1992 }
1993 
1994 #ifdef CONFIG_COMPAT
/* Compat layout of the SIOCGETSGCNT_IN6 request, as copied to and from
 * user space by ip6mr_compat_ioctl(); the counters are compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;	/* in: source address to look up */
	struct sockaddr_in6 grp;	/* in: group address to look up */
	compat_ulong_t pktcnt;		/* out: packet counter of the entry */
	compat_ulong_t bytecnt;		/* out: byte counter of the entry */
	compat_ulong_t wrong_if;	/* out: wrong-interface counter */
};
2002 
/* Compat layout of the SIOCGETMIFCNT_IN6 request, as copied to and from
 * user space by ip6mr_compat_ioctl(); the counters are compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;			/* in: mif index to query */
	compat_ulong_t icount;		/* out: packets received on the mif */
	compat_ulong_t ocount;		/* out: packets sent on the mif */
	compat_ulong_t ibytes;		/* out: bytes received on the mif */
	compat_ulong_t obytes;		/* out: bytes sent on the mif */
};
2010 
2011 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
2012 {
2013 	struct compat_sioc_sg_req6 sr;
2014 	struct compat_sioc_mif_req6 vr;
2015 	struct vif_device *vif;
2016 	struct mfc6_cache *c;
2017 	struct net *net = sock_net(sk);
2018 	struct mr_table *mrt;
2019 
2020 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
2021 	if (!mrt)
2022 		return -ENOENT;
2023 
2024 	switch (cmd) {
2025 	case SIOCGETMIFCNT_IN6:
2026 		if (copy_from_user(&vr, arg, sizeof(vr)))
2027 			return -EFAULT;
2028 		if (vr.mifi >= mrt->maxvif)
2029 			return -EINVAL;
2030 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
2031 		rcu_read_lock();
2032 		vif = &mrt->vif_table[vr.mifi];
2033 		if (VIF_EXISTS(mrt, vr.mifi)) {
2034 			vr.icount = READ_ONCE(vif->pkt_in);
2035 			vr.ocount = READ_ONCE(vif->pkt_out);
2036 			vr.ibytes = READ_ONCE(vif->bytes_in);
2037 			vr.obytes = READ_ONCE(vif->bytes_out);
2038 			rcu_read_unlock();
2039 
2040 			if (copy_to_user(arg, &vr, sizeof(vr)))
2041 				return -EFAULT;
2042 			return 0;
2043 		}
2044 		rcu_read_unlock();
2045 		return -EADDRNOTAVAIL;
2046 	case SIOCGETSGCNT_IN6:
2047 		if (copy_from_user(&sr, arg, sizeof(sr)))
2048 			return -EFAULT;
2049 
2050 		rcu_read_lock();
2051 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2052 		if (c) {
2053 			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2054 			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2055 			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2056 			rcu_read_unlock();
2057 
2058 			if (copy_to_user(arg, &sr, sizeof(sr)))
2059 				return -EFAULT;
2060 			return 0;
2061 		}
2062 		rcu_read_unlock();
2063 		return -EADDRNOTAVAIL;
2064 	default:
2065 		return -ENOIOCTLCMD;
2066 	}
2067 }
2068 #endif
2069 
2070 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2071 {
2072 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2073 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2074 	return dst_output(net, sk, skb);
2075 }
2076 
2077 /*
2078  *	Processing handlers for ip6mr_forward
2079  */
2080 
/* Prepare @skb for transmission on mif @vifi of @mrt.
 *
 * Routes the packet out of the mif's link, attaches the resulting dst,
 * points skb->dev at the mif's device, updates the mif's output counters
 * and decrements the hop limit.
 *
 * Returns 0 when the skb is ready to be sent and -1 when it must not be
 * sent (no device, register mif, routing error, or skb_cow() failure).
 * The skb is not freed here in either case.
 */
static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
			      struct sk_buff *skb, int vifi)
{
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct ipv6hdr *ipv6h;
	struct dst_entry *dst;
	struct flowi6 fl6;

	vif_dev = vif_dev_read(vif);
	if (!vif_dev)
		return -1;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* A register mif never transmits: account the packet, report it to
	 * user space as MRT6MSG_WHOLEPKT and tell the caller not to send it.
	 */
	if (vif->flags & MIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		return -1;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		return -1;
	}

	/* Replace whatever dst the skb carried with the new output route. */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	skb->dev = vif_dev;
	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
		return -1;

	/* Re-read the header pointer after skb_cow() before writing to it. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;
	return 0;
}
2145 
2146 static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
2147 			   struct sk_buff *skb, int vifi)
2148 {
2149 	struct net_device *indev = skb->dev;
2150 
2151 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2152 		goto out_free;
2153 
2154 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2155 
2156 	NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2157 		net, NULL, skb, indev, skb->dev,
2158 		ip6mr_forward2_finish);
2159 	return;
2160 
2161 out_free:
2162 	kfree_skb(skb);
2163 }
2164 
2165 static void ip6mr_output2(struct net *net, struct mr_table *mrt,
2166 			  struct sk_buff *skb, int vifi)
2167 {
2168 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2169 		goto out_free;
2170 
2171 	ip6_output(net, NULL, skb);
2172 	return;
2173 
2174 out_free:
2175 	kfree_skb(skb);
2176 }
2177 
2178 /* Called with rcu_read_lock() */
2179 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2180 {
2181 	int ct;
2182 
2183 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2184 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2185 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2186 			break;
2187 	}
2188 	return ct;
2189 }
2190 
2191 /* Called under rcu_read_lock() */
2192 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2193 			   struct net_device *dev, struct sk_buff *skb,
2194 			   struct mfc6_cache *c)
2195 {
2196 	int psend = -1;
2197 	int vif, ct;
2198 	int true_vifi = ip6mr_find_vif(mrt, dev);
2199 
2200 	vif = c->_c.mfc_parent;
2201 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2202 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2203 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2204 
2205 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2206 		struct mfc6_cache *cache_proxy;
2207 
2208 		/* For an (*,G) entry, we only check that the incoming
2209 		 * interface is part of the static tree.
2210 		 */
2211 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2212 		if (cache_proxy &&
2213 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2214 			goto forward;
2215 	}
2216 
2217 	/*
2218 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2219 	 */
2220 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2221 		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2222 
2223 		if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) &&
2224 		    /* pimsm uses asserts, when switching from RPT to SPT,
2225 		       so that we cannot check that packet arrived on an oif.
2226 		       It is bad, but otherwise we would need to move pretty
2227 		       large chunk of pimd to kernel. Ough... --ANK
2228 		     */
2229 		    (READ_ONCE(mrt->mroute_do_pim) ||
2230 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2231 		    time_after(jiffies,
2232 			       c->_c.mfc_un.res.last_assert +
2233 			       MFC_ASSERT_THRESH)) {
2234 			c->_c.mfc_un.res.last_assert = jiffies;
2235 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2236 			if (READ_ONCE(mrt->mroute_do_wrvifwhole))
2237 				ip6mr_cache_report(mrt, skb, true_vifi,
2238 						   MRT6MSG_WRMIFWHOLE);
2239 		}
2240 		goto dont_forward;
2241 	}
2242 
2243 forward:
2244 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2245 		   mrt->vif_table[vif].pkt_in + 1);
2246 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2247 		   mrt->vif_table[vif].bytes_in + skb->len);
2248 
2249 	/*
2250 	 *	Forward the frame
2251 	 */
2252 	if (ipv6_addr_any(&c->mf6c_origin) &&
2253 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2254 		if (true_vifi >= 0 &&
2255 		    true_vifi != c->_c.mfc_parent &&
2256 		    ipv6_hdr(skb)->hop_limit >
2257 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2258 			/* It's an (*,*) entry and the packet is not coming from
2259 			 * the upstream: forward the packet to the upstream
2260 			 * only.
2261 			 */
2262 			psend = c->_c.mfc_parent;
2263 			goto last_forward;
2264 		}
2265 		goto dont_forward;
2266 	}
2267 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2268 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2269 		/* For (*,G) entry, don't forward to the incoming interface */
2270 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2271 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2272 			if (psend != -1) {
2273 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2274 				if (skb2)
2275 					ip6mr_forward2(net, mrt, skb2, psend);
2276 			}
2277 			psend = ct;
2278 		}
2279 	}
2280 last_forward:
2281 	if (psend != -1) {
2282 		ip6mr_forward2(net, mrt, skb, psend);
2283 		return;
2284 	}
2285 
2286 dont_forward:
2287 	kfree_skb(skb);
2288 }
2289 
2290 /* Called under rcu_read_lock() */
2291 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
2292 				 struct net_device *dev, struct sk_buff *skb,
2293 				 struct mfc6_cache *c)
2294 {
2295 	int psend = -1;
2296 	int ct;
2297 
2298 	WARN_ON_ONCE(!rcu_read_lock_held());
2299 
2300 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2301 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2302 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2303 
2304 	/* Forward the frame */
2305 	if (ipv6_addr_any(&c->mf6c_origin) &&
2306 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2307 		if (ipv6_hdr(skb)->hop_limit >
2308 		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2309 			/* It's an (*,*) entry and the packet is not coming from
2310 			 * the upstream: forward the packet to the upstream
2311 			 * only.
2312 			 */
2313 			psend = c->_c.mfc_parent;
2314 			goto last_forward;
2315 		}
2316 		goto dont_forward;
2317 	}
2318 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2319 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2320 		if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2321 			if (psend != -1) {
2322 				struct sk_buff *skb2;
2323 
2324 				skb2 = skb_clone(skb, GFP_ATOMIC);
2325 				if (skb2)
2326 					ip6mr_output2(net, mrt, skb2, psend);
2327 			}
2328 			psend = ct;
2329 		}
2330 	}
2331 last_forward:
2332 	if (psend != -1) {
2333 		ip6mr_output2(net, mrt, skb, psend);
2334 		return;
2335 	}
2336 
2337 dont_forward:
2338 	kfree_skb(skb);
2339 }
2340 
2341 /*
2342  *	Multicast packets for forwarding arrive here
2343  */
2344 
2345 int ip6_mr_input(struct sk_buff *skb)
2346 {
2347 	struct net_device *dev = skb->dev;
2348 	struct net *net = dev_net_rcu(dev);
2349 	struct mfc6_cache *cache;
2350 	struct mr_table *mrt;
2351 	struct flowi6 fl6 = {
2352 		.flowi6_iif	= dev->ifindex,
2353 		.flowi6_mark	= skb->mark,
2354 	};
2355 	int err;
2356 
2357 	/* skb->dev passed in is the master dev for vrfs.
2358 	 * Get the proper interface that does have a vif associated with it.
2359 	 */
2360 	if (netif_is_l3_master(dev)) {
2361 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2362 		if (!dev) {
2363 			kfree_skb(skb);
2364 			return -ENODEV;
2365 		}
2366 	}
2367 
2368 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2369 	if (err < 0) {
2370 		kfree_skb(skb);
2371 		return err;
2372 	}
2373 
2374 	cache = ip6mr_cache_find(mrt,
2375 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2376 	if (!cache) {
2377 		int vif = ip6mr_find_vif(mrt, dev);
2378 
2379 		if (vif >= 0)
2380 			cache = ip6mr_cache_find_any(mrt,
2381 						     &ipv6_hdr(skb)->daddr,
2382 						     vif);
2383 	}
2384 
2385 	/*
2386 	 *	No usable cache entry
2387 	 */
2388 	if (!cache) {
2389 		int vif;
2390 
2391 		vif = ip6mr_find_vif(mrt, dev);
2392 		if (vif >= 0) {
2393 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2394 
2395 			return err;
2396 		}
2397 		kfree_skb(skb);
2398 		return -ENODEV;
2399 	}
2400 
2401 	ip6_mr_forward(net, mrt, dev, skb, cache);
2402 
2403 	return 0;
2404 }
2405 
2406 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2407 {
2408 	struct net_device *dev = skb_dst(skb)->dev;
2409 	struct flowi6 fl6 = (struct flowi6) {
2410 		.flowi6_iif = LOOPBACK_IFINDEX,
2411 		.flowi6_mark = skb->mark,
2412 	};
2413 	struct mfc6_cache *cache;
2414 	struct mr_table *mrt;
2415 	int err;
2416 	int vif;
2417 
2418 	guard(rcu)();
2419 
2420 	if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
2421 		goto ip6_output;
2422 	if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
2423 		goto ip6_output;
2424 
2425 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2426 	if (err < 0) {
2427 		kfree_skb(skb);
2428 		return err;
2429 	}
2430 
2431 	cache = ip6mr_cache_find(mrt,
2432 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2433 	if (!cache) {
2434 		vif = ip6mr_find_vif(mrt, dev);
2435 		if (vif >= 0)
2436 			cache = ip6mr_cache_find_any(mrt,
2437 						     &ipv6_hdr(skb)->daddr,
2438 						     vif);
2439 	}
2440 
2441 	/* No usable cache entry */
2442 	if (!cache) {
2443 		vif = ip6mr_find_vif(mrt, dev);
2444 		if (vif >= 0)
2445 			return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2446 		goto ip6_output;
2447 	}
2448 
2449 	/* Wrong interface */
2450 	vif = cache->_c.mfc_parent;
2451 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2452 		goto ip6_output;
2453 
2454 	ip6_mr_output_finish(net, mrt, dev, skb, cache);
2455 	return 0;
2456 
2457 ip6_output:
2458 	return ip6_output(net, sk, skb);
2459 }
2460 
2461 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2462 		    u32 portid)
2463 {
2464 	int err;
2465 	struct mr_table *mrt;
2466 	struct mfc6_cache *cache;
2467 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2468 
2469 	rcu_read_lock();
2470 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2471 	if (!mrt) {
2472 		rcu_read_unlock();
2473 		return -ENOENT;
2474 	}
2475 
2476 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2477 	if (!cache && skb->dev) {
2478 		int vif = ip6mr_find_vif(mrt, skb->dev);
2479 
2480 		if (vif >= 0)
2481 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2482 						     vif);
2483 	}
2484 
2485 	if (!cache) {
2486 		struct sk_buff *skb2;
2487 		struct ipv6hdr *iph;
2488 		struct net_device *dev;
2489 		int vif;
2490 
2491 		dev = skb->dev;
2492 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2493 			rcu_read_unlock();
2494 			return -ENODEV;
2495 		}
2496 
2497 		/* really correct? */
2498 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2499 		if (!skb2) {
2500 			rcu_read_unlock();
2501 			return -ENOMEM;
2502 		}
2503 
2504 		NETLINK_CB(skb2).portid = portid;
2505 		skb_reset_transport_header(skb2);
2506 
2507 		skb_put(skb2, sizeof(struct ipv6hdr));
2508 		skb_reset_network_header(skb2);
2509 
2510 		iph = ipv6_hdr(skb2);
2511 		iph->version = 0;
2512 		iph->priority = 0;
2513 		iph->flow_lbl[0] = 0;
2514 		iph->flow_lbl[1] = 0;
2515 		iph->flow_lbl[2] = 0;
2516 		iph->payload_len = 0;
2517 		iph->nexthdr = IPPROTO_NONE;
2518 		iph->hop_limit = 0;
2519 		iph->saddr = rt->rt6i_src.addr;
2520 		iph->daddr = rt->rt6i_dst.addr;
2521 
2522 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2523 		rcu_read_unlock();
2524 
2525 		return err;
2526 	}
2527 
2528 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2529 	rcu_read_unlock();
2530 	return err;
2531 }
2532 
2533 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2534 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2535 			     int flags)
2536 {
2537 	struct nlmsghdr *nlh;
2538 	struct rtmsg *rtm;
2539 	int err;
2540 
2541 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2542 	if (!nlh)
2543 		return -EMSGSIZE;
2544 
2545 	rtm = nlmsg_data(nlh);
2546 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2547 	rtm->rtm_dst_len  = 128;
2548 	rtm->rtm_src_len  = 128;
2549 	rtm->rtm_tos      = 0;
2550 	rtm->rtm_table    = mrt->id;
2551 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2552 		goto nla_put_failure;
2553 	rtm->rtm_type = RTN_MULTICAST;
2554 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2555 	if (c->_c.mfc_flags & MFC_STATIC)
2556 		rtm->rtm_protocol = RTPROT_STATIC;
2557 	else
2558 		rtm->rtm_protocol = RTPROT_MROUTED;
2559 	rtm->rtm_flags    = 0;
2560 
2561 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2562 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2563 		goto nla_put_failure;
2564 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2565 	/* do not break the dump if cache is unresolved */
2566 	if (err < 0 && err != -ENOENT)
2567 		goto nla_put_failure;
2568 
2569 	nlmsg_end(skb, nlh);
2570 	return 0;
2571 
2572 nla_put_failure:
2573 	nlmsg_cancel(skb, nlh);
2574 	return -EMSGSIZE;
2575 }
2576 
2577 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2578 			      u32 portid, u32 seq, struct mr_mfc *c,
2579 			      int cmd, int flags)
2580 {
2581 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2582 				 cmd, flags);
2583 }
2584 
2585 static int mr6_msgsize(bool unresolved)
2586 {
2587 	size_t len =
2588 		NLMSG_ALIGN(sizeof(struct rtmsg))
2589 		+ nla_total_size(4)	/* RTA_TABLE */
2590 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2591 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2592 		;
2593 
2594 	if (!unresolved)
2595 		len = len
2596 		      + nla_total_size(4)	/* RTA_IIF */
2597 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2598 		      + MAXMIFS * NLA_ALIGN(sizeof(struct rtnexthop))
2599 						/* RTA_MFC_STATS */
2600 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2601 		;
2602 
2603 	return len;
2604 }
2605 
2606 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2607 			      int cmd)
2608 {
2609 	struct net *net = read_pnet(&mrt->net);
2610 	struct sk_buff *skb;
2611 	int err = -ENOBUFS;
2612 
2613 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS), GFP_ATOMIC);
2614 	if (!skb)
2615 		goto errout;
2616 
2617 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2618 	if (err < 0)
2619 		goto errout;
2620 
2621 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2622 	return;
2623 
2624 errout:
2625 	kfree_skb(skb);
2626 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2627 }
2628 
2629 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2630 {
2631 	size_t len =
2632 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2633 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2634 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2635 					/* IP6MRA_CREPORT_SRC_ADDR */
2636 		+ nla_total_size(sizeof(struct in6_addr))
2637 					/* IP6MRA_CREPORT_DST_ADDR */
2638 		+ nla_total_size(sizeof(struct in6_addr))
2639 					/* IP6MRA_CREPORT_PKT */
2640 		+ nla_total_size(payloadlen)
2641 		;
2642 
2643 	return len;
2644 }
2645 
2646 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2647 {
2648 	struct net *net = read_pnet(&mrt->net);
2649 	struct nlmsghdr *nlh;
2650 	struct rtgenmsg *rtgenm;
2651 	struct mrt6msg *msg;
2652 	struct sk_buff *skb;
2653 	struct nlattr *nla;
2654 	int payloadlen;
2655 
2656 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2657 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2658 
2659 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2660 	if (!skb)
2661 		goto errout;
2662 
2663 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2664 			sizeof(struct rtgenmsg), 0);
2665 	if (!nlh)
2666 		goto errout;
2667 	rtgenm = nlmsg_data(nlh);
2668 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2669 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2670 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2671 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2672 			     &msg->im6_src) ||
2673 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2674 			     &msg->im6_dst))
2675 		goto nla_put_failure;
2676 
2677 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2678 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2679 				  nla_data(nla), payloadlen))
2680 		goto nla_put_failure;
2681 
2682 	nlmsg_end(skb, nlh);
2683 
2684 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2685 	return;
2686 
2687 nla_put_failure:
2688 	nlmsg_cancel(skb, nlh);
2689 errout:
2690 	kfree_skb(skb);
2691 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2692 }
2693 
2694 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2695 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2696 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2697 	[RTA_TABLE]		= { .type = NLA_U32 },
2698 };
2699 
2700 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2701 					const struct nlmsghdr *nlh,
2702 					struct nlattr **tb,
2703 					struct netlink_ext_ack *extack)
2704 {
2705 	struct rtmsg *rtm;
2706 	int err;
2707 
2708 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2709 			  extack);
2710 	if (err)
2711 		return err;
2712 
2713 	rtm = nlmsg_data(nlh);
2714 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2715 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2716 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2717 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2718 		NL_SET_ERR_MSG_MOD(extack,
2719 				   "Invalid values in header for multicast route get request");
2720 		return -EINVAL;
2721 	}
2722 
2723 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2724 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2725 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2726 		return -EINVAL;
2727 	}
2728 
2729 	return 0;
2730 }
2731 
2732 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2733 			      struct netlink_ext_ack *extack)
2734 {
2735 	struct net *net = sock_net(in_skb->sk);
2736 	struct in6_addr src = {}, grp = {};
2737 	struct nlattr *tb[RTA_MAX + 1];
2738 	struct mfc6_cache *cache;
2739 	struct mr_table *mrt;
2740 	struct sk_buff *skb;
2741 	u32 tableid;
2742 	int err;
2743 
2744 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2745 	if (err < 0)
2746 		return err;
2747 
2748 	skb = nlmsg_new(mr6_msgsize(false), GFP_KERNEL);
2749 	if (!skb)
2750 		return -ENOBUFS;
2751 
2752 	if (tb[RTA_SRC])
2753 		src = nla_get_in6_addr(tb[RTA_SRC]);
2754 	if (tb[RTA_DST])
2755 		grp = nla_get_in6_addr(tb[RTA_DST]);
2756 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2757 
2758 	rcu_read_lock();
2759 
2760 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2761 	if (!mrt) {
2762 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2763 		err = -ENOENT;
2764 		goto err;
2765 	}
2766 
2767 	cache = ip6mr_cache_find(mrt, &src, &grp);
2768 	if (!cache) {
2769 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2770 		err = -ENOENT;
2771 		goto err;
2772 	}
2773 
2774 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2775 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2776 	if (err < 0)
2777 		goto err;
2778 
2779 	rcu_read_unlock();
2780 
2781 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2782 
2783 err:
2784 	rcu_read_unlock();
2785 	kfree_skb(skb);
2786 	return err;
2787 }
2788 
2789 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2790 {
2791 	const struct nlmsghdr *nlh = cb->nlh;
2792 	struct fib_dump_filter filter = {};
2793 	int err;
2794 
2795 	rcu_read_lock();
2796 
2797 	if (cb->strict_check) {
2798 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2799 					    &filter, cb);
2800 		if (err < 0)
2801 			goto unlock;
2802 	}
2803 
2804 	if (filter.table_id) {
2805 		struct mr_table *mrt;
2806 
2807 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2808 		if (!mrt) {
2809 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) {
2810 				err = skb->len;
2811 				goto unlock;
2812 			}
2813 
2814 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2815 			err = -ENOENT;
2816 			goto unlock;
2817 		}
2818 
2819 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2820 				    &mfc_unres_lock, &filter);
2821 		err = skb->len ? : err;
2822 		goto unlock;
2823 	}
2824 
2825 	err = mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2826 			       _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2827 unlock:
2828 	rcu_read_unlock();
2829 
2830 	return err;
2831 }
2832