xref: /linux/net/ipv6/ip6mr.c (revision 6443f4f20bdae726fe01cf5946fba9742a0ffda6)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
53 struct ip6mr_rule {
54 	struct fib_rule		common;
55 };
56 
/* Result slot filled by a rule lookup: the multicast routing table that the
 * matching rule selected.
 */
struct ip6mr_result {
	struct mr_table *mrt;
};
60 
/* Big lock protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialized by rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);
66 
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76 
77 /* We return to original Alan's scheme. Hash table of resolved
78    entries is changed only in process context and protected
79    with weak lock mrt_lock. Queue of unresolved entries is protected
80    with strong spinlock mfc_unres_lock.
81 
82    In this case data path is free of exclusive locks at all.
83  */
84 
85 static struct kmem_cache *mrt_cachep __read_mostly;
86 
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt,
89 			     struct list_head *dev_kill_list);
90 
91 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
92 			   struct net_device *dev, struct sk_buff *skb,
93 			   struct mfc6_cache *cache);
94 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
95 			      mifi_t mifi, int assert);
96 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
97 			      int cmd);
98 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
99 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
100 			      struct netlink_ext_ack *extack);
101 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
102 			       struct netlink_callback *cb);
103 static void mroute_clean_tables(struct mr_table *mrt, int flags,
104 				struct list_head *dev_kill_list);
105 static void ipmr_expire_process(struct timer_list *t);
106 
107 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every mr_table linked on @net's mr6_tables list.  The extra condition
 * handed to list_for_each_entry_rcu() holds when RTNL is held or when the
 * table list is empty.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))
112 
113 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
114 					    struct mr_table *mrt)
115 {
116 	struct mr_table *ret;
117 
118 	if (!mrt)
119 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
120 				     struct mr_table, list);
121 	else
122 		ret = list_entry_rcu(mrt->list.next,
123 				     struct mr_table, list);
124 
125 	if (&ret->list == &net->ipv6.mr6_tables)
126 		return NULL;
127 	return ret;
128 }
129 
130 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr_table **mrt)
143 {
144 	int err;
145 	struct ip6mr_result res;
146 	struct fib_lookup_arg arg = {
147 		.result = &res,
148 		.flags = FIB_LOOKUP_NOREF,
149 	};
150 
151 	/* update flow if oif or iif point to device enslaved to l3mdev */
152 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
153 
154 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
155 			       flowi6_to_flowi(flp6), 0, &arg);
156 	if (err < 0)
157 		return err;
158 	*mrt = res.mrt;
159 	return 0;
160 }
161 
162 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
163 			     int flags, struct fib_lookup_arg *arg)
164 {
165 	struct ip6mr_result *res = arg->result;
166 	struct mr_table *mrt;
167 
168 	switch (rule->action) {
169 	case FR_ACT_TO_TBL:
170 		break;
171 	case FR_ACT_UNREACHABLE:
172 		return -ENETUNREACH;
173 	case FR_ACT_PROHIBIT:
174 		return -EACCES;
175 	case FR_ACT_BLACKHOLE:
176 	default:
177 		return -EINVAL;
178 	}
179 
180 	arg->table = fib_rule_get_table(rule, arg);
181 
182 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
183 	if (!mrt)
184 		return -EAGAIN;
185 	res->mrt = mrt;
186 	return 0;
187 }
188 
/* fib_rules ->match callback: every flow matches, so always return 1. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	const int matched = 1;

	return matched;
}
193 
/* fib_rules ->configure callback: nothing to parse or set up, so this
 * always reports success (0).
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	const int ok = 0;

	return ok;
}
200 
/* fib_rules ->compare callback: always returns 1, without inspecting the
 * rule or the request.
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
206 
207 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208 			   struct fib_rule_hdr *frh)
209 {
210 	frh->dst_len = 0;
211 	frh->src_len = 0;
212 	frh->tos     = 0;
213 	return 0;
214 }
215 
216 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
217 	.family		= RTNL_FAMILY_IP6MR,
218 	.rule_size	= sizeof(struct ip6mr_rule),
219 	.addr_size	= sizeof(struct in6_addr),
220 	.action		= ip6mr_rule_action,
221 	.match		= ip6mr_rule_match,
222 	.configure	= ip6mr_rule_configure,
223 	.compare	= ip6mr_rule_compare,
224 	.fill		= ip6mr_rule_fill,
225 	.nlgroup	= RTNLGRP_IPV6_RULE,
226 	.owner		= THIS_MODULE,
227 };
228 
229 static int __net_init ip6mr_rules_init(struct net *net)
230 {
231 	struct fib_rules_ops *ops;
232 	LIST_HEAD(dev_kill_list);
233 	struct mr_table *mrt;
234 	int err;
235 
236 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
237 	if (IS_ERR(ops))
238 		return PTR_ERR(ops);
239 
240 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
241 
242 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
243 	if (IS_ERR(mrt)) {
244 		err = PTR_ERR(mrt);
245 		goto err1;
246 	}
247 
248 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
249 	if (err < 0)
250 		goto err2;
251 
252 	net->ipv6.mr6_rules_ops = ops;
253 	return 0;
254 
255 err2:
256 	ip6mr_free_table(mrt, &dev_kill_list);
257 err1:
258 	fib_rules_unregister(ops);
259 	return err;
260 }
261 
262 static void __net_exit ip6mr_rules_exit(struct net *net)
263 {
264 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
265 }
266 
267 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
268 					     struct list_head *dev_kill_list)
269 {
270 	struct mr_table *mrt, *next;
271 
272 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
273 		list_del_rcu(&mrt->list);
274 		ip6mr_free_table(mrt, dev_kill_list);
275 	}
276 }
277 
278 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
279 			    struct netlink_ext_ack *extack)
280 {
281 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
282 }
283 
284 static unsigned int ip6mr_rules_seq_read(const struct net *net)
285 {
286 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
287 }
288 
289 bool ip6mr_rule_default(const struct fib_rule *rule)
290 {
291 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
292 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
293 }
294 EXPORT_SYMBOL(ip6mr_rule_default);
295 #else
296 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
297 					    struct mr_table *mrt)
298 {
299 	if (!mrt)
300 		return rcu_dereference(net->ipv6.mrt6);
301 	return NULL;
302 }
303 
304 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
305 {
306 	return rcu_dereference_check(net->ipv6.mrt6,
307 				     lockdep_rtnl_is_held() ||
308 				     !rcu_access_pointer(net->ipv6.mrt6));
309 }
310 
/* Single-table configuration: the loop body runs at most once, for the
 * table returned by __ip6mr_get_table(), and not at all when that is NULL.
 */
#define ip6mr_for_each_table(mrt, net)				\
	for (mrt = __ip6mr_get_table(net, 0); mrt; mrt = NULL)
313 
314 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
315 			    struct mr_table **mrt)
316 {
317 	*mrt = rcu_dereference(net->ipv6.mrt6);
318 	if (!*mrt)
319 		return -EAGAIN;
320 	return 0;
321 }
322 
323 static int __net_init ip6mr_rules_init(struct net *net)
324 {
325 	struct mr_table *mrt;
326 
327 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
328 	if (IS_ERR(mrt))
329 		return PTR_ERR(mrt);
330 
331 	rcu_assign_pointer(net->ipv6.mrt6, mrt);
332 	return 0;
333 }
334 
335 static void __net_exit ip6mr_rules_exit(struct net *net)
336 {
337 }
338 
339 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
340 					     struct list_head *dev_kill_list)
341 {
342 	struct mr_table *mrt = rcu_dereference_protected(net->ipv6.mrt6, 1);
343 
344 	RCU_INIT_POINTER(net->ipv6.mrt6, NULL);
345 	ip6mr_free_table(mrt, dev_kill_list);
346 }
347 
/* Single-table configuration: there are no rules to dump; always returns 0. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	const int nothing_to_dump = 0;

	return nothing_to_dump;
}
353 
/* Single-table configuration: no rule sequence exists; always returns 0. */
static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	const unsigned int seq = 0;

	return seq;
}
358 #endif
359 
360 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
361 {
362 	struct mr_table *mrt;
363 
364 	rcu_read_lock();
365 	mrt = __ip6mr_get_table(net, id);
366 	rcu_read_unlock();
367 
368 	return mrt;
369 }
370 
371 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
372 			  const void *ptr)
373 {
374 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
375 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
376 
377 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
378 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
379 }
380 
381 static const struct rhashtable_params ip6mr_rht_params = {
382 	.head_offset = offsetof(struct mr_mfc, mnode),
383 	.key_offset = offsetof(struct mfc6_cache, cmparg),
384 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
385 	.nelem_hint = 3,
386 	.obj_cmpfn = ip6mr_hash_cmp,
387 	.automatic_shrinking = true,
388 };
389 
/* Table-setup callback passed to mr_table_alloc(): with multiple tables
 * enabled, append @mrt to the table list of @net; otherwise do nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
397 
398 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
399 	.mf6c_origin = IN6ADDR_ANY_INIT,
400 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
401 };
402 
403 static struct mr_table_ops ip6mr_mr_table_ops = {
404 	.rht_params = &ip6mr_rht_params,
405 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
406 };
407 
408 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
409 {
410 	struct mr_table *mrt;
411 
412 	mrt = __ip6mr_get_table(net, id);
413 	if (mrt)
414 		return mrt;
415 
416 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
417 			      ipmr_expire_process, ip6mr_new_table_set);
418 }
419 
420 static void ip6mr_free_table(struct mr_table *mrt,
421 			     struct list_head *dev_kill_list)
422 {
423 	struct net *net = read_pnet(&mrt->net);
424 	LIST_HEAD(ip6mr_dev_kill_list);
425 
426 	WARN_ON_ONCE(!mr_can_free_table(net));
427 
428 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
429 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
430 			    MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC,
431 			    &ip6mr_dev_kill_list);
432 
433 	mr_table_free(mrt);
434 
435 	WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ip6mr_dev_kill_list));
436 	list_splice(&ip6mr_dev_kill_list, dev_kill_list);
437 }
438 
439 #ifdef CONFIG_PROC_FS
440 /* The /proc interfaces to multicast routing
441  * /proc/ip6_mr_cache /proc/ip6_mr_vif
442  */
443 
444 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
445 	__acquires(RCU)
446 {
447 	struct mr_vif_iter *iter = seq->private;
448 	struct net *net = seq_file_net(seq);
449 	struct mr_table *mrt;
450 
451 	rcu_read_lock();
452 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
453 	if (!mrt) {
454 		rcu_read_unlock();
455 		return ERR_PTR(-ENOENT);
456 	}
457 
458 	iter->mrt = mrt;
459 
460 	return mr_vif_seq_start(seq, pos);
461 }
462 
463 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
464 	__releases(RCU)
465 {
466 	rcu_read_unlock();
467 }
468 
469 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
470 {
471 	struct mr_vif_iter *iter = seq->private;
472 	struct mr_table *mrt = iter->mrt;
473 
474 	if (v == SEQ_START_TOKEN) {
475 		seq_puts(seq,
476 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
477 	} else {
478 		const struct vif_device *vif = v;
479 		const struct net_device *vif_dev;
480 		const char *name;
481 
482 		vif_dev = vif_dev_read(vif);
483 		name = vif_dev ? vif_dev->name : "none";
484 
485 		seq_printf(seq,
486 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
487 			   vif - mrt->vif_table,
488 			   name, vif->bytes_in, vif->pkt_in,
489 			   vif->bytes_out, vif->pkt_out,
490 			   vif->flags);
491 	}
492 	return 0;
493 }
494 
495 static const struct seq_operations ip6mr_vif_seq_ops = {
496 	.start = ip6mr_vif_seq_start,
497 	.next  = mr_vif_seq_next,
498 	.stop  = ip6mr_vif_seq_stop,
499 	.show  = ip6mr_vif_seq_show,
500 };
501 
502 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
503 {
504 	struct net *net = seq_file_net(seq);
505 	struct mr_table *mrt;
506 
507 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
508 	if (!mrt)
509 		return ERR_PTR(-ENOENT);
510 
511 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
512 }
513 
514 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
515 {
516 	int n;
517 
518 	if (v == SEQ_START_TOKEN) {
519 		seq_puts(seq,
520 			 "Group                            "
521 			 "Origin                           "
522 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
523 	} else {
524 		const struct mfc6_cache *mfc = v;
525 		const struct mr_mfc_iter *it = seq->private;
526 		struct mr_table *mrt = it->mrt;
527 
528 		seq_printf(seq, "%pI6 %pI6 %-3hd",
529 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
530 			   mfc->_c.mfc_parent);
531 
532 		if (it->cache != &mrt->mfc_unres_queue) {
533 			seq_printf(seq, " %8lu %8lu %8lu",
534 				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
535 				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
536 				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
537 			for (n = mfc->_c.mfc_un.res.minvif;
538 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
539 				if (VIF_EXISTS(mrt, n) &&
540 				    mfc->_c.mfc_un.res.ttls[n] < 255)
541 					seq_printf(seq,
542 						   " %2d:%-3d", n,
543 						   mfc->_c.mfc_un.res.ttls[n]);
544 			}
545 		} else {
546 			/* unresolved mfc_caches don't contain
547 			 * pkt, bytes and wrong_if values
548 			 */
549 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
550 		}
551 		seq_putc(seq, '\n');
552 	}
553 	return 0;
554 }
555 
556 static const struct seq_operations ipmr_mfc_seq_ops = {
557 	.start = ipmr_mfc_seq_start,
558 	.next  = mr_mfc_seq_next,
559 	.stop  = mr_mfc_seq_stop,
560 	.show  = ipmr_mfc_seq_show,
561 };
562 #endif
563 
564 #ifdef CONFIG_IPV6_PIMSM_V2
565 
566 static int pim6_rcv(struct sk_buff *skb)
567 {
568 	struct pimreghdr *pim;
569 	struct ipv6hdr   *encap;
570 	struct net_device  *reg_dev = NULL;
571 	struct net *net = dev_net(skb->dev);
572 	struct mr_table *mrt;
573 	struct flowi6 fl6 = {
574 		.flowi6_iif	= skb->dev->ifindex,
575 		.flowi6_mark	= skb->mark,
576 	};
577 	int reg_vif_num;
578 
579 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
580 		goto drop;
581 
582 	pim = (struct pimreghdr *)skb_transport_header(skb);
583 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
584 	    (pim->flags & PIM_NULL_REGISTER) ||
585 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
586 			     sizeof(*pim), IPPROTO_PIM,
587 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
588 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
589 		goto drop;
590 
591 	/* check if the inner packet is destined to mcast group */
592 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
593 				   sizeof(*pim));
594 
595 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
596 	    encap->payload_len == 0 ||
597 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
598 		goto drop;
599 
600 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
601 		goto drop;
602 
603 	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
604 	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
605 	if (reg_vif_num >= 0)
606 		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
607 
608 	if (!reg_dev)
609 		goto drop;
610 
611 	skb->mac_header = skb->network_header;
612 	skb_pull(skb, (u8 *)encap - skb->data);
613 	skb_reset_network_header(skb);
614 	skb->protocol = htons(ETH_P_IPV6);
615 	skb->ip_summed = CHECKSUM_NONE;
616 
617 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
618 
619 	netif_rx(skb);
620 
621 	return 0;
622  drop:
623 	kfree_skb(skb);
624 	return 0;
625 }
626 
627 static const struct inet6_protocol pim6_protocol = {
628 	.handler	=	pim6_rcv,
629 };
630 
631 /* Service routines creating virtual interfaces: PIMREG */
632 
633 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
634 				      struct net_device *dev)
635 {
636 	struct net *net = dev_net(dev);
637 	struct mr_table *mrt;
638 	struct flowi6 fl6 = {
639 		.flowi6_oif	= dev->ifindex,
640 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
641 		.flowi6_mark	= skb->mark,
642 	};
643 
644 	if (!pskb_inet_may_pull(skb))
645 		goto tx_err;
646 
647 	rcu_read_lock();
648 
649 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
650 		goto tx_lookup_err;
651 
652 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
653 	DEV_STATS_INC(dev, tx_packets);
654 
655 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
656 			   MRT6MSG_WHOLEPKT);
657 	rcu_read_unlock();
658 	kfree_skb(skb);
659 	return NETDEV_TX_OK;
660 
661 tx_lookup_err:
662 	rcu_read_unlock();
663 tx_err:
664 	DEV_STATS_INC(dev, tx_errors);
665 	kfree_skb(skb);
666 	return NETDEV_TX_OK;
667 }
668 
/* ndo_get_iflink of the register device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	const int iflink = 0;

	return iflink;
}
673 
674 static const struct net_device_ops reg_vif_netdev_ops = {
675 	.ndo_start_xmit	= reg_vif_xmit,
676 	.ndo_get_iflink = reg_vif_get_iflink,
677 };
678 
679 static void reg_vif_setup(struct net_device *dev)
680 {
681 	dev->type		= ARPHRD_PIMREG;
682 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
683 	dev->flags		= IFF_NOARP;
684 	dev->netdev_ops		= &reg_vif_netdev_ops;
685 	dev->needs_free_netdev	= true;
686 	dev->netns_immutable	= true;
687 }
688 
689 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
690 {
691 	struct net_device *dev;
692 	char name[IFNAMSIZ];
693 
694 	if (mrt->id == RT6_TABLE_DFLT)
695 		sprintf(name, "pim6reg");
696 	else
697 		sprintf(name, "pim6reg%u", mrt->id);
698 
699 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
700 	if (!dev)
701 		return NULL;
702 
703 	dev_net_set(dev, net);
704 
705 	if (register_netdevice(dev)) {
706 		free_netdev(dev);
707 		return NULL;
708 	}
709 
710 	if (dev_open(dev, NULL))
711 		goto failure;
712 
713 	dev_hold(dev);
714 	return dev;
715 
716 failure:
717 	unregister_netdevice(dev);
718 	return NULL;
719 }
720 #endif
721 
722 static int call_ip6mr_vif_entry_notifiers(struct net *net,
723 					  enum fib_event_type event_type,
724 					  struct vif_device *vif,
725 					  struct net_device *vif_dev,
726 					  mifi_t vif_index, u32 tb_id)
727 {
728 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
729 				     vif, vif_dev, vif_index, tb_id,
730 				     &net->ipv6.ipmr_seq);
731 }
732 
733 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
734 					  enum fib_event_type event_type,
735 					  struct mfc6_cache *mfc, u32 tb_id)
736 {
737 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
738 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
739 }
740 
741 /* Delete a VIF entry */
742 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
743 		       struct list_head *head)
744 {
745 	struct vif_device *v;
746 	struct net_device *dev;
747 	struct inet6_dev *in6_dev;
748 
749 	if (vifi < 0 || vifi >= mrt->maxvif)
750 		return -EADDRNOTAVAIL;
751 
752 	v = &mrt->vif_table[vifi];
753 
754 	dev = rtnl_dereference(v->dev);
755 	if (!dev)
756 		return -EADDRNOTAVAIL;
757 
758 	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
759 				       FIB_EVENT_VIF_DEL, v, dev,
760 				       vifi, mrt->id);
761 	spin_lock(&mrt_lock);
762 	RCU_INIT_POINTER(v->dev, NULL);
763 
764 #ifdef CONFIG_IPV6_PIMSM_V2
765 	if (vifi == mrt->mroute_reg_vif_num) {
766 		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
767 		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
768 	}
769 #endif
770 
771 	if (vifi + 1 == mrt->maxvif) {
772 		int tmp;
773 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
774 			if (VIF_EXISTS(mrt, tmp))
775 				break;
776 		}
777 		WRITE_ONCE(mrt->maxvif, tmp + 1);
778 	}
779 
780 	spin_unlock(&mrt_lock);
781 
782 	dev_set_allmulti(dev, -1);
783 
784 	in6_dev = __in6_dev_get(dev);
785 	if (in6_dev) {
786 		atomic_dec(&in6_dev->cnf.mc_forwarding);
787 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
788 					     NETCONFA_MC_FORWARDING,
789 					     dev->ifindex, &in6_dev->cnf);
790 	}
791 
792 	if ((v->flags & MIFF_REGISTER) && !notify)
793 		unregister_netdevice_queue(dev, head);
794 
795 	netdev_put(dev, &v->dev_tracker);
796 	return 0;
797 }
798 
799 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
800 {
801 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
802 
803 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
804 }
805 
806 static inline void ip6mr_cache_free(struct mfc6_cache *c)
807 {
808 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
809 }
810 
811 /* Destroy an unresolved cache entry, killing queued skbs
812    and reporting error to netlink readers.
813  */
814 
815 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
816 {
817 	struct net *net = read_pnet(&mrt->net);
818 	struct sk_buff *skb;
819 
820 	atomic_dec(&mrt->cache_resolve_queue_len);
821 
822 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
823 		if (ipv6_hdr(skb)->version == 0) {
824 			struct nlmsghdr *nlh = skb_pull(skb,
825 							sizeof(struct ipv6hdr));
826 			nlh->nlmsg_type = NLMSG_ERROR;
827 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
828 			skb_trim(skb, nlh->nlmsg_len);
829 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
830 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
831 		} else
832 			kfree_skb(skb);
833 	}
834 
835 	ip6mr_cache_free(c);
836 }
837 
838 
839 /* Timer process for all the unresolved queue. */
840 
841 static void ipmr_do_expire_process(struct mr_table *mrt)
842 {
843 	unsigned long now = jiffies;
844 	unsigned long expires = 10 * HZ;
845 	struct mr_mfc *c, *next;
846 
847 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
848 		if (time_after(c->mfc_un.unres.expires, now)) {
849 			/* not yet... */
850 			unsigned long interval = c->mfc_un.unres.expires - now;
851 			if (interval < expires)
852 				expires = interval;
853 			continue;
854 		}
855 
856 		list_del(&c->list);
857 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
858 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
859 	}
860 
861 	if (!list_empty(&mrt->mfc_unres_queue))
862 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
863 }
864 
865 static void ipmr_expire_process(struct timer_list *t)
866 {
867 	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
868 
869 	if (!spin_trylock(&mfc_unres_lock)) {
870 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
871 		return;
872 	}
873 
874 	if (!list_empty(&mrt->mfc_unres_queue))
875 		ipmr_do_expire_process(mrt);
876 
877 	spin_unlock(&mfc_unres_lock);
878 }
879 
880 /* Fill oifs list. It is called under locked mrt_lock. */
881 
882 static void ip6mr_update_thresholds(struct mr_table *mrt,
883 				    struct mr_mfc *cache,
884 				    unsigned char *ttls)
885 {
886 	int vifi;
887 
888 	cache->mfc_un.res.minvif = MAXMIFS;
889 	cache->mfc_un.res.maxvif = 0;
890 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
891 
892 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
893 		if (VIF_EXISTS(mrt, vifi) &&
894 		    ttls[vifi] && ttls[vifi] < 255) {
895 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
896 			if (cache->mfc_un.res.minvif > vifi)
897 				cache->mfc_un.res.minvif = vifi;
898 			if (cache->mfc_un.res.maxvif <= vifi)
899 				cache->mfc_un.res.maxvif = vifi + 1;
900 		}
901 	}
902 	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
903 }
904 
905 static int mif6_add(struct net *net, struct mr_table *mrt,
906 		    struct mif6ctl *vifc, int mrtsock)
907 {
908 	int vifi = vifc->mif6c_mifi;
909 	struct vif_device *v = &mrt->vif_table[vifi];
910 	struct net_device *dev;
911 	struct inet6_dev *in6_dev;
912 	int err;
913 
914 	/* Is vif busy ? */
915 	if (VIF_EXISTS(mrt, vifi))
916 		return -EADDRINUSE;
917 
918 	switch (vifc->mif6c_flags) {
919 #ifdef CONFIG_IPV6_PIMSM_V2
920 	case MIFF_REGISTER:
921 		/*
922 		 * Special Purpose VIF in PIM
923 		 * All the packets will be sent to the daemon
924 		 */
925 		if (mrt->mroute_reg_vif_num >= 0)
926 			return -EADDRINUSE;
927 		dev = ip6mr_reg_vif(net, mrt);
928 		if (!dev)
929 			return -ENOBUFS;
930 		err = dev_set_allmulti(dev, 1);
931 		if (err) {
932 			unregister_netdevice(dev);
933 			dev_put(dev);
934 			return err;
935 		}
936 		break;
937 #endif
938 	case 0:
939 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
940 		if (!dev)
941 			return -EADDRNOTAVAIL;
942 		err = dev_set_allmulti(dev, 1);
943 		if (err) {
944 			dev_put(dev);
945 			return err;
946 		}
947 		break;
948 	default:
949 		return -EINVAL;
950 	}
951 
952 	in6_dev = __in6_dev_get(dev);
953 	if (in6_dev) {
954 		atomic_inc(&in6_dev->cnf.mc_forwarding);
955 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
956 					     NETCONFA_MC_FORWARDING,
957 					     dev->ifindex, &in6_dev->cnf);
958 	}
959 
960 	/* Fill in the VIF structures */
961 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
962 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
963 			MIFF_REGISTER);
964 
965 	/* And finish update writing critical data */
966 	spin_lock(&mrt_lock);
967 	rcu_assign_pointer(v->dev, dev);
968 	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
969 #ifdef CONFIG_IPV6_PIMSM_V2
970 	if (v->flags & MIFF_REGISTER)
971 		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
972 #endif
973 	if (vifi + 1 > mrt->maxvif)
974 		WRITE_ONCE(mrt->maxvif, vifi + 1);
975 	spin_unlock(&mrt_lock);
976 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
977 				       v, dev, vifi, mrt->id);
978 	return 0;
979 }
980 
981 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
982 					   const struct in6_addr *origin,
983 					   const struct in6_addr *mcastgrp)
984 {
985 	struct mfc6_cache_cmp_arg arg = {
986 		.mf6c_origin = *origin,
987 		.mf6c_mcastgrp = *mcastgrp,
988 	};
989 
990 	return mr_mfc_find(mrt, &arg);
991 }
992 
993 /* Look for a (*,G) entry */
994 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
995 					       struct in6_addr *mcastgrp,
996 					       mifi_t mifi)
997 {
998 	struct mfc6_cache_cmp_arg arg = {
999 		.mf6c_origin = in6addr_any,
1000 		.mf6c_mcastgrp = *mcastgrp,
1001 	};
1002 
1003 	if (ipv6_addr_any(mcastgrp))
1004 		return mr_mfc_find_any_parent(mrt, mifi);
1005 	return mr_mfc_find_any(mrt, mifi, &arg);
1006 }
1007 
1008 /* Look for a (S,G,iif) entry if parent != -1 */
1009 static struct mfc6_cache *
1010 ip6mr_cache_find_parent(struct mr_table *mrt,
1011 			const struct in6_addr *origin,
1012 			const struct in6_addr *mcastgrp,
1013 			int parent)
1014 {
1015 	struct mfc6_cache_cmp_arg arg = {
1016 		.mf6c_origin = *origin,
1017 		.mf6c_mcastgrp = *mcastgrp,
1018 	};
1019 
1020 	return mr_mfc_find_parent(mrt, &arg, parent);
1021 }
1022 
1023 /* Allocate a multicast cache entry */
1024 static struct mfc6_cache *ip6mr_cache_alloc(void)
1025 {
1026 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1027 	if (!c)
1028 		return NULL;
1029 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1030 	c->_c.mfc_un.res.minvif = MAXMIFS;
1031 	c->_c.free = ip6mr_cache_free_rcu;
1032 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1033 	return c;
1034 }
1035 
1036 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1037 {
1038 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1039 	if (!c)
1040 		return NULL;
1041 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1042 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1043 	return c;
1044 }
1045 
1046 /*
1047  *	A cache entry has gone into a resolved state from queued
1048  */
1049 
1050 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1051 				struct mfc6_cache *uc, struct mfc6_cache *c)
1052 {
1053 	struct sk_buff *skb;
1054 
1055 	/*
1056 	 *	Play the pending entries through our router
1057 	 */
1058 
1059 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1060 		if (ipv6_hdr(skb)->version == 0) {
1061 			struct nlmsghdr *nlh = skb_pull(skb,
1062 							sizeof(struct ipv6hdr));
1063 
1064 			if (mr_fill_mroute(mrt, skb, &c->_c,
1065 					   nlmsg_data(nlh)) > 0) {
1066 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1067 			} else {
1068 				nlh->nlmsg_type = NLMSG_ERROR;
1069 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1070 				skb_trim(skb, nlh->nlmsg_len);
1071 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1072 			}
1073 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1074 		} else {
1075 			rcu_read_lock();
1076 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1077 			rcu_read_unlock();
1078 		}
1079 	}
1080 }
1081 
1082 /*
1083  *	Bounce a cache query up to pim6sd and netlink.
1084  *
1085  *	Called under rcu_read_lock()
1086  */
1087 
1088 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1089 			      mifi_t mifi, int assert)
1090 {
1091 	enum skb_drop_reason reason;
1092 	struct sock *mroute6_sk;
1093 	struct sk_buff *skb;
1094 	struct mrt6msg *msg;
1095 
1096 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1097 	if (!mroute6_sk)
1098 		return -EINVAL;
1099 
1100 #ifdef CONFIG_IPV6_PIMSM_V2
1101 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1102 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1103 						+sizeof(*msg));
1104 	else
1105 #endif
1106 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1107 
1108 	if (!skb)
1109 		return -ENOBUFS;
1110 
1111 	/* I suppose that internal messages
1112 	 * do not require checksums */
1113 
1114 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1115 
1116 #ifdef CONFIG_IPV6_PIMSM_V2
1117 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1118 		/* Ugly, but we have no choice with this interface.
1119 		   Duplicate old header, fix length etc.
1120 		   And all this only to mangle msg->im6_msgtype and
1121 		   to set msg->im6_mbz to "mbz" :-)
1122 		 */
1123 		__skb_pull(skb, skb_network_offset(pkt));
1124 
1125 		skb_push(skb, sizeof(*msg));
1126 		skb_reset_transport_header(skb);
1127 		msg = (struct mrt6msg *)skb_transport_header(skb);
1128 		msg->im6_mbz = 0;
1129 		msg->im6_msgtype = assert;
1130 		if (assert == MRT6MSG_WRMIFWHOLE)
1131 			msg->im6_mif = mifi;
1132 		else
1133 			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1134 		msg->im6_pad = 0;
1135 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1136 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1137 
1138 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1139 	} else
1140 #endif
1141 	{
1142 	/*
1143 	 *	Copy the IP header
1144 	 */
1145 
1146 	skb_put(skb, sizeof(struct ipv6hdr));
1147 	skb_reset_network_header(skb);
1148 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1149 
1150 	/*
1151 	 *	Add our header
1152 	 */
1153 	skb_put(skb, sizeof(*msg));
1154 	skb_reset_transport_header(skb);
1155 	msg = (struct mrt6msg *)skb_transport_header(skb);
1156 
1157 	msg->im6_mbz = 0;
1158 	msg->im6_msgtype = assert;
1159 	msg->im6_mif = mifi;
1160 	msg->im6_pad = 0;
1161 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1162 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1163 
1164 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1165 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1166 	}
1167 
1168 	mrt6msg_netlink_event(mrt, skb);
1169 
1170 	/* Deliver to user space multicast routing algorithms */
1171 	reason = sock_queue_rcv_skb_reason(mroute6_sk, skb);
1172 
1173 	if (reason) {
1174 		sk_skb_reason_drop(mroute6_sk, skb, reason);
1175 		return -ENOMEM;
1176 	}
1177 
1178 	return 0;
1179 }
1180 
1181 /* Queue a packet for resolution. It gets locked cache entry! */
1182 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1183 				  struct sk_buff *skb, struct net_device *dev)
1184 {
1185 	struct net *net = read_pnet(&mrt->net);
1186 	struct mfc6_cache *c = NULL;
1187 	bool found = false;
1188 	int err;
1189 
1190 	spin_lock_bh(&mfc_unres_lock);
1191 
1192 	if (!check_net(net)) {
1193 		err = -EINVAL;
1194 		goto err;
1195 	}
1196 
1197 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1198 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1199 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1200 			found = true;
1201 			break;
1202 		}
1203 	}
1204 
1205 	if (!found) {
1206 		/*
1207 		 *	Create a new entry if allowable
1208 		 */
1209 
1210 		c = ip6mr_cache_alloc_unres();
1211 		if (!c) {
1212 			err = -ENOBUFS;
1213 			goto err;
1214 		}
1215 
1216 		/* Fill in the new cache entry */
1217 		c->_c.mfc_parent = -1;
1218 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1219 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1220 
1221 		/*
1222 		 *	Reflect first query at pim6sd
1223 		 */
1224 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1225 		if (err < 0)
1226 			goto err;
1227 
1228 		atomic_inc(&mrt->cache_resolve_queue_len);
1229 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1230 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1231 
1232 		ipmr_do_expire_process(mrt);
1233 	}
1234 
1235 	/* See if we can append the packet */
1236 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1237 		c = NULL;
1238 		err = -ENOBUFS;
1239 		goto err;
1240 	}
1241 
1242 	if (dev) {
1243 		skb->dev = dev;
1244 		skb->skb_iif = dev->ifindex;
1245 	}
1246 
1247 	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1248 
1249 	spin_unlock_bh(&mfc_unres_lock);
1250 	return 0;
1251 
1252 err:
1253 	spin_unlock_bh(&mfc_unres_lock);
1254 	if (c)
1255 		ip6mr_cache_free(c);
1256 	kfree_skb(skb);
1257 	return err;
1258 }
1259 
1260 /*
1261  *	MFC6 cache manipulation by user space
1262  */
1263 
1264 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1265 			    int parent)
1266 {
1267 	struct mfc6_cache *c;
1268 
1269 	rcu_read_lock();
1270 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1271 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1272 	rcu_read_unlock();
1273 	if (!c)
1274 		return -ENOENT;
1275 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1276 	list_del_rcu(&c->_c.list);
1277 
1278 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1279 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1280 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1281 	mr_cache_put(&c->_c);
1282 	return 0;
1283 }
1284 
1285 static int ip6mr_device_event(struct notifier_block *this,
1286 			      unsigned long event, void *ptr)
1287 {
1288 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1289 	struct net *net = dev_net(dev);
1290 	struct mr_table *mrt;
1291 	struct vif_device *v;
1292 	int ct;
1293 
1294 	if (event != NETDEV_UNREGISTER)
1295 		return NOTIFY_DONE;
1296 
1297 	ip6mr_for_each_table(mrt, net) {
1298 		v = &mrt->vif_table[0];
1299 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1300 			if (rcu_access_pointer(v->dev) == dev)
1301 				mif6_delete(mrt, ct, 1, NULL);
1302 		}
1303 	}
1304 
1305 	return NOTIFY_DONE;
1306 }
1307 
1308 static unsigned int ip6mr_seq_read(const struct net *net)
1309 {
1310 	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1311 }
1312 
1313 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1314 		      struct netlink_ext_ack *extack)
1315 {
1316 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1317 		       ip6mr_mr_table_iter, extack);
1318 }
1319 
1320 static struct notifier_block ip6_mr_notifier = {
1321 	.notifier_call = ip6mr_device_event
1322 };
1323 
1324 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1325 	.family		= RTNL_FAMILY_IP6MR,
1326 	.fib_seq_read	= ip6mr_seq_read,
1327 	.fib_dump	= ip6mr_dump,
1328 	.owner		= THIS_MODULE,
1329 };
1330 
1331 static int __net_init ip6mr_notifier_init(struct net *net)
1332 {
1333 	struct fib_notifier_ops *ops;
1334 
1335 	atomic_set(&net->ipv6.ipmr_seq, 0);
1336 
1337 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1338 	if (IS_ERR(ops))
1339 		return PTR_ERR(ops);
1340 
1341 	net->ipv6.ip6mr_notifier_ops = ops;
1342 
1343 	return 0;
1344 }
1345 
1346 static void __net_exit ip6mr_notifier_exit(struct net *net)
1347 {
1348 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1349 	net->ipv6.ip6mr_notifier_ops = NULL;
1350 }
1351 
1352 /* Setup for IP multicast routing */
1353 static int __net_init ip6mr_net_init(struct net *net)
1354 {
1355 #ifdef CONFIG_PROC_FS
1356 	LIST_HEAD(dev_kill_list);
1357 #endif
1358 	int err;
1359 
1360 	mutex_init(&net->ipv6.mfc_mutex);
1361 
1362 	err = ip6mr_notifier_init(net);
1363 	if (err)
1364 		return err;
1365 
1366 	err = ip6mr_rules_init(net);
1367 	if (err < 0)
1368 		goto ip6mr_rules_fail;
1369 
1370 #ifdef CONFIG_PROC_FS
1371 	err = -ENOMEM;
1372 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1373 			sizeof(struct mr_vif_iter)))
1374 		goto proc_vif_fail;
1375 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1376 			sizeof(struct mr_mfc_iter)))
1377 		goto proc_cache_fail;
1378 #endif
1379 
1380 	return 0;
1381 
1382 #ifdef CONFIG_PROC_FS
1383 proc_cache_fail:
1384 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1385 proc_vif_fail:
1386 	ip6mr_rules_exit_rtnl(net, &dev_kill_list);
1387 	ip6mr_rules_exit(net);
1388 #endif
1389 ip6mr_rules_fail:
1390 	ip6mr_notifier_exit(net);
1391 	return err;
1392 }
1393 
1394 static void __net_exit ip6mr_net_exit(struct net *net)
1395 {
1396 #ifdef CONFIG_PROC_FS
1397 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1398 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1399 #endif
1400 	ip6mr_rules_exit(net);
1401 	ip6mr_notifier_exit(net);
1402 }
1403 
1404 static void __net_exit ip6mr_net_exit_rtnl(struct net *net,
1405 					   struct list_head *dev_kill_list)
1406 {
1407 	ip6mr_rules_exit_rtnl(net, dev_kill_list);
1408 }
1409 
1410 static struct pernet_operations ip6mr_net_ops = {
1411 	.init = ip6mr_net_init,
1412 	.exit = ip6mr_net_exit,
1413 	.exit_rtnl = ip6mr_net_exit_rtnl,
1414 };
1415 
1416 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1417 	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1418 	 .msgtype = RTM_GETROUTE,
1419 	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute,
1420 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
1421 };
1422 
1423 int __init ip6_mr_init(void)
1424 {
1425 	int err;
1426 
1427 	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1428 	if (!mrt_cachep)
1429 		return -ENOMEM;
1430 
1431 	err = register_pernet_subsys(&ip6mr_net_ops);
1432 	if (err)
1433 		goto reg_pernet_fail;
1434 
1435 	err = register_netdevice_notifier(&ip6_mr_notifier);
1436 	if (err)
1437 		goto reg_notif_fail;
1438 #ifdef CONFIG_IPV6_PIMSM_V2
1439 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1440 		pr_err("%s: can't add PIM protocol\n", __func__);
1441 		err = -EAGAIN;
1442 		goto add_proto_fail;
1443 	}
1444 #endif
1445 	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1446 	if (!err)
1447 		return 0;
1448 
1449 #ifdef CONFIG_IPV6_PIMSM_V2
1450 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1451 add_proto_fail:
1452 	unregister_netdevice_notifier(&ip6_mr_notifier);
1453 #endif
1454 reg_notif_fail:
1455 	unregister_pernet_subsys(&ip6mr_net_ops);
1456 reg_pernet_fail:
1457 	kmem_cache_destroy(mrt_cachep);
1458 	return err;
1459 }
1460 
1461 void __init ip6_mr_cleanup(void)
1462 {
1463 	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
1464 #ifdef CONFIG_IPV6_PIMSM_V2
1465 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1466 #endif
1467 	unregister_netdevice_notifier(&ip6_mr_notifier);
1468 	unregister_pernet_subsys(&ip6mr_net_ops);
1469 	kmem_cache_destroy(mrt_cachep);
1470 }
1471 
1472 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1473 			 struct mf6cctl *mfc, int mrtsock, int parent)
1474 {
1475 	unsigned char ttls[MAXMIFS];
1476 	struct mfc6_cache *uc, *c;
1477 	struct mr_mfc *_uc;
1478 	bool found;
1479 	int i, err;
1480 
1481 	if (mfc->mf6cc_parent >= MAXMIFS)
1482 		return -ENFILE;
1483 
1484 	memset(ttls, 255, MAXMIFS);
1485 	for (i = 0; i < MAXMIFS; i++) {
1486 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1487 			ttls[i] = 1;
1488 	}
1489 
1490 	rcu_read_lock();
1491 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1492 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1493 	rcu_read_unlock();
1494 	if (c) {
1495 		spin_lock(&mrt_lock);
1496 		c->_c.mfc_parent = mfc->mf6cc_parent;
1497 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1498 		if (!mrtsock)
1499 			c->_c.mfc_flags |= MFC_STATIC;
1500 		spin_unlock(&mrt_lock);
1501 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1502 					       c, mrt->id);
1503 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1504 		return 0;
1505 	}
1506 
1507 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1508 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1509 		return -EINVAL;
1510 
1511 	c = ip6mr_cache_alloc();
1512 	if (!c)
1513 		return -ENOMEM;
1514 
1515 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1516 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1517 	c->_c.mfc_parent = mfc->mf6cc_parent;
1518 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1519 	if (!mrtsock)
1520 		c->_c.mfc_flags |= MFC_STATIC;
1521 
1522 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1523 				  ip6mr_rht_params);
1524 	if (err) {
1525 		pr_err("ip6mr: rhtable insert error %d\n", err);
1526 		ip6mr_cache_free(c);
1527 		return err;
1528 	}
1529 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1530 
1531 	/* Check to see if we resolved a queued list. If so we
1532 	 * need to send on the frames and tidy up.
1533 	 */
1534 	found = false;
1535 	spin_lock_bh(&mfc_unres_lock);
1536 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1537 		uc = (struct mfc6_cache *)_uc;
1538 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1539 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1540 			list_del(&_uc->list);
1541 			atomic_dec(&mrt->cache_resolve_queue_len);
1542 			found = true;
1543 			break;
1544 		}
1545 	}
1546 	if (list_empty(&mrt->mfc_unres_queue))
1547 		timer_delete(&mrt->ipmr_expire_timer);
1548 	spin_unlock_bh(&mfc_unres_lock);
1549 
1550 	if (found) {
1551 		ip6mr_cache_resolve(net, mrt, uc, c);
1552 		ip6mr_cache_free(uc);
1553 	}
1554 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1555 				       c, mrt->id);
1556 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1557 	return 0;
1558 }
1559 
1560 /*
1561  *	Close the multicast socket, and clear the vif tables etc
1562  */
1563 
1564 static void mroute_clean_tables(struct mr_table *mrt, int flags,
1565 				struct list_head *dev_kill_list)
1566 {
1567 	struct net *net = read_pnet(&mrt->net);
1568 	struct mr_mfc *c, *tmp;
1569 	int i;
1570 
1571 	/* Shut down all active vif entries */
1572 	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1573 		for (i = 0; i < mrt->maxvif; i++) {
1574 			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1575 			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1576 			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1577 				continue;
1578 			mif6_delete(mrt, i, 0, dev_kill_list);
1579 		}
1580 	}
1581 
1582 	/* Wipe the cache */
1583 	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1584 		mutex_lock(&net->ipv6.mfc_mutex);
1585 
1586 		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1587 			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1588 			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1589 				continue;
1590 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1591 			list_del_rcu(&c->list);
1592 			call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1593 						       (struct mfc6_cache *)c, mrt->id);
1594 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1595 			mr_cache_put(c);
1596 		}
1597 
1598 		mutex_unlock(&net->ipv6.mfc_mutex);
1599 	}
1600 
1601 	if (flags & MRT6_FLUSH_MFC) {
1602 		if (atomic_read(&mrt->cache_resolve_queue_len) != 0 ||
1603 		    !check_net(net)) {
1604 			spin_lock_bh(&mfc_unres_lock);
1605 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1606 				list_del(&c->list);
1607 				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1608 						  RTM_DELROUTE);
1609 				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1610 			}
1611 			spin_unlock_bh(&mfc_unres_lock);
1612 		}
1613 	}
1614 }
1615 
1616 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1617 {
1618 	int err = 0;
1619 	struct net *net = sock_net(sk);
1620 
1621 	rtnl_lock();
1622 	spin_lock(&mrt_lock);
1623 	if (rtnl_dereference(mrt->mroute_sk)) {
1624 		err = -EADDRINUSE;
1625 	} else {
1626 		rcu_assign_pointer(mrt->mroute_sk, sk);
1627 		sock_set_flag(sk, SOCK_RCU_FREE);
1628 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1629 	}
1630 	spin_unlock(&mrt_lock);
1631 
1632 	if (!err)
1633 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1634 					     NETCONFA_MC_FORWARDING,
1635 					     NETCONFA_IFINDEX_ALL,
1636 					     net->ipv6.devconf_all);
1637 	rtnl_unlock();
1638 
1639 	return err;
1640 }
1641 
1642 int ip6mr_sk_done(struct sock *sk)
1643 {
1644 	struct net *net = sock_net(sk);
1645 	struct ipv6_devconf *devconf;
1646 	LIST_HEAD(dev_kill_list);
1647 	struct mr_table *mrt;
1648 	int err = -EACCES;
1649 
1650 	if (sk->sk_type != SOCK_RAW ||
1651 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1652 		return err;
1653 
1654 	devconf = net->ipv6.devconf_all;
1655 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1656 		return err;
1657 
1658 	rtnl_lock();
1659 	ip6mr_for_each_table(mrt, net) {
1660 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1661 			spin_lock(&mrt_lock);
1662 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1663 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1664 			 * so the RCU grace period before sk freeing
1665 			 * is guaranteed by sk_destruct()
1666 			 */
1667 			atomic_dec(&devconf->mc_forwarding);
1668 			spin_unlock(&mrt_lock);
1669 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1670 						     NETCONFA_MC_FORWARDING,
1671 						     NETCONFA_IFINDEX_ALL,
1672 						     net->ipv6.devconf_all);
1673 
1674 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC,
1675 					    &dev_kill_list);
1676 			err = 0;
1677 			break;
1678 		}
1679 	}
1680 	unregister_netdevice_many(&dev_kill_list);
1681 	rtnl_unlock();
1682 
1683 	return err;
1684 }
1685 
1686 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1687 {
1688 	struct mr_table *mrt;
1689 	struct flowi6 fl6 = {
1690 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1691 		.flowi6_oif	= skb->dev->ifindex,
1692 		.flowi6_mark	= skb->mark,
1693 	};
1694 
1695 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1696 		return NULL;
1697 
1698 	return rcu_access_pointer(mrt->mroute_sk);
1699 }
1700 EXPORT_SYMBOL(mroute6_is_socket);
1701 
1702 /*
1703  *	Socket options and virtual interface manipulation. The whole
1704  *	virtual interface system is a complete heap, but unfortunately
1705  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1706  *	MOSPF/PIM router set up we can clean this up.
1707  */
1708 
1709 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1710 			  unsigned int optlen)
1711 {
1712 	int ret, parent = 0;
1713 	struct mif6ctl vif;
1714 	struct mf6cctl mfc;
1715 	mifi_t mifi;
1716 	struct net *net = sock_net(sk);
1717 	struct mr_table *mrt;
1718 
1719 	if (sk->sk_type != SOCK_RAW ||
1720 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1721 		return -EOPNOTSUPP;
1722 
1723 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1724 	if (!mrt)
1725 		return -ENOENT;
1726 
1727 	if (optname != MRT6_INIT) {
1728 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1729 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1730 			return -EACCES;
1731 	}
1732 
1733 	switch (optname) {
1734 	case MRT6_INIT:
1735 		if (optlen < sizeof(int))
1736 			return -EINVAL;
1737 
1738 		return ip6mr_sk_init(mrt, sk);
1739 
1740 	case MRT6_DONE:
1741 		return ip6mr_sk_done(sk);
1742 
1743 	case MRT6_ADD_MIF:
1744 		if (optlen < sizeof(vif))
1745 			return -EINVAL;
1746 		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1747 			return -EFAULT;
1748 		if (vif.mif6c_mifi >= MAXMIFS)
1749 			return -ENFILE;
1750 		rtnl_lock();
1751 		ret = mif6_add(net, mrt, &vif,
1752 			       sk == rtnl_dereference(mrt->mroute_sk));
1753 		rtnl_unlock();
1754 		return ret;
1755 
1756 	case MRT6_DEL_MIF:
1757 		if (optlen < sizeof(mifi_t))
1758 			return -EINVAL;
1759 		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1760 			return -EFAULT;
1761 		rtnl_lock();
1762 		ret = mif6_delete(mrt, mifi, 0, NULL);
1763 		rtnl_unlock();
1764 		return ret;
1765 
1766 	/*
1767 	 *	Manipulate the forwarding caches. These live
1768 	 *	in a sort of kernel/user symbiosis.
1769 	 */
1770 	case MRT6_ADD_MFC:
1771 	case MRT6_DEL_MFC:
1772 		parent = -1;
1773 		fallthrough;
1774 	case MRT6_ADD_MFC_PROXY:
1775 	case MRT6_DEL_MFC_PROXY:
1776 		if (optlen < sizeof(mfc))
1777 			return -EINVAL;
1778 		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1779 			return -EFAULT;
1780 		if (parent == 0)
1781 			parent = mfc.mf6cc_parent;
1782 
1783 		mutex_lock(&net->ipv6.mfc_mutex);
1784 
1785 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1786 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1787 		else
1788 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1789 					    sk ==
1790 					    rcu_access_pointer(mrt->mroute_sk),
1791 					    parent);
1792 
1793 		mutex_unlock(&net->ipv6.mfc_mutex);
1794 		return ret;
1795 
1796 	case MRT6_FLUSH:
1797 	{
1798 		LIST_HEAD(dev_kill_list);
1799 		int flags;
1800 
1801 		if (optlen != sizeof(flags))
1802 			return -EINVAL;
1803 		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1804 			return -EFAULT;
1805 
1806 		rtnl_lock();
1807 		mroute_clean_tables(mrt, flags, &dev_kill_list);
1808 		unregister_netdevice_many(&dev_kill_list);
1809 		rtnl_unlock();
1810 		return 0;
1811 	}
1812 
1813 	/*
1814 	 *	Control PIM assert (to activate pim will activate assert)
1815 	 */
1816 	case MRT6_ASSERT:
1817 	{
1818 		int v;
1819 
1820 		if (optlen != sizeof(v))
1821 			return -EINVAL;
1822 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1823 			return -EFAULT;
1824 		WRITE_ONCE(mrt->mroute_do_assert, v);
1825 		return 0;
1826 	}
1827 
1828 #ifdef CONFIG_IPV6_PIMSM_V2
1829 	case MRT6_PIM:
1830 	{
1831 		bool do_wrmifwhole;
1832 		int v;
1833 
1834 		if (optlen != sizeof(v))
1835 			return -EINVAL;
1836 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1837 			return -EFAULT;
1838 
1839 		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1840 		v = !!v;
1841 		rtnl_lock();
1842 		ret = 0;
1843 		if (v != mrt->mroute_do_pim) {
1844 			WRITE_ONCE(mrt->mroute_do_pim, v);
1845 			WRITE_ONCE(mrt->mroute_do_assert, v);
1846 			WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrmifwhole);
1847 		}
1848 		rtnl_unlock();
1849 		return ret;
1850 	}
1851 
1852 #endif
1853 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1854 	case MRT6_TABLE:
1855 	{
1856 		u32 v;
1857 
1858 		if (optlen != sizeof(u32))
1859 			return -EINVAL;
1860 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1861 			return -EFAULT;
1862 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1863 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1864 			return -EINVAL;
1865 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1866 			return -EBUSY;
1867 
1868 		rtnl_lock();
1869 		ret = 0;
1870 		mrt = ip6mr_new_table(net, v);
1871 		if (IS_ERR(mrt))
1872 			ret = PTR_ERR(mrt);
1873 		else
1874 			raw6_sk(sk)->ip6mr_table = v;
1875 		rtnl_unlock();
1876 		return ret;
1877 	}
1878 #endif
1879 	/*
1880 	 *	Spurious command, or MRT6_VERSION which you cannot
1881 	 *	set.
1882 	 */
1883 	default:
1884 		return -ENOPROTOOPT;
1885 	}
1886 }
1887 
1888 /*
1889  *	Getsock opt support for the multicast routing system.
1890  */
1891 
1892 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1893 			  sockptr_t optlen)
1894 {
1895 	int olr;
1896 	int val;
1897 	struct net *net = sock_net(sk);
1898 	struct mr_table *mrt;
1899 
1900 	if (sk->sk_type != SOCK_RAW ||
1901 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1902 		return -EOPNOTSUPP;
1903 
1904 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1905 	if (!mrt)
1906 		return -ENOENT;
1907 
1908 	switch (optname) {
1909 	case MRT6_VERSION:
1910 		val = 0x0305;
1911 		break;
1912 #ifdef CONFIG_IPV6_PIMSM_V2
1913 	case MRT6_PIM:
1914 		val = READ_ONCE(mrt->mroute_do_pim);
1915 		break;
1916 #endif
1917 	case MRT6_ASSERT:
1918 		val = READ_ONCE(mrt->mroute_do_assert);
1919 		break;
1920 	default:
1921 		return -ENOPROTOOPT;
1922 	}
1923 
1924 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1925 		return -EFAULT;
1926 
1927 	olr = min_t(int, olr, sizeof(int));
1928 	if (olr < 0)
1929 		return -EINVAL;
1930 
1931 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1932 		return -EFAULT;
1933 	if (copy_to_sockptr(optval, &val, olr))
1934 		return -EFAULT;
1935 	return 0;
1936 }
1937 
1938 /*
1939  *	The IP multicast ioctl support routines.
1940  */
1941 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1942 {
1943 	struct sioc_sg_req6 *sr;
1944 	struct sioc_mif_req6 *vr;
1945 	struct vif_device *vif;
1946 	struct mfc6_cache *c;
1947 	struct net *net = sock_net(sk);
1948 	struct mr_table *mrt;
1949 
1950 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1951 	if (!mrt)
1952 		return -ENOENT;
1953 
1954 	switch (cmd) {
1955 	case SIOCGETMIFCNT_IN6:
1956 		vr = (struct sioc_mif_req6 *)arg;
1957 		if (vr->mifi >= mrt->maxvif)
1958 			return -EINVAL;
1959 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1960 		rcu_read_lock();
1961 		vif = &mrt->vif_table[vr->mifi];
1962 		if (VIF_EXISTS(mrt, vr->mifi)) {
1963 			vr->icount = READ_ONCE(vif->pkt_in);
1964 			vr->ocount = READ_ONCE(vif->pkt_out);
1965 			vr->ibytes = READ_ONCE(vif->bytes_in);
1966 			vr->obytes = READ_ONCE(vif->bytes_out);
1967 			rcu_read_unlock();
1968 			return 0;
1969 		}
1970 		rcu_read_unlock();
1971 		return -EADDRNOTAVAIL;
1972 	case SIOCGETSGCNT_IN6:
1973 		sr = (struct sioc_sg_req6 *)arg;
1974 
1975 		rcu_read_lock();
1976 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1977 				     &sr->grp.sin6_addr);
1978 		if (c) {
1979 			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1980 			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1981 			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1982 			rcu_read_unlock();
1983 			return 0;
1984 		}
1985 		rcu_read_unlock();
1986 		return -EADDRNOTAVAIL;
1987 	default:
1988 		return -ENOIOCTLCMD;
1989 	}
1990 }
1991 
1992 #ifdef CONFIG_COMPAT
1993 struct compat_sioc_sg_req6 {
1994 	struct sockaddr_in6 src;
1995 	struct sockaddr_in6 grp;
1996 	compat_ulong_t pktcnt;
1997 	compat_ulong_t bytecnt;
1998 	compat_ulong_t wrong_if;
1999 };
2000 
2001 struct compat_sioc_mif_req6 {
2002 	mifi_t	mifi;
2003 	compat_ulong_t icount;
2004 	compat_ulong_t ocount;
2005 	compat_ulong_t ibytes;
2006 	compat_ulong_t obytes;
2007 };
2008 
2009 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
2010 {
2011 	struct compat_sioc_sg_req6 sr;
2012 	struct compat_sioc_mif_req6 vr;
2013 	struct vif_device *vif;
2014 	struct mfc6_cache *c;
2015 	struct net *net = sock_net(sk);
2016 	struct mr_table *mrt;
2017 
2018 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
2019 	if (!mrt)
2020 		return -ENOENT;
2021 
2022 	switch (cmd) {
2023 	case SIOCGETMIFCNT_IN6:
2024 		if (copy_from_user(&vr, arg, sizeof(vr)))
2025 			return -EFAULT;
2026 		if (vr.mifi >= mrt->maxvif)
2027 			return -EINVAL;
2028 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
2029 		rcu_read_lock();
2030 		vif = &mrt->vif_table[vr.mifi];
2031 		if (VIF_EXISTS(mrt, vr.mifi)) {
2032 			vr.icount = READ_ONCE(vif->pkt_in);
2033 			vr.ocount = READ_ONCE(vif->pkt_out);
2034 			vr.ibytes = READ_ONCE(vif->bytes_in);
2035 			vr.obytes = READ_ONCE(vif->bytes_out);
2036 			rcu_read_unlock();
2037 
2038 			if (copy_to_user(arg, &vr, sizeof(vr)))
2039 				return -EFAULT;
2040 			return 0;
2041 		}
2042 		rcu_read_unlock();
2043 		return -EADDRNOTAVAIL;
2044 	case SIOCGETSGCNT_IN6:
2045 		if (copy_from_user(&sr, arg, sizeof(sr)))
2046 			return -EFAULT;
2047 
2048 		rcu_read_lock();
2049 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2050 		if (c) {
2051 			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2052 			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2053 			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2054 			rcu_read_unlock();
2055 
2056 			if (copy_to_user(arg, &sr, sizeof(sr)))
2057 				return -EFAULT;
2058 			return 0;
2059 		}
2060 		rcu_read_unlock();
2061 		return -EADDRNOTAVAIL;
2062 	default:
2063 		return -ENOIOCTLCMD;
2064 	}
2065 }
2066 #endif
2067 
2068 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2069 {
2070 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2071 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2072 	return dst_output(net, sk, skb);
2073 }
2074 
2075 /*
2076  *	Processing handlers for ip6mr_forward
2077  */
2078 
2079 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
2080 			      struct sk_buff *skb, int vifi)
2081 {
2082 	struct vif_device *vif = &mrt->vif_table[vifi];
2083 	struct net_device *vif_dev;
2084 	struct ipv6hdr *ipv6h;
2085 	struct dst_entry *dst;
2086 	struct flowi6 fl6;
2087 
2088 	vif_dev = vif_dev_read(vif);
2089 	if (!vif_dev)
2090 		return -1;
2091 
2092 #ifdef CONFIG_IPV6_PIMSM_V2
2093 	if (vif->flags & MIFF_REGISTER) {
2094 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2095 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2096 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2097 		DEV_STATS_INC(vif_dev, tx_packets);
2098 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2099 		return -1;
2100 	}
2101 #endif
2102 
2103 	ipv6h = ipv6_hdr(skb);
2104 
2105 	fl6 = (struct flowi6) {
2106 		.flowi6_oif = vif->link,
2107 		.daddr = ipv6h->daddr,
2108 	};
2109 
2110 	dst = ip6_route_output(net, NULL, &fl6);
2111 	if (dst->error) {
2112 		dst_release(dst);
2113 		return -1;
2114 	}
2115 
2116 	skb_dst_drop(skb);
2117 	skb_dst_set(skb, dst);
2118 
2119 	/*
2120 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2121 	 * not only before forwarding, but after forwarding on all output
2122 	 * interfaces. It is clear, if mrouter runs a multicasting
2123 	 * program, it should receive packets not depending to what interface
2124 	 * program is joined.
2125 	 * If we will not make it, the program will have to join on all
2126 	 * interfaces. On the other hand, multihoming host (or router, but
2127 	 * not mrouter) cannot join to more than one interface - it will
2128 	 * result in receiving multiple packets.
2129 	 */
2130 	skb->dev = vif_dev;
2131 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2132 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2133 
2134 	/* We are about to write */
2135 	/* XXX: extension headers? */
2136 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2137 		return -1;
2138 
2139 	ipv6h = ipv6_hdr(skb);
2140 	ipv6h->hop_limit--;
2141 	return 0;
2142 }
2143 
2144 static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
2145 			   struct sk_buff *skb, int vifi)
2146 {
2147 	struct net_device *indev = skb->dev;
2148 
2149 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2150 		goto out_free;
2151 
2152 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2153 
2154 	NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2155 		net, NULL, skb, indev, skb->dev,
2156 		ip6mr_forward2_finish);
2157 	return;
2158 
2159 out_free:
2160 	kfree_skb(skb);
2161 }
2162 
2163 static void ip6mr_output2(struct net *net, struct mr_table *mrt,
2164 			  struct sk_buff *skb, int vifi)
2165 {
2166 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2167 		goto out_free;
2168 
2169 	ip6_output(net, NULL, skb);
2170 	return;
2171 
2172 out_free:
2173 	kfree_skb(skb);
2174 }
2175 
2176 /* Called with rcu_read_lock() */
2177 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2178 {
2179 	int ct;
2180 
2181 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2182 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2183 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2184 			break;
2185 	}
2186 	return ct;
2187 }
2188 
2189 /* Called under rcu_read_lock() */
2190 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2191 			   struct net_device *dev, struct sk_buff *skb,
2192 			   struct mfc6_cache *c)
2193 {
2194 	int psend = -1;
2195 	int vif, ct;
2196 	int true_vifi = ip6mr_find_vif(mrt, dev);
2197 
2198 	vif = c->_c.mfc_parent;
2199 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2200 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2201 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2202 
2203 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2204 		struct mfc6_cache *cache_proxy;
2205 
2206 		/* For an (*,G) entry, we only check that the incoming
2207 		 * interface is part of the static tree.
2208 		 */
2209 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2210 		if (cache_proxy &&
2211 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2212 			goto forward;
2213 	}
2214 
2215 	/*
2216 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2217 	 */
2218 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2219 		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2220 
2221 		if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) &&
2222 		    /* pimsm uses asserts, when switching from RPT to SPT,
2223 		       so that we cannot check that packet arrived on an oif.
2224 		       It is bad, but otherwise we would need to move pretty
2225 		       large chunk of pimd to kernel. Ough... --ANK
2226 		     */
2227 		    (READ_ONCE(mrt->mroute_do_pim) ||
2228 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2229 		    time_after(jiffies,
2230 			       c->_c.mfc_un.res.last_assert +
2231 			       MFC_ASSERT_THRESH)) {
2232 			c->_c.mfc_un.res.last_assert = jiffies;
2233 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2234 			if (READ_ONCE(mrt->mroute_do_wrvifwhole))
2235 				ip6mr_cache_report(mrt, skb, true_vifi,
2236 						   MRT6MSG_WRMIFWHOLE);
2237 		}
2238 		goto dont_forward;
2239 	}
2240 
2241 forward:
2242 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2243 		   mrt->vif_table[vif].pkt_in + 1);
2244 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2245 		   mrt->vif_table[vif].bytes_in + skb->len);
2246 
2247 	/*
2248 	 *	Forward the frame
2249 	 */
2250 	if (ipv6_addr_any(&c->mf6c_origin) &&
2251 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2252 		if (true_vifi >= 0 &&
2253 		    true_vifi != c->_c.mfc_parent &&
2254 		    ipv6_hdr(skb)->hop_limit >
2255 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2256 			/* It's an (*,*) entry and the packet is not coming from
2257 			 * the upstream: forward the packet to the upstream
2258 			 * only.
2259 			 */
2260 			psend = c->_c.mfc_parent;
2261 			goto last_forward;
2262 		}
2263 		goto dont_forward;
2264 	}
2265 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2266 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2267 		/* For (*,G) entry, don't forward to the incoming interface */
2268 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2269 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2270 			if (psend != -1) {
2271 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2272 				if (skb2)
2273 					ip6mr_forward2(net, mrt, skb2, psend);
2274 			}
2275 			psend = ct;
2276 		}
2277 	}
2278 last_forward:
2279 	if (psend != -1) {
2280 		ip6mr_forward2(net, mrt, skb, psend);
2281 		return;
2282 	}
2283 
2284 dont_forward:
2285 	kfree_skb(skb);
2286 }
2287 
2288 /* Called under rcu_read_lock() */
2289 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
2290 				 struct net_device *dev, struct sk_buff *skb,
2291 				 struct mfc6_cache *c)
2292 {
2293 	int psend = -1;
2294 	int ct;
2295 
2296 	WARN_ON_ONCE(!rcu_read_lock_held());
2297 
2298 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2299 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2300 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2301 
2302 	/* Forward the frame */
2303 	if (ipv6_addr_any(&c->mf6c_origin) &&
2304 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2305 		if (ipv6_hdr(skb)->hop_limit >
2306 		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2307 			/* It's an (*,*) entry and the packet is not coming from
2308 			 * the upstream: forward the packet to the upstream
2309 			 * only.
2310 			 */
2311 			psend = c->_c.mfc_parent;
2312 			goto last_forward;
2313 		}
2314 		goto dont_forward;
2315 	}
2316 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2317 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2318 		if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2319 			if (psend != -1) {
2320 				struct sk_buff *skb2;
2321 
2322 				skb2 = skb_clone(skb, GFP_ATOMIC);
2323 				if (skb2)
2324 					ip6mr_output2(net, mrt, skb2, psend);
2325 			}
2326 			psend = ct;
2327 		}
2328 	}
2329 last_forward:
2330 	if (psend != -1) {
2331 		ip6mr_output2(net, mrt, skb, psend);
2332 		return;
2333 	}
2334 
2335 dont_forward:
2336 	kfree_skb(skb);
2337 }
2338 
2339 /*
2340  *	Multicast packets for forwarding arrive here
2341  */
2342 
2343 int ip6_mr_input(struct sk_buff *skb)
2344 {
2345 	struct net_device *dev = skb->dev;
2346 	struct net *net = dev_net_rcu(dev);
2347 	struct mfc6_cache *cache;
2348 	struct mr_table *mrt;
2349 	struct flowi6 fl6 = {
2350 		.flowi6_iif	= dev->ifindex,
2351 		.flowi6_mark	= skb->mark,
2352 	};
2353 	int err;
2354 
2355 	/* skb->dev passed in is the master dev for vrfs.
2356 	 * Get the proper interface that does have a vif associated with it.
2357 	 */
2358 	if (netif_is_l3_master(dev)) {
2359 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2360 		if (!dev) {
2361 			kfree_skb(skb);
2362 			return -ENODEV;
2363 		}
2364 	}
2365 
2366 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2367 	if (err < 0) {
2368 		kfree_skb(skb);
2369 		return err;
2370 	}
2371 
2372 	cache = ip6mr_cache_find(mrt,
2373 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2374 	if (!cache) {
2375 		int vif = ip6mr_find_vif(mrt, dev);
2376 
2377 		if (vif >= 0)
2378 			cache = ip6mr_cache_find_any(mrt,
2379 						     &ipv6_hdr(skb)->daddr,
2380 						     vif);
2381 	}
2382 
2383 	/*
2384 	 *	No usable cache entry
2385 	 */
2386 	if (!cache) {
2387 		int vif;
2388 
2389 		vif = ip6mr_find_vif(mrt, dev);
2390 		if (vif >= 0) {
2391 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2392 
2393 			return err;
2394 		}
2395 		kfree_skb(skb);
2396 		return -ENODEV;
2397 	}
2398 
2399 	ip6_mr_forward(net, mrt, dev, skb, cache);
2400 
2401 	return 0;
2402 }
2403 
2404 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2405 {
2406 	struct net_device *dev = skb_dst(skb)->dev;
2407 	struct flowi6 fl6 = (struct flowi6) {
2408 		.flowi6_iif = LOOPBACK_IFINDEX,
2409 		.flowi6_mark = skb->mark,
2410 	};
2411 	struct mfc6_cache *cache;
2412 	struct mr_table *mrt;
2413 	int err;
2414 	int vif;
2415 
2416 	guard(rcu)();
2417 
2418 	if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
2419 		goto ip6_output;
2420 	if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
2421 		goto ip6_output;
2422 
2423 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2424 	if (err < 0) {
2425 		kfree_skb(skb);
2426 		return err;
2427 	}
2428 
2429 	cache = ip6mr_cache_find(mrt,
2430 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2431 	if (!cache) {
2432 		vif = ip6mr_find_vif(mrt, dev);
2433 		if (vif >= 0)
2434 			cache = ip6mr_cache_find_any(mrt,
2435 						     &ipv6_hdr(skb)->daddr,
2436 						     vif);
2437 	}
2438 
2439 	/* No usable cache entry */
2440 	if (!cache) {
2441 		vif = ip6mr_find_vif(mrt, dev);
2442 		if (vif >= 0)
2443 			return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2444 		goto ip6_output;
2445 	}
2446 
2447 	/* Wrong interface */
2448 	vif = cache->_c.mfc_parent;
2449 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2450 		goto ip6_output;
2451 
2452 	ip6_mr_output_finish(net, mrt, dev, skb, cache);
2453 	return 0;
2454 
2455 ip6_output:
2456 	return ip6_output(net, sk, skb);
2457 }
2458 
2459 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2460 		    u32 portid)
2461 {
2462 	int err;
2463 	struct mr_table *mrt;
2464 	struct mfc6_cache *cache;
2465 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2466 
2467 	rcu_read_lock();
2468 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2469 	if (!mrt) {
2470 		rcu_read_unlock();
2471 		return -ENOENT;
2472 	}
2473 
2474 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2475 	if (!cache && skb->dev) {
2476 		int vif = ip6mr_find_vif(mrt, skb->dev);
2477 
2478 		if (vif >= 0)
2479 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2480 						     vif);
2481 	}
2482 
2483 	if (!cache) {
2484 		struct sk_buff *skb2;
2485 		struct ipv6hdr *iph;
2486 		struct net_device *dev;
2487 		int vif;
2488 
2489 		dev = skb->dev;
2490 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2491 			rcu_read_unlock();
2492 			return -ENODEV;
2493 		}
2494 
2495 		/* really correct? */
2496 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2497 		if (!skb2) {
2498 			rcu_read_unlock();
2499 			return -ENOMEM;
2500 		}
2501 
2502 		NETLINK_CB(skb2).portid = portid;
2503 		skb_reset_transport_header(skb2);
2504 
2505 		skb_put(skb2, sizeof(struct ipv6hdr));
2506 		skb_reset_network_header(skb2);
2507 
2508 		iph = ipv6_hdr(skb2);
2509 		iph->version = 0;
2510 		iph->priority = 0;
2511 		iph->flow_lbl[0] = 0;
2512 		iph->flow_lbl[1] = 0;
2513 		iph->flow_lbl[2] = 0;
2514 		iph->payload_len = 0;
2515 		iph->nexthdr = IPPROTO_NONE;
2516 		iph->hop_limit = 0;
2517 		iph->saddr = rt->rt6i_src.addr;
2518 		iph->daddr = rt->rt6i_dst.addr;
2519 
2520 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2521 		rcu_read_unlock();
2522 
2523 		return err;
2524 	}
2525 
2526 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2527 	rcu_read_unlock();
2528 	return err;
2529 }
2530 
2531 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2532 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2533 			     int flags)
2534 {
2535 	struct nlmsghdr *nlh;
2536 	struct rtmsg *rtm;
2537 	int err;
2538 
2539 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2540 	if (!nlh)
2541 		return -EMSGSIZE;
2542 
2543 	rtm = nlmsg_data(nlh);
2544 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2545 	rtm->rtm_dst_len  = 128;
2546 	rtm->rtm_src_len  = 128;
2547 	rtm->rtm_tos      = 0;
2548 	rtm->rtm_table    = mrt->id;
2549 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2550 		goto nla_put_failure;
2551 	rtm->rtm_type = RTN_MULTICAST;
2552 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2553 	if (c->_c.mfc_flags & MFC_STATIC)
2554 		rtm->rtm_protocol = RTPROT_STATIC;
2555 	else
2556 		rtm->rtm_protocol = RTPROT_MROUTED;
2557 	rtm->rtm_flags    = 0;
2558 
2559 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2560 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2561 		goto nla_put_failure;
2562 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2563 	/* do not break the dump if cache is unresolved */
2564 	if (err < 0 && err != -ENOENT)
2565 		goto nla_put_failure;
2566 
2567 	nlmsg_end(skb, nlh);
2568 	return 0;
2569 
2570 nla_put_failure:
2571 	nlmsg_cancel(skb, nlh);
2572 	return -EMSGSIZE;
2573 }
2574 
2575 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2576 			      u32 portid, u32 seq, struct mr_mfc *c,
2577 			      int cmd, int flags)
2578 {
2579 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2580 				 cmd, flags);
2581 }
2582 
2583 static int mr6_msgsize(bool unresolved)
2584 {
2585 	size_t len =
2586 		NLMSG_ALIGN(sizeof(struct rtmsg))
2587 		+ nla_total_size(4)	/* RTA_TABLE */
2588 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2589 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2590 		;
2591 
2592 	if (!unresolved)
2593 		len = len
2594 		      + nla_total_size(4)	/* RTA_IIF */
2595 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2596 		      + MAXMIFS * NLA_ALIGN(sizeof(struct rtnexthop))
2597 						/* RTA_MFC_STATS */
2598 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2599 		;
2600 
2601 	return len;
2602 }
2603 
2604 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2605 			      int cmd)
2606 {
2607 	struct net *net = read_pnet(&mrt->net);
2608 	struct sk_buff *skb;
2609 	int err = -ENOBUFS;
2610 
2611 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS), GFP_ATOMIC);
2612 	if (!skb)
2613 		goto errout;
2614 
2615 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2616 	if (err < 0)
2617 		goto errout;
2618 
2619 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2620 	return;
2621 
2622 errout:
2623 	kfree_skb(skb);
2624 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2625 }
2626 
2627 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2628 {
2629 	size_t len =
2630 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2631 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2632 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2633 					/* IP6MRA_CREPORT_SRC_ADDR */
2634 		+ nla_total_size(sizeof(struct in6_addr))
2635 					/* IP6MRA_CREPORT_DST_ADDR */
2636 		+ nla_total_size(sizeof(struct in6_addr))
2637 					/* IP6MRA_CREPORT_PKT */
2638 		+ nla_total_size(payloadlen)
2639 		;
2640 
2641 	return len;
2642 }
2643 
2644 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2645 {
2646 	struct net *net = read_pnet(&mrt->net);
2647 	struct nlmsghdr *nlh;
2648 	struct rtgenmsg *rtgenm;
2649 	struct mrt6msg *msg;
2650 	struct sk_buff *skb;
2651 	struct nlattr *nla;
2652 	int payloadlen;
2653 
2654 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2655 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2656 
2657 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2658 	if (!skb)
2659 		goto errout;
2660 
2661 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2662 			sizeof(struct rtgenmsg), 0);
2663 	if (!nlh)
2664 		goto errout;
2665 	rtgenm = nlmsg_data(nlh);
2666 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2667 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2668 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2669 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2670 			     &msg->im6_src) ||
2671 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2672 			     &msg->im6_dst))
2673 		goto nla_put_failure;
2674 
2675 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2676 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2677 				  nla_data(nla), payloadlen))
2678 		goto nla_put_failure;
2679 
2680 	nlmsg_end(skb, nlh);
2681 
2682 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2683 	return;
2684 
2685 nla_put_failure:
2686 	nlmsg_cancel(skb, nlh);
2687 errout:
2688 	kfree_skb(skb);
2689 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2690 }
2691 
2692 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2693 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2694 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2695 	[RTA_TABLE]		= { .type = NLA_U32 },
2696 };
2697 
2698 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2699 					const struct nlmsghdr *nlh,
2700 					struct nlattr **tb,
2701 					struct netlink_ext_ack *extack)
2702 {
2703 	struct rtmsg *rtm;
2704 	int err;
2705 
2706 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2707 			  extack);
2708 	if (err)
2709 		return err;
2710 
2711 	rtm = nlmsg_data(nlh);
2712 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2713 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2714 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2715 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2716 		NL_SET_ERR_MSG_MOD(extack,
2717 				   "Invalid values in header for multicast route get request");
2718 		return -EINVAL;
2719 	}
2720 
2721 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2722 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2723 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2724 		return -EINVAL;
2725 	}
2726 
2727 	return 0;
2728 }
2729 
2730 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2731 			      struct netlink_ext_ack *extack)
2732 {
2733 	struct net *net = sock_net(in_skb->sk);
2734 	struct in6_addr src = {}, grp = {};
2735 	struct nlattr *tb[RTA_MAX + 1];
2736 	struct mfc6_cache *cache;
2737 	struct mr_table *mrt;
2738 	struct sk_buff *skb;
2739 	u32 tableid;
2740 	int err;
2741 
2742 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2743 	if (err < 0)
2744 		return err;
2745 
2746 	skb = nlmsg_new(mr6_msgsize(false), GFP_KERNEL);
2747 	if (!skb)
2748 		return -ENOBUFS;
2749 
2750 	if (tb[RTA_SRC])
2751 		src = nla_get_in6_addr(tb[RTA_SRC]);
2752 	if (tb[RTA_DST])
2753 		grp = nla_get_in6_addr(tb[RTA_DST]);
2754 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2755 
2756 	rcu_read_lock();
2757 
2758 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2759 	if (!mrt) {
2760 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2761 		err = -ENOENT;
2762 		goto err;
2763 	}
2764 
2765 	cache = ip6mr_cache_find(mrt, &src, &grp);
2766 	if (!cache) {
2767 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2768 		err = -ENOENT;
2769 		goto err;
2770 	}
2771 
2772 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2773 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2774 	if (err < 0)
2775 		goto err;
2776 
2777 	rcu_read_unlock();
2778 
2779 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2780 
2781 err:
2782 	rcu_read_unlock();
2783 	kfree_skb(skb);
2784 	return err;
2785 }
2786 
2787 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2788 {
2789 	const struct nlmsghdr *nlh = cb->nlh;
2790 	struct fib_dump_filter filter = {};
2791 	int err;
2792 
2793 	rcu_read_lock();
2794 
2795 	if (cb->strict_check) {
2796 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2797 					    &filter, cb);
2798 		if (err < 0)
2799 			goto unlock;
2800 	}
2801 
2802 	if (filter.table_id) {
2803 		struct mr_table *mrt;
2804 
2805 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2806 		if (!mrt) {
2807 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) {
2808 				err = skb->len;
2809 				goto unlock;
2810 			}
2811 
2812 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2813 			err = -ENOENT;
2814 			goto unlock;
2815 		}
2816 
2817 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2818 				    &mfc_unres_lock, &filter);
2819 		err = skb->len ? : err;
2820 		goto unlock;
2821 	}
2822 
2823 	err = mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2824 			       _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2825 unlock:
2826 	rcu_read_unlock();
2827 
2828 	return err;
2829 }
2830