xref: /linux/net/ipv6/ip6mr.c (revision c8b90d40d5bba8e6fba457b8a7c10d3c0d467e37)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
#include <net/ip_tunnels.h>

#include <linux/nospec.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);

static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We revert to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

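/* With CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, multicast routing tables are
 * selected through fib rules (RTNL_FAMILY_IP6MR); without it, each netns
 * has a single table, net->ipv6.mrt6, and the helpers below collapse to
 * trivial accessors.
 */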
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))

static bool ip6mr_can_free_table(struct net *net)
{
	return !check_net(net) || !net->ipv6.mr6_rules_ops;
}

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, id);
	rcu_read_unlock();
	return mrt;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi6_to_flowi(flp6));

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

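/* ip6mr rules carry no selectors beyond the generic ones, so matching,
 * configuration and comparison are all trivial.
 */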
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

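	/* Catch-all rule at the lowest precedence (pref 0x7fff) pointing
	 * at the default table.
	 */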
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	rtnl_lock();
	ip6mr_free_table(mrt);
	rtnl_unlock();
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	ASSERT_RTNL();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}

static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static bool ip6mr_can_free_table(struct net *net)
{
	return !check_net(net);
}

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

#define __ip6mr_get_table ip6mr_get_table

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ASSERT_RTNL();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
#endif

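/* rhashtable compare callback: return 0 when the entry's (group, origin)
 * pair equals the lookup key, non-zero otherwise.
 */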
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = __ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	struct net *net = read_pnet(&mrt->net);

	WARN_ON_ONCE(!ip6mr_can_free_table(net));

	timer_shutdown_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing,
 * /proc/ip6_mr_cache and /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		rcu_read_unlock();
		return ERR_PTR(-ENOENT);
	}

	iter->mrt = mrt;

	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const struct net_device *vif_dev;
		const char *name;

		vif_dev = vif_dev_read(vif);
		name = vif_dev ? vif_dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

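/* PIM Register receive path: validate the register header and checksum,
 * make sure the inner packet really is an IPv6 multicast frame, then
 * decapsulate it onto the pim6reg device of the matching table.
 */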
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
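	/* Accept only PIMv2 Register messages that are not Null-Registers
	 * and whose checksum verifies over either the register header alone
	 * (sizeof(*pim) bytes) or the whole packet.
	 */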
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check that the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;

	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (reg_vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

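/* Transmitting through the pim6reg device never puts a frame on the wire:
 * the whole packet is bounced to the user-space daemon (MRT6MSG_WHOLEPKT)
 * so it can build the PIM Register itself.
 */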
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto tx_err;

	DEV_STATS_ADD(dev, tx_bytes, skb->len);
	DEV_STATS_INC(dev, tx_packets);
	rcu_read_lock();
	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
			   MRT6MSG_WHOLEPKT);
	rcu_read_unlock();
	kfree_skb(skb);
	return NETDEV_TX_OK;

tx_err:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
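	/* Ethernet MTU minus the outer IPv6 header; the 8 presumably
	 * accounts for the PIM register header the daemon will prepend.
	 */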
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->netns_local	= true;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_VIF_DEL, v, dev,
				       vifi, mrt->id);
	spin_lock(&mrt_lock);
	RCU_INIT_POINTER(v->dev, NULL);

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_dec(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	netdev_put(dev, &v->dev_tracker);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer handler for the unresolved-entry queue. */

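/* Walk the unresolved queue under mfc_unres_lock, destroying entries whose
 * resolution window (10s) has passed, and re-arm the timer for the soonest
 * remaining expiry.
 */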
static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oif list. Called with mrt_lock held. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special-purpose VIF used by PIM:
		 * all packets are bounced to the daemon.
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
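	/* Backdate last_assert so the first wrong-interface event is
	 * reported immediately rather than throttled by MFC_ASSERT_THRESH.
	 */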
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has moved from the unresolved queue to the resolved state
 */

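/* Queued skbs are either real IPv6 frames or pending RTM_GETROUTE netlink
 * replies; the latter are tagged with a zero IPv6 version nibble and are
 * completed (or failed with -EMSGSIZE) here, while real frames are
 * forwarded normally.
 */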
static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			rcu_read_lock();
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
			rcu_read_unlock();
		}
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under rcu_read_lock()
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* Internal messages do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution; creates or extends an unresolved cache
 * entry and reports the first packet to pim6sd (MRT6MSG_NOCACHE).
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(const struct net *net)
{
	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	rtnl_lock();
	ip6mr_rules_exit(net);
	rtnl_unlock();
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_notifier_exit(net);
}

static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		ip6mr_rules_exit(net);
	rtnl_unlock();
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.exit_batch = ip6mr_net_exit_batch,
};

static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
	 .msgtype = RTM_GETROUTE,
	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
	if (!err)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void __init ip6_mr_cleanup(void)
{
	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check whether we just resolved a queued (unresolved) entry. If so,
	 * play out the queued frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

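/* The MRT6_FLUSH_* bits select what to drop: MRT6_FLUSH_MIFS/_MFC flush
 * dynamic entries, while the *_STATIC variants flush entries that were
 * installed as static.
 */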
static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	if (flags & MRT6_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	spin_lock(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
	}
	spin_unlock(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct ipv6_devconf *devconf;
	struct mr_table *mrt;
	int err = -EACCES;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	devconf = net->ipv6.devconf_all;
	if (!devconf || !atomic_read(&devconf->mc_forwarding))
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			spin_lock(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			atomic_dec(&devconf->mc_forwarding);
			spin_unlock(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

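/* A routing daemon typically drives this interface roughly as follows
 * (illustrative sketch only, error handling omitted; if_index stands for
 * the physical interface index):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	struct mif6ctl mc = { .mif6c_mifi = 0, .mif6c_pifi = if_index };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 *	struct mf6cctl fc = { ... };	// (S,G) plus the oif set
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &fc, sizeof(fc));
 *
 * Only one socket per table may own the interface (MRT6_INIT), as
 * enforced by ip6mr_sk_init() above.
 */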
int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;
		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			  sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
	struct sioc_sg_req6 *sr;
	struct sioc_mif_req6 *vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		vr = (struct sioc_mif_req6 *)arg;
		if (vr->mifi >= mrt->maxvif)
			return -EINVAL;
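		/* Clamp the index under speculation (Spectre v1); it was
		 * bounds-checked above.
		 */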
1928 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1929 		rcu_read_lock();
1930 		vif = &mrt->vif_table[vr->mifi];
1931 		if (VIF_EXISTS(mrt, vr->mifi)) {
1932 			vr->icount = READ_ONCE(vif->pkt_in);
1933 			vr->ocount = READ_ONCE(vif->pkt_out);
1934 			vr->ibytes = READ_ONCE(vif->bytes_in);
1935 			vr->obytes = READ_ONCE(vif->bytes_out);
1936 			rcu_read_unlock();
1937 			return 0;
1938 		}
1939 		rcu_read_unlock();
1940 		return -EADDRNOTAVAIL;
1941 	case SIOCGETSGCNT_IN6:
1942 		sr = (struct sioc_sg_req6 *)arg;
1943 
1944 		rcu_read_lock();
1945 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1946 				     &sr->grp.sin6_addr);
1947 		if (c) {
1948 			sr->pktcnt = c->_c.mfc_un.res.pkt;
1949 			sr->bytecnt = c->_c.mfc_un.res.bytes;
1950 			sr->wrong_if = c->_c.mfc_un.res.wrong_if;
1951 			rcu_read_unlock();
1952 			return 0;
1953 		}
1954 		rcu_read_unlock();
1955 		return -EADDRNOTAVAIL;
1956 	default:
1957 		return -ENOIOCTLCMD;
1958 	}
1959 }
1960 
1961 #ifdef CONFIG_COMPAT
1962 struct compat_sioc_sg_req6 {
1963 	struct sockaddr_in6 src;
1964 	struct sockaddr_in6 grp;
1965 	compat_ulong_t pktcnt;
1966 	compat_ulong_t bytecnt;
1967 	compat_ulong_t wrong_if;
1968 };
1969 
1970 struct compat_sioc_mif_req6 {
1971 	mifi_t	mifi;
1972 	compat_ulong_t icount;
1973 	compat_ulong_t ocount;
1974 	compat_ulong_t ibytes;
1975 	compat_ulong_t obytes;
1976 };
1977 
1978 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1979 {
1980 	struct compat_sioc_sg_req6 sr;
1981 	struct compat_sioc_mif_req6 vr;
1982 	struct vif_device *vif;
1983 	struct mfc6_cache *c;
1984 	struct net *net = sock_net(sk);
1985 	struct mr_table *mrt;
1986 
1987 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1988 	if (!mrt)
1989 		return -ENOENT;
1990 
1991 	switch (cmd) {
1992 	case SIOCGETMIFCNT_IN6:
1993 		if (copy_from_user(&vr, arg, sizeof(vr)))
1994 			return -EFAULT;
1995 		if (vr.mifi >= mrt->maxvif)
1996 			return -EINVAL;
1997 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1998 		rcu_read_lock();
1999 		vif = &mrt->vif_table[vr.mifi];
2000 		if (VIF_EXISTS(mrt, vr.mifi)) {
2001 			vr.icount = READ_ONCE(vif->pkt_in);
2002 			vr.ocount = READ_ONCE(vif->pkt_out);
2003 			vr.ibytes = READ_ONCE(vif->bytes_in);
2004 			vr.obytes = READ_ONCE(vif->bytes_out);
2005 			rcu_read_unlock();
2006 
2007 			if (copy_to_user(arg, &vr, sizeof(vr)))
2008 				return -EFAULT;
2009 			return 0;
2010 		}
2011 		rcu_read_unlock();
2012 		return -EADDRNOTAVAIL;
2013 	case SIOCGETSGCNT_IN6:
2014 		if (copy_from_user(&sr, arg, sizeof(sr)))
2015 			return -EFAULT;
2016 
2017 		rcu_read_lock();
2018 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2019 		if (c) {
2020 			sr.pktcnt = c->_c.mfc_un.res.pkt;
2021 			sr.bytecnt = c->_c.mfc_un.res.bytes;
2022 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
2023 			rcu_read_unlock();
2024 
2025 			if (copy_to_user(arg, &sr, sizeof(sr)))
2026 				return -EFAULT;
2027 			return 0;
2028 		}
2029 		rcu_read_unlock();
2030 		return -EADDRNOTAVAIL;
2031 	default:
2032 		return -ENOIOCTLCMD;
2033 	}
2034 }
2035 #endif
2036 
2037 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2038 {
2039 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2040 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2041 	return dst_output(net, sk, skb);
2042 }
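/* The counter bumped above is exported as Ip6OutForwDatagrams in
 * /proc/net/snmp6.
 */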
2043 
2044 /*
2045  *	Processing handlers for ip6mr_forward
2046  */
2047 
2048 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2049 			  struct sk_buff *skb, int vifi)
2050 {
2051 	struct vif_device *vif = &mrt->vif_table[vifi];
2052 	struct net_device *vif_dev;
2053 	struct ipv6hdr *ipv6h;
2054 	struct dst_entry *dst;
2055 	struct flowi6 fl6;
2056 
2057 	vif_dev = vif_dev_read(vif);
2058 	if (!vif_dev)
2059 		goto out_free;
2060 
2061 #ifdef CONFIG_IPV6_PIMSM_V2
2062 	if (vif->flags & MIFF_REGISTER) {
2063 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2064 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2065 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2066 		DEV_STATS_INC(vif_dev, tx_packets);
2067 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2068 		goto out_free;
2069 	}
2070 #endif
2071 
2072 	ipv6h = ipv6_hdr(skb);
2073 
2074 	fl6 = (struct flowi6) {
2075 		.flowi6_oif = vif->link,
2076 		.daddr = ipv6h->daddr,
2077 	};
2078 
2079 	dst = ip6_route_output(net, NULL, &fl6);
2080 	if (dst->error) {
2081 		dst_release(dst);
2082 		goto out_free;
2083 	}
2084 
2085 	skb_dst_drop(skb);
2086 	skb_dst_set(skb, dst);
2087 
2088 	/*
2089 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
2090 	 * locally not only before forwarding, but also after forwarding on
2091 	 * all output interfaces. Clearly, if the mrouter runs a multicast
2092 	 * program, that program should receive packets regardless of which
2093 	 * interface it joined on.
2094 	 * If we did not do this, the program would have to join on all
2095 	 * interfaces. On the other hand, a multihomed host (or router, but
2096 	 * not an mrouter) cannot join on more than one interface - that
2097 	 * would result in receiving duplicate packets.
2098 	 */
2099 	skb->dev = vif_dev;
2100 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2101 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2102 
2103 	/* We are about to modify the packet header, so make it writable */
2104 	/* XXX: what about extension headers? */
2105 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2106 		goto out_free;
2107 
2108 	ipv6h = ipv6_hdr(skb);
2109 	ipv6h->hop_limit--;
2110 
2111 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2112 
2113 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2114 		       net, NULL, skb, skb->dev, vif_dev,
2115 		       ip6mr_forward2_finish);
2116 
2117 out_free:
2118 	kfree_skb(skb);
2119 	return 0;
2120 }
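/*
 * Note that the MIFF_REGISTER branch above transmits nothing itself: the
 * whole packet is handed to the PIM daemon as an MRT6MSG_WHOLEPKT upcall
 * on the mroute socket. A rough sketch of the receiving side (illustrative
 * only; fd is the daemon's mroute socket, and a real daemon such as pim6sd
 * does much more):
 *
 *	char buf[8192];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct mrt6msg *msg = (struct mrt6msg *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*msg) && msg->im6_mbz == 0 &&
 *	    msg->im6_msgtype == MRT6MSG_WHOLEPKT) {
 *		// the forwarded packet follows the header; wrap it
 *		// in a PIM register and unicast it to the RP
 *	}
 *
 * Upcalls are told apart from ordinary ICMPv6 traffic by the zero
 * im6_mbz byte, which sits where an ICMPv6 message would carry its
 * type (type 0 is reserved).
 */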
2121 
2122 /* Called with rcu_read_lock() */
2123 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2124 {
2125 	int ct;
2126 
2127 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2128 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2129 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2130 			break;
2131 	}
2132 	return ct;
2133 }
2134 
2135 /* Called under rcu_read_lock() */
2136 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2137 			   struct net_device *dev, struct sk_buff *skb,
2138 			   struct mfc6_cache *c)
2139 {
2140 	int psend = -1;
2141 	int vif, ct;
2142 	int true_vifi = ip6mr_find_vif(mrt, dev);
2143 
2144 	vif = c->_c.mfc_parent;
2145 	c->_c.mfc_un.res.pkt++;
2146 	c->_c.mfc_un.res.bytes += skb->len;
2147 	c->_c.mfc_un.res.lastuse = jiffies;
2148 
2149 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2150 		struct mfc6_cache *cache_proxy;
2151 
2152 		/* For an (*,G) entry, we only check that the incoming
2153 		 * interface is part of the static tree.
2154 		 */
2155 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2156 		if (cache_proxy &&
2157 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2158 			goto forward;
2159 	}
2160 
2161 	/*
2162 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2163 	 */
2164 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2165 		c->_c.mfc_un.res.wrong_if++;
2166 
2167 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2168 		    /* PIM-SM uses asserts when switching from the RPT to
2169 		       the SPT, so we cannot check that the packet arrived
2170 		       on an oif. That is bad, but the alternative is moving
2171 		       a large chunk of pimd into the kernel. Ough... --ANK
2172 		     */
2173 		    (mrt->mroute_do_pim ||
2174 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2175 		    time_after(jiffies,
2176 			       c->_c.mfc_un.res.last_assert +
2177 			       MFC_ASSERT_THRESH)) {
2178 			c->_c.mfc_un.res.last_assert = jiffies;
2179 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2180 			if (mrt->mroute_do_wrvifwhole)
2181 				ip6mr_cache_report(mrt, skb, true_vifi,
2182 						   MRT6MSG_WRMIFWHOLE);
2183 		}
2184 		goto dont_forward;
2185 	}
2186 
2187 forward:
2188 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2189 		   mrt->vif_table[vif].pkt_in + 1);
2190 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2191 		   mrt->vif_table[vif].bytes_in + skb->len);
2192 
2193 	/*
2194 	 *	Forward the frame
2195 	 */
2196 	if (ipv6_addr_any(&c->mf6c_origin) &&
2197 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2198 		if (true_vifi >= 0 &&
2199 		    true_vifi != c->_c.mfc_parent &&
2200 		    ipv6_hdr(skb)->hop_limit >
2201 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2202 			/* It's an (*,*) entry and the packet is not coming from
2203 			 * the upstream: forward the packet to the upstream
2204 			 * only.
2205 			 */
2206 			psend = c->_c.mfc_parent;
2207 			goto last_forward;
2208 		}
2209 		goto dont_forward;
2210 	}
2211 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2212 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2213 		/* For (*,G) entry, don't forward to the incoming interface */
2214 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2215 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2216 			if (psend != -1) {
2217 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2218 				if (skb2)
2219 					ip6mr_forward2(net, mrt, skb2, psend);
2220 			}
2221 			psend = ct;
2222 		}
2223 	}
2224 last_forward:
2225 	if (psend != -1) {
2226 		ip6mr_forward2(net, mrt, skb, psend);
2227 		return;
2228 	}
2229 
2230 dont_forward:
2231 	kfree_skb(skb);
2232 }
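/*
 * The ttls[] tests above implement the oif set configured from userspace:
 * the MRT6_ADD_MFC handler gives member mifs a threshold of 1 and
 * non-members 255, so "hop_limit > ttls[ct]" selects exactly the members.
 * An illustrative (S,G) install, assuming mifs 0 and 1 already exist and
 * src/grp hold the struct in6_addr addresses:
 *
 *	struct mf6cctl mfc = {};
 *
 *	mfc.mf6cc_origin.sin6_addr   = src;
 *	mfc.mf6cc_mcastgrp.sin6_addr = grp;
 *	mfc.mf6cc_parent = 0;			// expected input mif
 *	IF_SET(1, &mfc.mf6cc_ifset);		// forward out mif 1
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 */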
2233 
2234 
2235 /*
2236  *	Multicast packets for forwarding arrive here
2237  */
2238 
2239 int ip6_mr_input(struct sk_buff *skb)
2240 {
2241 	struct mfc6_cache *cache;
2242 	struct net *net = dev_net(skb->dev);
2243 	struct mr_table *mrt;
2244 	struct flowi6 fl6 = {
2245 		.flowi6_iif	= skb->dev->ifindex,
2246 		.flowi6_mark	= skb->mark,
2247 	};
2248 	int err;
2249 	struct net_device *dev;
2250 
2251 	/* skb->dev passed in is the l3 master dev for VRFs. Look up the
2252 	 * underlying interface that actually has a vif associated with it.
2253 	 */
2254 	dev = skb->dev;
2255 	if (netif_is_l3_master(skb->dev)) {
2256 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2257 		if (!dev) {
2258 			kfree_skb(skb);
2259 			return -ENODEV;
2260 		}
2261 	}
2262 
2263 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2264 	if (err < 0) {
2265 		kfree_skb(skb);
2266 		return err;
2267 	}
2268 
2269 	cache = ip6mr_cache_find(mrt,
2270 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2271 	if (!cache) {
2272 		int vif = ip6mr_find_vif(mrt, dev);
2273 
2274 		if (vif >= 0)
2275 			cache = ip6mr_cache_find_any(mrt,
2276 						     &ipv6_hdr(skb)->daddr,
2277 						     vif);
2278 	}
2279 
2280 	/*
2281 	 *	No usable cache entry
2282 	 */
2283 	if (!cache) {
2284 		int vif;
2285 
2286 		vif = ip6mr_find_vif(mrt, dev);
2287 		if (vif >= 0) {
2288 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2289 
2290 			return err;
2291 		}
2292 		kfree_skb(skb);
2293 		return -ENODEV;
2294 	}
2295 
2296 	ip6_mr_forward(net, mrt, dev, skb, cache);
2297 
2298 	return 0;
2299 }
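/*
 * This path is only taken once a daemon has made itself the multicast
 * router for the table. A minimal, illustrative bring-up from userspace
 * (the physical ifindex 2 is an assumption):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = {};
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	mif.mif6c_mifi = 0;	// mif number to assign
 *	mif.mif6c_pifi = 2;	// ifindex of the underlying device
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 * Closing the socket (or MRT6_DONE) tears this down again.
 */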
2300 
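/* Called from the IPv6 unicast routing code (rt6_fill_node()) when a
 * route get/dump hits a multicast route, so that the reply carries the
 * forwarding state of the matching MFC entry.
 */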
2301 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2302 		    u32 portid)
2303 {
2304 	int err;
2305 	struct mr_table *mrt;
2306 	struct mfc6_cache *cache;
2307 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2308 
2309 	rcu_read_lock();
2310 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2311 	if (!mrt) {
2312 		rcu_read_unlock();
2313 		return -ENOENT;
2314 	}
2315 
2316 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2317 	if (!cache && skb->dev) {
2318 		int vif = ip6mr_find_vif(mrt, skb->dev);
2319 
2320 		if (vif >= 0)
2321 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2322 						     vif);
2323 	}
2324 
2325 	if (!cache) {
2326 		struct sk_buff *skb2;
2327 		struct ipv6hdr *iph;
2328 		struct net_device *dev;
2329 		int vif;
2330 
2331 		dev = skb->dev;
2332 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2333 			rcu_read_unlock();
2334 			return -ENODEV;
2335 		}
2336 
2337 		/* really correct? */
2338 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2339 		if (!skb2) {
2340 			rcu_read_unlock();
2341 			return -ENOMEM;
2342 		}
2343 
2344 		NETLINK_CB(skb2).portid = portid;
2345 		skb_reset_transport_header(skb2);
2346 
2347 		skb_put(skb2, sizeof(struct ipv6hdr));
2348 		skb_reset_network_header(skb2);
2349 
2350 		iph = ipv6_hdr(skb2);
2351 		iph->version = 0;
2352 		iph->priority = 0;
2353 		iph->flow_lbl[0] = 0;
2354 		iph->flow_lbl[1] = 0;
2355 		iph->flow_lbl[2] = 0;
2356 		iph->payload_len = 0;
2357 		iph->nexthdr = IPPROTO_NONE;
2358 		iph->hop_limit = 0;
2359 		iph->saddr = rt->rt6i_src.addr;
2360 		iph->daddr = rt->rt6i_dst.addr;
2361 
2362 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2363 		rcu_read_unlock();
2364 
2365 		return err;
2366 	}
2367 
2368 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2369 	rcu_read_unlock();
2370 	return err;
2371 }
2372 
2373 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2374 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2375 			     int flags)
2376 {
2377 	struct nlmsghdr *nlh;
2378 	struct rtmsg *rtm;
2379 	int err;
2380 
2381 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2382 	if (!nlh)
2383 		return -EMSGSIZE;
2384 
2385 	rtm = nlmsg_data(nlh);
2386 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2387 	rtm->rtm_dst_len  = 128;
2388 	rtm->rtm_src_len  = 128;
2389 	rtm->rtm_tos      = 0;
2390 	rtm->rtm_table    = mrt->id;
2391 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2392 		goto nla_put_failure;
2393 	rtm->rtm_type = RTN_MULTICAST;
2394 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2395 	if (c->_c.mfc_flags & MFC_STATIC)
2396 		rtm->rtm_protocol = RTPROT_STATIC;
2397 	else
2398 		rtm->rtm_protocol = RTPROT_MROUTED;
2399 	rtm->rtm_flags    = 0;
2400 
2401 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2402 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2403 		goto nla_put_failure;
2404 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2405 	/* do not break the dump if cache is unresolved */
2406 	if (err < 0 && err != -ENOENT)
2407 		goto nla_put_failure;
2408 
2409 	nlmsg_end(skb, nlh);
2410 	return 0;
2411 
2412 nla_put_failure:
2413 	nlmsg_cancel(skb, nlh);
2414 	return -EMSGSIZE;
2415 }
2416 
2417 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2418 			      u32 portid, u32 seq, struct mr_mfc *c,
2419 			      int cmd, int flags)
2420 {
2421 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2422 				 cmd, flags);
2423 }
2424 
2425 static int mr6_msgsize(bool unresolved, int maxvif)
2426 {
2427 	size_t len =
2428 		NLMSG_ALIGN(sizeof(struct rtmsg))
2429 		+ nla_total_size(4)	/* RTA_TABLE */
2430 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2431 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2432 		;
2433 
2434 	if (!unresolved)
2435 		len = len
2436 		      + nla_total_size(4)	/* RTA_IIF */
2437 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2438 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2439 						/* RTA_MFC_STATS */
2440 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2441 		;
2442 
2443 	return len;
2444 }
2445 
2446 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2447 			      int cmd)
2448 {
2449 	struct net *net = read_pnet(&mrt->net);
2450 	struct sk_buff *skb;
2451 	int err = -ENOBUFS;
2452 
2453 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2454 			GFP_ATOMIC);
2455 	if (!skb)
2456 		goto errout;
2457 
2458 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2459 	if (err < 0)
2460 		goto errout;
2461 
2462 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2463 	return;
2464 
2465 errout:
2466 	kfree_skb(skb);
2467 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2468 }
2469 
2470 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2471 {
2472 	size_t len =
2473 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2474 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2475 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2476 					/* IP6MRA_CREPORT_SRC_ADDR */
2477 		+ nla_total_size(sizeof(struct in6_addr))
2478 					/* IP6MRA_CREPORT_DST_ADDR */
2479 		+ nla_total_size(sizeof(struct in6_addr))
2480 					/* IP6MRA_CREPORT_PKT */
2481 		+ nla_total_size(payloadlen)
2482 		;
2483 
2484 	return len;
2485 }
2486 
2487 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2488 {
2489 	struct net *net = read_pnet(&mrt->net);
2490 	struct nlmsghdr *nlh;
2491 	struct rtgenmsg *rtgenm;
2492 	struct mrt6msg *msg;
2493 	struct sk_buff *skb;
2494 	struct nlattr *nla;
2495 	int payloadlen;
2496 
2497 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2498 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2499 
2500 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2501 	if (!skb)
2502 		goto errout;
2503 
2504 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2505 			sizeof(struct rtgenmsg), 0);
2506 	if (!nlh)
2507 		goto errout;
2508 	rtgenm = nlmsg_data(nlh);
2509 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2510 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2511 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2512 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2513 			     &msg->im6_src) ||
2514 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2515 			     &msg->im6_dst))
2516 		goto nla_put_failure;
2517 
2518 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2519 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2520 				  nla_data(nla), payloadlen))
2521 		goto nla_put_failure;
2522 
2523 	nlmsg_end(skb, nlh);
2524 
2525 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2526 	return;
2527 
2528 nla_put_failure:
2529 	nlmsg_cancel(skb, nlh);
2530 errout:
2531 	kfree_skb(skb);
2532 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2533 }
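/*
 * Since these reports are also multicast to RTNLGRP_IPV6_MROUTE_R, a
 * monitor need not own the mroute socket. Illustrative subscription with
 * plain rtnetlink (error handling elided):
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	unsigned int grp = RTNLGRP_IPV6_MROUTE_R;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 *	// recv() now yields RTM_NEWCACHEREPORT messages carrying the
 *	// IP6MRA_CREPORT_* attributes built above
 */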
2534 
2535 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2536 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2537 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2538 	[RTA_TABLE]		= { .type = NLA_U32 },
2539 };
2540 
2541 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2542 					const struct nlmsghdr *nlh,
2543 					struct nlattr **tb,
2544 					struct netlink_ext_ack *extack)
2545 {
2546 	struct rtmsg *rtm;
2547 	int err;
2548 
2549 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2550 			  extack);
2551 	if (err)
2552 		return err;
2553 
2554 	rtm = nlmsg_data(nlh);
2555 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2556 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2557 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2558 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2559 		NL_SET_ERR_MSG_MOD(extack,
2560 				   "Invalid values in header for multicast route get request");
2561 		return -EINVAL;
2562 	}
2563 
2564 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2565 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2566 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2567 		return -EINVAL;
2568 	}
2569 
2570 	return 0;
2571 }
2572 
2573 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2574 			      struct netlink_ext_ack *extack)
2575 {
2576 	struct net *net = sock_net(in_skb->sk);
2577 	struct in6_addr src = {}, grp = {};
2578 	struct nlattr *tb[RTA_MAX + 1];
2579 	struct mfc6_cache *cache;
2580 	struct mr_table *mrt;
2581 	struct sk_buff *skb;
2582 	u32 tableid;
2583 	int err;
2584 
2585 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2586 	if (err < 0)
2587 		return err;
2588 
2589 	if (tb[RTA_SRC])
2590 		src = nla_get_in6_addr(tb[RTA_SRC]);
2591 	if (tb[RTA_DST])
2592 		grp = nla_get_in6_addr(tb[RTA_DST]);
2593 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2594 
2595 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2596 	if (!mrt) {
2597 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2598 		return -ENOENT;
2599 	}
2600 
2601 	/* entries are added/deleted only under RTNL */
2602 	rcu_read_lock();
2603 	cache = ip6mr_cache_find(mrt, &src, &grp);
2604 	rcu_read_unlock();
2605 	if (!cache) {
2606 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2607 		return -ENOENT;
2608 	}
2609 
2610 	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2611 	if (!skb)
2612 		return -ENOBUFS;
2613 
2614 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2615 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2616 	if (err < 0) {
2617 		kfree_skb(skb);
2618 		return err;
2619 	}
2620 
2621 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2622 }
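/*
 * A hand-rolled request matching the validation above (illustrative; real
 * code would typically use libmnl or libnl):
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg rtm;
 *		char attrs[64];
 *	} req = {};
 *
 *	req.nlh.nlmsg_type  = RTM_GETROUTE;
 *	req.nlh.nlmsg_flags = NLM_F_REQUEST;
 *	req.rtm.rtm_family  = RTNL_FAMILY_IP6MR;
 *	req.rtm.rtm_src_len = 128;	// mandatory when RTA_SRC is present
 *	req.rtm.rtm_dst_len = 128;	// mandatory when RTA_DST is present
 *	// append RTA_SRC/RTA_DST as 16-byte attributes, fix up nlmsg_len,
 *	// then send to a NETLINK_ROUTE socket and read back the reply
 */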
2623 
2624 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2625 {
2626 	const struct nlmsghdr *nlh = cb->nlh;
2627 	struct fib_dump_filter filter = {
2628 		.rtnl_held = true,
2629 	};
2630 	int err;
2631 
2632 	if (cb->strict_check) {
2633 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2634 					    &filter, cb);
2635 		if (err < 0)
2636 			return err;
2637 	}
2638 
2639 	if (filter.table_id) {
2640 		struct mr_table *mrt;
2641 
2642 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2643 		if (!mrt) {
2644 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2645 				return skb->len;
2646 
2647 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2648 			return -ENOENT;
2649 		}
2650 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2651 				    &mfc_unres_lock, &filter);
2652 		return skb->len ? : err;
2653 	}
2654 
2655 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2656 				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2657 }
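/*
 * This dump path is what `ip -6 mroute show [table N]` walks: with a
 * table id the single-table branch above runs, otherwise every table is
 * visited via ip6mr_mr_table_iter().
 */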
2658