xref: /linux/net/ipv6/ip6mr.c (revision db30e412b7f543d00396ab27f690608cad06aa97)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
53 struct ip6mr_rule {
54 	struct fib_rule		common;
55 };
56 
/* Result slot filled in by the rule action: the multicast table selected. */
struct ip6mr_result {
	struct mr_table *mrt;
};
60 
/*
 * Big lock protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are also serialised via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);
66 
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76 
77 /* We return to original Alan's scheme. Hash table of resolved
78    entries is changed only in process context and protected
79    with weak lock mrt_lock. Queue of unresolved entries is protected
80    with strong spinlock mfc_unres_lock.
81 
82    In this case data path is free of exclusive locks at all.
83  */
84 
85 static struct kmem_cache *mrt_cachep __read_mostly;
86 
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89 
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 			   struct net_device *dev, struct sk_buff *skb,
92 			   struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 			      mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 			      int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 			      struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 			       struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104 
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Walk every multicast routing table of @net.  The lockdep condition
 * accepts either RTNL being held or the table list being empty.
 */
#define ip6mr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv6.mr6_tables))
110 
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 					    struct mr_table *mrt)
113 {
114 	struct mr_table *ret;
115 
116 	if (!mrt)
117 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 				     struct mr_table, list);
119 	else
120 		ret = list_entry_rcu(mrt->list.next,
121 				     struct mr_table, list);
122 
123 	if (&ret->list == &net->ipv6.mr6_tables)
124 		return NULL;
125 	return ret;
126 }
127 
128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
140 {
141 	struct mr_table *mrt;
142 
143 	rcu_read_lock();
144 	mrt = __ip6mr_get_table(net, id);
145 	rcu_read_unlock();
146 	return mrt;
147 }
148 
149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 			    struct mr_table **mrt)
151 {
152 	int err;
153 	struct ip6mr_result res;
154 	struct fib_lookup_arg arg = {
155 		.result = &res,
156 		.flags = FIB_LOOKUP_NOREF,
157 	};
158 
159 	/* update flow if oif or iif point to device enslaved to l3mdev */
160 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161 
162 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 			       flowi6_to_flowi(flp6), 0, &arg);
164 	if (err < 0)
165 		return err;
166 	*mrt = res.mrt;
167 	return 0;
168 }
169 
170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 			     int flags, struct fib_lookup_arg *arg)
172 {
173 	struct ip6mr_result *res = arg->result;
174 	struct mr_table *mrt;
175 
176 	switch (rule->action) {
177 	case FR_ACT_TO_TBL:
178 		break;
179 	case FR_ACT_UNREACHABLE:
180 		return -ENETUNREACH;
181 	case FR_ACT_PROHIBIT:
182 		return -EACCES;
183 	case FR_ACT_BLACKHOLE:
184 	default:
185 		return -EINVAL;
186 	}
187 
188 	arg->table = fib_rule_get_table(rule, arg);
189 
190 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 	if (!mrt)
192 		return -EAGAIN;
193 	res->mrt = mrt;
194 	return 0;
195 }
196 
/* fib_rules .match callback: always reports a match (returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	return 1;
}
201 
/* fib_rules .configure callback: no rule-specific setup; returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh,
				struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
208 
/* fib_rules .compare callback: unconditionally returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 1;
}
214 
215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 			   struct fib_rule_hdr *frh)
217 {
218 	frh->dst_len = 0;
219 	frh->src_len = 0;
220 	frh->tos     = 0;
221 	return 0;
222 }
223 
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 	.family		= RTNL_FAMILY_IP6MR,
226 	.rule_size	= sizeof(struct ip6mr_rule),
227 	.addr_size	= sizeof(struct in6_addr),
228 	.action		= ip6mr_rule_action,
229 	.match		= ip6mr_rule_match,
230 	.configure	= ip6mr_rule_configure,
231 	.compare	= ip6mr_rule_compare,
232 	.fill		= ip6mr_rule_fill,
233 	.nlgroup	= RTNLGRP_IPV6_RULE,
234 	.owner		= THIS_MODULE,
235 };
236 
237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 	struct fib_rules_ops *ops;
240 	struct mr_table *mrt;
241 	int err;
242 
243 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 	if (IS_ERR(ops))
245 		return PTR_ERR(ops);
246 
247 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248 
249 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 	if (IS_ERR(mrt)) {
251 		err = PTR_ERR(mrt);
252 		goto err1;
253 	}
254 
255 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
256 	if (err < 0)
257 		goto err2;
258 
259 	net->ipv6.mr6_rules_ops = ops;
260 	return 0;
261 
262 err2:
263 	rtnl_lock();
264 	ip6mr_free_table(mrt);
265 	rtnl_unlock();
266 err1:
267 	fib_rules_unregister(ops);
268 	return err;
269 }
270 
271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 	struct mr_table *mrt, *next;
274 
275 	ASSERT_RTNL();
276 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 		list_del(&mrt->list);
278 		ip6mr_free_table(mrt);
279 	}
280 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282 
283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 			    struct netlink_ext_ack *extack)
285 {
286 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288 
289 static unsigned int ip6mr_rules_seq_read(const struct net *net)
290 {
291 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293 
294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
/* Single-table build: visit net->ipv6.mrt6 once if it is non-NULL. */
#define ip6mr_for_each_table(mrt, net)				\
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303 
304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 					    struct mr_table *mrt)
306 {
307 	if (!mrt)
308 		return net->ipv6.mrt6;
309 	return NULL;
310 }
311 
312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
313 {
314 	return net->ipv6.mrt6;
315 }
316 
317 #define __ip6mr_get_table ip6mr_get_table
318 
319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 			    struct mr_table **mrt)
321 {
322 	*mrt = net->ipv6.mrt6;
323 	return 0;
324 }
325 
326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 	struct mr_table *mrt;
329 
330 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 	if (IS_ERR(mrt))
332 		return PTR_ERR(mrt);
333 	net->ipv6.mrt6 = mrt;
334 	return 0;
335 }
336 
337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 	ASSERT_RTNL();
340 	ip6mr_free_table(net->ipv6.mrt6);
341 	net->ipv6.mrt6 = NULL;
342 }
343 
/* Single-table build: there are no rules to dump; always returns 0. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}
349 
/* Single-table build: no rule sequence counter; always returns 0. */
static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
354 #endif
355 
356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 			  const void *ptr)
358 {
359 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361 
362 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365 
366 static const struct rhashtable_params ip6mr_rht_params = {
367 	.head_offset = offsetof(struct mr_mfc, mnode),
368 	.key_offset = offsetof(struct mfc6_cache, cmparg),
369 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
370 	.nelem_hint = 3,
371 	.obj_cmpfn = ip6mr_hash_cmp,
372 	.automatic_shrinking = true,
373 };
374 
/*
 * Callback handed to mr_table_alloc(): with multiple tables enabled the
 * new table is appended to the namespace's table list, otherwise this
 * does nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt, struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
382 
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 	.mf6c_origin = IN6ADDR_ANY_INIT,
385 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387 
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 	.rht_params = &ip6mr_rht_params,
390 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392 
393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
394 {
395 	struct mr_table *mrt;
396 
397 	mrt = __ip6mr_get_table(net, id);
398 	if (mrt)
399 		return mrt;
400 
401 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
402 			      ipmr_expire_process, ip6mr_new_table_set);
403 }
404 
405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 	struct net *net = read_pnet(&mrt->net);
408 
409 	WARN_ON_ONCE(!mr_can_free_table(net));
410 
411 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
412 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
413 				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
414 	rhltable_destroy(&mrt->mfc_hash);
415 	kfree(mrt);
416 }
417 
418 #ifdef CONFIG_PROC_FS
419 /* The /proc interfaces to multicast routing
420  * /proc/ip6_mr_cache /proc/ip6_mr_vif
421  */
422 
423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
424 	__acquires(RCU)
425 {
426 	struct mr_vif_iter *iter = seq->private;
427 	struct net *net = seq_file_net(seq);
428 	struct mr_table *mrt;
429 
430 	rcu_read_lock();
431 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
432 	if (!mrt) {
433 		rcu_read_unlock();
434 		return ERR_PTR(-ENOENT);
435 	}
436 
437 	iter->mrt = mrt;
438 
439 	return mr_vif_seq_start(seq, pos);
440 }
441 
442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
443 	__releases(RCU)
444 {
445 	rcu_read_unlock();
446 }
447 
448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
449 {
450 	struct mr_vif_iter *iter = seq->private;
451 	struct mr_table *mrt = iter->mrt;
452 
453 	if (v == SEQ_START_TOKEN) {
454 		seq_puts(seq,
455 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
456 	} else {
457 		const struct vif_device *vif = v;
458 		const struct net_device *vif_dev;
459 		const char *name;
460 
461 		vif_dev = vif_dev_read(vif);
462 		name = vif_dev ? vif_dev->name : "none";
463 
464 		seq_printf(seq,
465 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
466 			   vif - mrt->vif_table,
467 			   name, vif->bytes_in, vif->pkt_in,
468 			   vif->bytes_out, vif->pkt_out,
469 			   vif->flags);
470 	}
471 	return 0;
472 }
473 
474 static const struct seq_operations ip6mr_vif_seq_ops = {
475 	.start = ip6mr_vif_seq_start,
476 	.next  = mr_vif_seq_next,
477 	.stop  = ip6mr_vif_seq_stop,
478 	.show  = ip6mr_vif_seq_show,
479 };
480 
481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
482 {
483 	struct net *net = seq_file_net(seq);
484 	struct mr_table *mrt;
485 
486 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
487 	if (!mrt)
488 		return ERR_PTR(-ENOENT);
489 
490 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
491 }
492 
493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
494 {
495 	int n;
496 
497 	if (v == SEQ_START_TOKEN) {
498 		seq_puts(seq,
499 			 "Group                            "
500 			 "Origin                           "
501 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
502 	} else {
503 		const struct mfc6_cache *mfc = v;
504 		const struct mr_mfc_iter *it = seq->private;
505 		struct mr_table *mrt = it->mrt;
506 
507 		seq_printf(seq, "%pI6 %pI6 %-3hd",
508 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
509 			   mfc->_c.mfc_parent);
510 
511 		if (it->cache != &mrt->mfc_unres_queue) {
512 			seq_printf(seq, " %8lu %8lu %8lu",
513 				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
514 				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
515 				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
516 			for (n = mfc->_c.mfc_un.res.minvif;
517 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
518 				if (VIF_EXISTS(mrt, n) &&
519 				    mfc->_c.mfc_un.res.ttls[n] < 255)
520 					seq_printf(seq,
521 						   " %2d:%-3d", n,
522 						   mfc->_c.mfc_un.res.ttls[n]);
523 			}
524 		} else {
525 			/* unresolved mfc_caches don't contain
526 			 * pkt, bytes and wrong_if values
527 			 */
528 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
529 		}
530 		seq_putc(seq, '\n');
531 	}
532 	return 0;
533 }
534 
535 static const struct seq_operations ipmr_mfc_seq_ops = {
536 	.start = ipmr_mfc_seq_start,
537 	.next  = mr_mfc_seq_next,
538 	.stop  = mr_mfc_seq_stop,
539 	.show  = ipmr_mfc_seq_show,
540 };
541 #endif
542 
543 #ifdef CONFIG_IPV6_PIMSM_V2
544 
545 static int pim6_rcv(struct sk_buff *skb)
546 {
547 	struct pimreghdr *pim;
548 	struct ipv6hdr   *encap;
549 	struct net_device  *reg_dev = NULL;
550 	struct net *net = dev_net(skb->dev);
551 	struct mr_table *mrt;
552 	struct flowi6 fl6 = {
553 		.flowi6_iif	= skb->dev->ifindex,
554 		.flowi6_mark	= skb->mark,
555 	};
556 	int reg_vif_num;
557 
558 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
559 		goto drop;
560 
561 	pim = (struct pimreghdr *)skb_transport_header(skb);
562 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
563 	    (pim->flags & PIM_NULL_REGISTER) ||
564 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
565 			     sizeof(*pim), IPPROTO_PIM,
566 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
567 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
568 		goto drop;
569 
570 	/* check if the inner packet is destined to mcast group */
571 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
572 				   sizeof(*pim));
573 
574 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
575 	    encap->payload_len == 0 ||
576 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
577 		goto drop;
578 
579 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
580 		goto drop;
581 
582 	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
583 	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
584 	if (reg_vif_num >= 0)
585 		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
586 
587 	if (!reg_dev)
588 		goto drop;
589 
590 	skb->mac_header = skb->network_header;
591 	skb_pull(skb, (u8 *)encap - skb->data);
592 	skb_reset_network_header(skb);
593 	skb->protocol = htons(ETH_P_IPV6);
594 	skb->ip_summed = CHECKSUM_NONE;
595 
596 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
597 
598 	netif_rx(skb);
599 
600 	return 0;
601  drop:
602 	kfree_skb(skb);
603 	return 0;
604 }
605 
606 static const struct inet6_protocol pim6_protocol = {
607 	.handler	=	pim6_rcv,
608 };
609 
610 /* Service routines creating virtual interfaces: PIMREG */
611 
612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
613 				      struct net_device *dev)
614 {
615 	struct net *net = dev_net(dev);
616 	struct mr_table *mrt;
617 	struct flowi6 fl6 = {
618 		.flowi6_oif	= dev->ifindex,
619 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
620 		.flowi6_mark	= skb->mark,
621 	};
622 
623 	if (!pskb_inet_may_pull(skb))
624 		goto tx_err;
625 
626 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
627 		goto tx_err;
628 
629 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
630 	DEV_STATS_INC(dev, tx_packets);
631 	rcu_read_lock();
632 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
633 			   MRT6MSG_WHOLEPKT);
634 	rcu_read_unlock();
635 	kfree_skb(skb);
636 	return NETDEV_TX_OK;
637 
638 tx_err:
639 	DEV_STATS_INC(dev, tx_errors);
640 	kfree_skb(skb);
641 	return NETDEV_TX_OK;
642 }
643 
/* ndo_get_iflink of the register device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
648 
649 static const struct net_device_ops reg_vif_netdev_ops = {
650 	.ndo_start_xmit	= reg_vif_xmit,
651 	.ndo_get_iflink = reg_vif_get_iflink,
652 };
653 
654 static void reg_vif_setup(struct net_device *dev)
655 {
656 	dev->type		= ARPHRD_PIMREG;
657 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
658 	dev->flags		= IFF_NOARP;
659 	dev->netdev_ops		= &reg_vif_netdev_ops;
660 	dev->needs_free_netdev	= true;
661 	dev->netns_immutable	= true;
662 }
663 
664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
665 {
666 	struct net_device *dev;
667 	char name[IFNAMSIZ];
668 
669 	if (mrt->id == RT6_TABLE_DFLT)
670 		sprintf(name, "pim6reg");
671 	else
672 		sprintf(name, "pim6reg%u", mrt->id);
673 
674 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
675 	if (!dev)
676 		return NULL;
677 
678 	dev_net_set(dev, net);
679 
680 	if (register_netdevice(dev)) {
681 		free_netdev(dev);
682 		return NULL;
683 	}
684 
685 	if (dev_open(dev, NULL))
686 		goto failure;
687 
688 	dev_hold(dev);
689 	return dev;
690 
691 failure:
692 	unregister_netdevice(dev);
693 	return NULL;
694 }
695 #endif
696 
697 static int call_ip6mr_vif_entry_notifiers(struct net *net,
698 					  enum fib_event_type event_type,
699 					  struct vif_device *vif,
700 					  struct net_device *vif_dev,
701 					  mifi_t vif_index, u32 tb_id)
702 {
703 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
704 				     vif, vif_dev, vif_index, tb_id,
705 				     &net->ipv6.ipmr_seq);
706 }
707 
708 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
709 					  enum fib_event_type event_type,
710 					  struct mfc6_cache *mfc, u32 tb_id)
711 {
712 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
713 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
714 }
715 
716 /* Delete a VIF entry */
717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
718 		       struct list_head *head)
719 {
720 	struct vif_device *v;
721 	struct net_device *dev;
722 	struct inet6_dev *in6_dev;
723 
724 	if (vifi < 0 || vifi >= mrt->maxvif)
725 		return -EADDRNOTAVAIL;
726 
727 	v = &mrt->vif_table[vifi];
728 
729 	dev = rtnl_dereference(v->dev);
730 	if (!dev)
731 		return -EADDRNOTAVAIL;
732 
733 	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
734 				       FIB_EVENT_VIF_DEL, v, dev,
735 				       vifi, mrt->id);
736 	spin_lock(&mrt_lock);
737 	RCU_INIT_POINTER(v->dev, NULL);
738 
739 #ifdef CONFIG_IPV6_PIMSM_V2
740 	if (vifi == mrt->mroute_reg_vif_num) {
741 		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
742 		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
743 	}
744 #endif
745 
746 	if (vifi + 1 == mrt->maxvif) {
747 		int tmp;
748 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
749 			if (VIF_EXISTS(mrt, tmp))
750 				break;
751 		}
752 		WRITE_ONCE(mrt->maxvif, tmp + 1);
753 	}
754 
755 	spin_unlock(&mrt_lock);
756 
757 	dev_set_allmulti(dev, -1);
758 
759 	in6_dev = __in6_dev_get(dev);
760 	if (in6_dev) {
761 		atomic_dec(&in6_dev->cnf.mc_forwarding);
762 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
763 					     NETCONFA_MC_FORWARDING,
764 					     dev->ifindex, &in6_dev->cnf);
765 	}
766 
767 	if ((v->flags & MIFF_REGISTER) && !notify)
768 		unregister_netdevice_queue(dev, head);
769 
770 	netdev_put(dev, &v->dev_tracker);
771 	return 0;
772 }
773 
774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
775 {
776 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
777 
778 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
779 }
780 
781 static inline void ip6mr_cache_free(struct mfc6_cache *c)
782 {
783 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
784 }
785 
786 /* Destroy an unresolved cache entry, killing queued skbs
787    and reporting error to netlink readers.
788  */
789 
790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
791 {
792 	struct net *net = read_pnet(&mrt->net);
793 	struct sk_buff *skb;
794 
795 	atomic_dec(&mrt->cache_resolve_queue_len);
796 
797 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
798 		if (ipv6_hdr(skb)->version == 0) {
799 			struct nlmsghdr *nlh = skb_pull(skb,
800 							sizeof(struct ipv6hdr));
801 			nlh->nlmsg_type = NLMSG_ERROR;
802 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
803 			skb_trim(skb, nlh->nlmsg_len);
804 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
805 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
806 		} else
807 			kfree_skb(skb);
808 	}
809 
810 	ip6mr_cache_free(c);
811 }
812 
813 
814 /* Timer process for all the unresolved queue. */
815 
816 static void ipmr_do_expire_process(struct mr_table *mrt)
817 {
818 	unsigned long now = jiffies;
819 	unsigned long expires = 10 * HZ;
820 	struct mr_mfc *c, *next;
821 
822 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
823 		if (time_after(c->mfc_un.unres.expires, now)) {
824 			/* not yet... */
825 			unsigned long interval = c->mfc_un.unres.expires - now;
826 			if (interval < expires)
827 				expires = interval;
828 			continue;
829 		}
830 
831 		list_del(&c->list);
832 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
833 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
834 	}
835 
836 	if (!list_empty(&mrt->mfc_unres_queue))
837 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
838 }
839 
840 static void ipmr_expire_process(struct timer_list *t)
841 {
842 	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
843 
844 	if (!spin_trylock(&mfc_unres_lock)) {
845 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
846 		return;
847 	}
848 
849 	if (!list_empty(&mrt->mfc_unres_queue))
850 		ipmr_do_expire_process(mrt);
851 
852 	spin_unlock(&mfc_unres_lock);
853 }
854 
855 /* Fill oifs list. It is called under locked mrt_lock. */
856 
857 static void ip6mr_update_thresholds(struct mr_table *mrt,
858 				    struct mr_mfc *cache,
859 				    unsigned char *ttls)
860 {
861 	int vifi;
862 
863 	cache->mfc_un.res.minvif = MAXMIFS;
864 	cache->mfc_un.res.maxvif = 0;
865 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
866 
867 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
868 		if (VIF_EXISTS(mrt, vifi) &&
869 		    ttls[vifi] && ttls[vifi] < 255) {
870 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
871 			if (cache->mfc_un.res.minvif > vifi)
872 				cache->mfc_un.res.minvif = vifi;
873 			if (cache->mfc_un.res.maxvif <= vifi)
874 				cache->mfc_un.res.maxvif = vifi + 1;
875 		}
876 	}
877 	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
878 }
879 
880 static int mif6_add(struct net *net, struct mr_table *mrt,
881 		    struct mif6ctl *vifc, int mrtsock)
882 {
883 	int vifi = vifc->mif6c_mifi;
884 	struct vif_device *v = &mrt->vif_table[vifi];
885 	struct net_device *dev;
886 	struct inet6_dev *in6_dev;
887 	int err;
888 
889 	/* Is vif busy ? */
890 	if (VIF_EXISTS(mrt, vifi))
891 		return -EADDRINUSE;
892 
893 	switch (vifc->mif6c_flags) {
894 #ifdef CONFIG_IPV6_PIMSM_V2
895 	case MIFF_REGISTER:
896 		/*
897 		 * Special Purpose VIF in PIM
898 		 * All the packets will be sent to the daemon
899 		 */
900 		if (mrt->mroute_reg_vif_num >= 0)
901 			return -EADDRINUSE;
902 		dev = ip6mr_reg_vif(net, mrt);
903 		if (!dev)
904 			return -ENOBUFS;
905 		err = dev_set_allmulti(dev, 1);
906 		if (err) {
907 			unregister_netdevice(dev);
908 			dev_put(dev);
909 			return err;
910 		}
911 		break;
912 #endif
913 	case 0:
914 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
915 		if (!dev)
916 			return -EADDRNOTAVAIL;
917 		err = dev_set_allmulti(dev, 1);
918 		if (err) {
919 			dev_put(dev);
920 			return err;
921 		}
922 		break;
923 	default:
924 		return -EINVAL;
925 	}
926 
927 	in6_dev = __in6_dev_get(dev);
928 	if (in6_dev) {
929 		atomic_inc(&in6_dev->cnf.mc_forwarding);
930 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
931 					     NETCONFA_MC_FORWARDING,
932 					     dev->ifindex, &in6_dev->cnf);
933 	}
934 
935 	/* Fill in the VIF structures */
936 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
937 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
938 			MIFF_REGISTER);
939 
940 	/* And finish update writing critical data */
941 	spin_lock(&mrt_lock);
942 	rcu_assign_pointer(v->dev, dev);
943 	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
944 #ifdef CONFIG_IPV6_PIMSM_V2
945 	if (v->flags & MIFF_REGISTER)
946 		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
947 #endif
948 	if (vifi + 1 > mrt->maxvif)
949 		WRITE_ONCE(mrt->maxvif, vifi + 1);
950 	spin_unlock(&mrt_lock);
951 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
952 				       v, dev, vifi, mrt->id);
953 	return 0;
954 }
955 
956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
957 					   const struct in6_addr *origin,
958 					   const struct in6_addr *mcastgrp)
959 {
960 	struct mfc6_cache_cmp_arg arg = {
961 		.mf6c_origin = *origin,
962 		.mf6c_mcastgrp = *mcastgrp,
963 	};
964 
965 	return mr_mfc_find(mrt, &arg);
966 }
967 
968 /* Look for a (*,G) entry */
969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
970 					       struct in6_addr *mcastgrp,
971 					       mifi_t mifi)
972 {
973 	struct mfc6_cache_cmp_arg arg = {
974 		.mf6c_origin = in6addr_any,
975 		.mf6c_mcastgrp = *mcastgrp,
976 	};
977 
978 	if (ipv6_addr_any(mcastgrp))
979 		return mr_mfc_find_any_parent(mrt, mifi);
980 	return mr_mfc_find_any(mrt, mifi, &arg);
981 }
982 
983 /* Look for a (S,G,iif) entry if parent != -1 */
984 static struct mfc6_cache *
985 ip6mr_cache_find_parent(struct mr_table *mrt,
986 			const struct in6_addr *origin,
987 			const struct in6_addr *mcastgrp,
988 			int parent)
989 {
990 	struct mfc6_cache_cmp_arg arg = {
991 		.mf6c_origin = *origin,
992 		.mf6c_mcastgrp = *mcastgrp,
993 	};
994 
995 	return mr_mfc_find_parent(mrt, &arg, parent);
996 }
997 
998 /* Allocate a multicast cache entry */
999 static struct mfc6_cache *ip6mr_cache_alloc(void)
1000 {
1001 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1002 	if (!c)
1003 		return NULL;
1004 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1005 	c->_c.mfc_un.res.minvif = MAXMIFS;
1006 	c->_c.free = ip6mr_cache_free_rcu;
1007 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1008 	return c;
1009 }
1010 
1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1012 {
1013 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1014 	if (!c)
1015 		return NULL;
1016 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1017 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1018 	return c;
1019 }
1020 
1021 /*
1022  *	A cache entry has gone into a resolved state from queued
1023  */
1024 
1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1026 				struct mfc6_cache *uc, struct mfc6_cache *c)
1027 {
1028 	struct sk_buff *skb;
1029 
1030 	/*
1031 	 *	Play the pending entries through our router
1032 	 */
1033 
1034 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1035 		if (ipv6_hdr(skb)->version == 0) {
1036 			struct nlmsghdr *nlh = skb_pull(skb,
1037 							sizeof(struct ipv6hdr));
1038 
1039 			if (mr_fill_mroute(mrt, skb, &c->_c,
1040 					   nlmsg_data(nlh)) > 0) {
1041 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1042 			} else {
1043 				nlh->nlmsg_type = NLMSG_ERROR;
1044 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1045 				skb_trim(skb, nlh->nlmsg_len);
1046 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1047 			}
1048 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1049 		} else {
1050 			rcu_read_lock();
1051 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1052 			rcu_read_unlock();
1053 		}
1054 	}
1055 }
1056 
1057 /*
1058  *	Bounce a cache query up to pim6sd and netlink.
1059  *
1060  *	Called under rcu_read_lock()
1061  */
1062 
/*
 * Build a struct mrt6msg report about @pkt and queue it on the mroute
 * socket attached to @mrt; the report skb is also handed to
 * mrt6msg_netlink_event() before it is queued.
 *
 * @mrt:    table whose mroute_sk receives the report
 * @pkt:    packet the report is about; this function does not free it
 * @mifi:   value stored in im6_mif, except for MRT6MSG_WHOLEPKT where
 *          mrt->mroute_reg_vif_num is stored instead
 * @assert: message type stored in im6_msgtype
 *
 * Returns 0 on success, -EINVAL when no mroute socket is attached,
 * -ENOBUFS when the report skb cannot be allocated, and -ENOMEM when
 * queueing on the socket fails (the report skb is dropped in that case).
 */
1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1064 			      mifi_t mifi, int assert)
1065 {
1066 	enum skb_drop_reason reason;
1067 	struct sock *mroute6_sk;
1068 	struct sk_buff *skb;
1069 	struct mrt6msg *msg;
1070 
	/* Nothing to do when no user space daemon socket is attached. */
1071 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1072 	if (!mroute6_sk)
1073 		return -EINVAL;
1074 
	/*
	 * Whole-packet reports are built from @pkt itself with headroom
	 * requested for the mrt6msg header; every other report gets a fresh
	 * skb sized for an IPv6 header plus the mrt6msg.
	 */
1075 #ifdef CONFIG_IPV6_PIMSM_V2
1076 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1077 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1078 						+sizeof(*msg));
1079 	else
1080 #endif
1081 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1082 
1083 	if (!skb)
1084 		return -ENOBUFS;
1085 
1086 	/* I suppose that internal messages
1087 	 * do not require checksums */
1088 
1089 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1090 
1091 #ifdef CONFIG_IPV6_PIMSM_V2
1092 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1093 		/* Ugly, but we have no choice with this interface.
1094 		   Duplicate old header, fix length etc.
1095 		   And all this only to mangle msg->im6_msgtype and
1096 		   to set msg->im6_mbz to "mbz" :-)
1097 		 */
		/* Drop everything in front of the network header ... */
1098 		__skb_pull(skb, skb_network_offset(pkt));
1099 
		/* ... and put the mrt6msg header in front of it. */
1100 		skb_push(skb, sizeof(*msg));
1101 		skb_reset_transport_header(skb);
1102 		msg = (struct mrt6msg *)skb_transport_header(skb);
1103 		msg->im6_mbz = 0;
1104 		msg->im6_msgtype = assert;
		/* WRMIFWHOLE reports the caller's mif, WHOLEPKT the register vif. */
1105 		if (assert == MRT6MSG_WRMIFWHOLE)
1106 			msg->im6_mif = mifi;
1107 		else
1108 			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1109 		msg->im6_pad = 0;
1110 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1111 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1112 
1113 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1114 	} else
1115 #endif
1116 	{
1117 	/*
1118 	 *	Copy the IP header
1119 	 */
1120 
1121 	skb_put(skb, sizeof(struct ipv6hdr));
1122 	skb_reset_network_header(skb);
1123 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1124 
1125 	/*
1126 	 *	Add our header
1127 	 */
1128 	skb_put(skb, sizeof(*msg));
1129 	skb_reset_transport_header(skb);
1130 	msg = (struct mrt6msg *)skb_transport_header(skb);
1131 
1132 	msg->im6_mbz = 0;
1133 	msg->im6_msgtype = assert;
1134 	msg->im6_mif = mifi;
1135 	msg->im6_pad = 0;
1136 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1137 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1138 
	/* The report shares @pkt's dst entry (extra reference taken). */
1139 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1140 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1141 	}
1142 
1143 	mrt6msg_netlink_event(mrt, skb);
1144 
1145 	/* Deliver to user space multicast routing algorithms */
1146 	reason = sock_queue_rcv_skb_reason(mroute6_sk, skb);
1147 
	/* A non-zero reason means the skb was not queued: drop it here. */
1148 	if (reason) {
1149 		sk_skb_reason_drop(mroute6_sk, skb, reason);
1150 		return -ENOMEM;
1151 	}
1152 
1153 	return 0;
1154 }
1155 
1156 /* Queue a packet for resolution. It gets locked cache entry! */
/*
 * Looks up (or creates) the unresolved cache entry matching the source and
 * destination addresses of @skb and appends @skb to its pending queue.
 * When a new entry is created, a MRT6MSG_NOCACHE report is sent first.
 *
 * @skb is consumed on every path: it is either queued on the entry or
 * freed.  The whole operation runs under mfc_unres_lock.
 *
 * Returns 0 when the packet was queued, -ENOBUFS when no entry could be
 * allocated or the entry's queue already holds more than 3 packets, or the
 * error returned by ip6mr_cache_report().
 */
1157 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1158 				  struct sk_buff *skb, struct net_device *dev)
1159 {
1160 	struct mfc6_cache *c;
1161 	bool found = false;
1162 	int err;
1163 
1164 	spin_lock_bh(&mfc_unres_lock);
	/* Search the unresolved queue for an entry with the same (S,G). */
1165 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1166 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1167 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1168 			found = true;
1169 			break;
1170 		}
1171 	}
1172 
1173 	if (!found) {
1174 		/*
1175 		 *	Create a new entry if allowable
1176 		 */
1177 
1178 		c = ip6mr_cache_alloc_unres();
1179 		if (!c) {
1180 			spin_unlock_bh(&mfc_unres_lock);
1181 
1182 			kfree_skb(skb);
1183 			return -ENOBUFS;
1184 		}
1185 
1186 		/* Fill in the new cache entry */
1187 		c->_c.mfc_parent = -1;
1188 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1189 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1190 
1191 		/*
1192 		 *	Reflect first query at pim6sd
1193 		 */
1194 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1195 		if (err < 0) {
1196 			/* If the report failed throw the cache entry
1197 			   out - Brad Parker
1198 			 */
1199 			spin_unlock_bh(&mfc_unres_lock);
1200 
1201 			ip6mr_cache_free(c);
1202 			kfree_skb(skb);
1203 			return err;
1204 		}
1205 
		/* Publish the new entry on the unresolved queue. */
1206 		atomic_inc(&mrt->cache_resolve_queue_len);
1207 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1208 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1209 
1210 		ipmr_do_expire_process(mrt);
1211 	}
1212 
1213 	/* See if we can append the packet */
1214 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1215 		kfree_skb(skb);
1216 		err = -ENOBUFS;
1217 	} else {
		/* Record the incoming device on the queued skb when one is given. */
1218 		if (dev) {
1219 			skb->dev = dev;
1220 			skb->skb_iif = dev->ifindex;
1221 		}
1222 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1223 		err = 0;
1224 	}
1225 
1226 	spin_unlock_bh(&mfc_unres_lock);
1227 	return err;
1228 }
1229 
1230 /*
1231  *	MFC6 cache manipulation by user space
1232  */
1233 
1234 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1235 			    int parent)
1236 {
1237 	struct mfc6_cache *c;
1238 
1239 	/* The entries are added/deleted only under RTNL */
1240 	rcu_read_lock();
1241 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1242 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1243 	rcu_read_unlock();
1244 	if (!c)
1245 		return -ENOENT;
1246 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1247 	list_del_rcu(&c->_c.list);
1248 
1249 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1250 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1251 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1252 	mr_cache_put(&c->_c);
1253 	return 0;
1254 }
1255 
1256 static int ip6mr_device_event(struct notifier_block *this,
1257 			      unsigned long event, void *ptr)
1258 {
1259 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1260 	struct net *net = dev_net(dev);
1261 	struct mr_table *mrt;
1262 	struct vif_device *v;
1263 	int ct;
1264 
1265 	if (event != NETDEV_UNREGISTER)
1266 		return NOTIFY_DONE;
1267 
1268 	ip6mr_for_each_table(mrt, net) {
1269 		v = &mrt->vif_table[0];
1270 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1271 			if (rcu_access_pointer(v->dev) == dev)
1272 				mif6_delete(mrt, ct, 1, NULL);
1273 		}
1274 	}
1275 
1276 	return NOTIFY_DONE;
1277 }
1278 
1279 static unsigned int ip6mr_seq_read(const struct net *net)
1280 {
1281 	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1282 }
1283 
1284 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1285 		      struct netlink_ext_ack *extack)
1286 {
1287 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1288 		       ip6mr_mr_table_iter, extack);
1289 }
1290 
1291 static struct notifier_block ip6_mr_notifier = {
1292 	.notifier_call = ip6mr_device_event
1293 };
1294 
1295 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1296 	.family		= RTNL_FAMILY_IP6MR,
1297 	.fib_seq_read	= ip6mr_seq_read,
1298 	.fib_dump	= ip6mr_dump,
1299 	.owner		= THIS_MODULE,
1300 };
1301 
1302 static int __net_init ip6mr_notifier_init(struct net *net)
1303 {
1304 	struct fib_notifier_ops *ops;
1305 
1306 	atomic_set(&net->ipv6.ipmr_seq, 0);
1307 
1308 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1309 	if (IS_ERR(ops))
1310 		return PTR_ERR(ops);
1311 
1312 	net->ipv6.ip6mr_notifier_ops = ops;
1313 
1314 	return 0;
1315 }
1316 
1317 static void __net_exit ip6mr_notifier_exit(struct net *net)
1318 {
1319 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1320 	net->ipv6.ip6mr_notifier_ops = NULL;
1321 }
1322 
1323 /* Setup for IP multicast routing */
/*
 * Per-netns initialisation: register the FIB notifier ops, initialise the
 * ip6mr rules and, with CONFIG_PROC_FS, create the "ip6_mr_vif" and
 * "ip6_mr_cache" proc entries.
 *
 * Returns 0 on success or a negative errno; on failure every step that
 * already succeeded is undone in reverse order.
 */
1324 static int __net_init ip6mr_net_init(struct net *net)
1325 {
1326 	int err;
1327 
1328 	err = ip6mr_notifier_init(net);
1329 	if (err)
1330 		return err;
1331 
1332 	err = ip6mr_rules_init(net);
1333 	if (err < 0)
1334 		goto ip6mr_rules_fail;
1335 
1336 #ifdef CONFIG_PROC_FS
	/* proc_create_net() only reports failure as NULL, so preset the error. */
1337 	err = -ENOMEM;
1338 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1339 			sizeof(struct mr_vif_iter)))
1340 		goto proc_vif_fail;
1341 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1342 			sizeof(struct mr_mfc_iter)))
1343 		goto proc_cache_fail;
1344 #endif
1345 
1346 	return 0;
1347 
	/* Error unwinding; these labels are only reachable with CONFIG_PROC_FS. */
1348 #ifdef CONFIG_PROC_FS
1349 proc_cache_fail:
1350 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1351 proc_vif_fail:
	/* ip6mr_rules_exit() is called under RTNL, as in ip6mr_net_exit_batch(). */
1352 	rtnl_lock();
1353 	ip6mr_rules_exit(net);
1354 	rtnl_unlock();
1355 #endif
1356 ip6mr_rules_fail:
1357 	ip6mr_notifier_exit(net);
1358 	return err;
1359 }
1360 
1361 static void __net_exit ip6mr_net_exit(struct net *net)
1362 {
1363 #ifdef CONFIG_PROC_FS
1364 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1365 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1366 #endif
1367 	ip6mr_notifier_exit(net);
1368 }
1369 
1370 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1371 {
1372 	struct net *net;
1373 
1374 	rtnl_lock();
1375 	list_for_each_entry(net, net_list, exit_list)
1376 		ip6mr_rules_exit(net);
1377 	rtnl_unlock();
1378 }
1379 
1380 static struct pernet_operations ip6mr_net_ops = {
1381 	.init = ip6mr_net_init,
1382 	.exit = ip6mr_net_exit,
1383 	.exit_batch = ip6mr_net_exit_batch,
1384 };
1385 
1386 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1387 	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1388 	 .msgtype = RTM_GETROUTE,
1389 	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
1390 };
1391 
1392 int __init ip6_mr_init(void)
1393 {
1394 	int err;
1395 
1396 	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1397 	if (!mrt_cachep)
1398 		return -ENOMEM;
1399 
1400 	err = register_pernet_subsys(&ip6mr_net_ops);
1401 	if (err)
1402 		goto reg_pernet_fail;
1403 
1404 	err = register_netdevice_notifier(&ip6_mr_notifier);
1405 	if (err)
1406 		goto reg_notif_fail;
1407 #ifdef CONFIG_IPV6_PIMSM_V2
1408 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1409 		pr_err("%s: can't add PIM protocol\n", __func__);
1410 		err = -EAGAIN;
1411 		goto add_proto_fail;
1412 	}
1413 #endif
1414 	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1415 	if (!err)
1416 		return 0;
1417 
1418 #ifdef CONFIG_IPV6_PIMSM_V2
1419 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1420 add_proto_fail:
1421 	unregister_netdevice_notifier(&ip6_mr_notifier);
1422 #endif
1423 reg_notif_fail:
1424 	unregister_pernet_subsys(&ip6mr_net_ops);
1425 reg_pernet_fail:
1426 	kmem_cache_destroy(mrt_cachep);
1427 	return err;
1428 }
1429 
/*
 * Undo ip6_mr_init(): unregister the rtnetlink handlers, the PIM protocol
 * handler (CONFIG_IPV6_PIMSM_V2 only), the netdevice notifier and the
 * pernet operations, then destroy the mfc6_cache slab.
 */
1430 void __init ip6_mr_cleanup(void)
1431 {
1432 	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
1433 #ifdef CONFIG_IPV6_PIMSM_V2
1434 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1435 #endif
1436 	unregister_netdevice_notifier(&ip6_mr_notifier);
1437 	unregister_pernet_subsys(&ip6mr_net_ops);
1438 	kmem_cache_destroy(mrt_cachep);
1439 }
1440 
/*
 * Add or update the MFC entry described by @mfc in @mrt.
 *
 * @net:     netns used for FIB notifications and for resolving queued packets
 * @mrt:     table to modify
 * @mfc:     user supplied description (origin, group, parent, output ifset)
 * @mrtsock: when zero the entry is flagged MFC_STATIC
 * @parent:  parent value used only for the lookup of an existing entry
 *
 * If a matching entry already exists it is updated in place under mrt_lock.
 * Otherwise a new entry is inserted and, if an unresolved entry for the same
 * (origin, group) is queued, that entry is taken off the queue and handed to
 * ip6mr_cache_resolve() before being freed.
 *
 * Returns 0 on success, -ENFILE when mf6cc_parent is out of range, -EINVAL
 * when the group is neither the any-address nor multicast, -ENOMEM when the
 * entry cannot be allocated, or the rhltable insertion error.
 */
1441 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1442 			 struct mf6cctl *mfc, int mrtsock, int parent)
1443 {
1444 	unsigned char ttls[MAXMIFS];
1445 	struct mfc6_cache *uc, *c;
1446 	struct mr_mfc *_uc;
1447 	bool found;
1448 	int i, err;
1449 
1450 	if (mfc->mf6cc_parent >= MAXMIFS)
1451 		return -ENFILE;
1452 
	/* Threshold 1 for every mif set in mf6cc_ifset, 255 for all others. */
1453 	memset(ttls, 255, MAXMIFS);
1454 	for (i = 0; i < MAXMIFS; i++) {
1455 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1456 			ttls[i] = 1;
1457 	}
1458 
1459 	/* The entries are added/deleted only under RTNL */
1460 	rcu_read_lock();
1461 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1462 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1463 	rcu_read_unlock();
	/* Existing entry: update parent, thresholds and flags in place. */
1464 	if (c) {
1465 		spin_lock(&mrt_lock);
1466 		c->_c.mfc_parent = mfc->mf6cc_parent;
1467 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1468 		if (!mrtsock)
1469 			c->_c.mfc_flags |= MFC_STATIC;
1470 		spin_unlock(&mrt_lock);
1471 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1472 					       c, mrt->id);
1473 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1474 		return 0;
1475 	}
1476 
	/* A new entry needs a multicast group or the any-address wildcard. */
1477 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1478 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1479 		return -EINVAL;
1480 
1481 	c = ip6mr_cache_alloc();
1482 	if (!c)
1483 		return -ENOMEM;
1484 
1485 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1486 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1487 	c->_c.mfc_parent = mfc->mf6cc_parent;
1488 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1489 	if (!mrtsock)
1490 		c->_c.mfc_flags |= MFC_STATIC;
1491 
1492 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1493 				  ip6mr_rht_params);
1494 	if (err) {
1495 		pr_err("ip6mr: rhtable insert error %d\n", err);
1496 		ip6mr_cache_free(c);
1497 		return err;
1498 	}
1499 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1500 
1501 	/* Check to see if we resolved a queued list. If so we
1502 	 * need to send on the frames and tidy up.
1503 	 */
1504 	found = false;
1505 	spin_lock_bh(&mfc_unres_lock);
1506 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1507 		uc = (struct mfc6_cache *)_uc;
1508 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1509 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1510 			list_del(&_uc->list);
1511 			atomic_dec(&mrt->cache_resolve_queue_len);
1512 			found = true;
1513 			break;
1514 		}
1515 	}
	/* The expire timer is not needed once the unresolved queue is empty. */
1516 	if (list_empty(&mrt->mfc_unres_queue))
1517 		timer_delete(&mrt->ipmr_expire_timer);
1518 	spin_unlock_bh(&mfc_unres_lock);
1519 
	/* uc is only valid here when found is true. */
1520 	if (found) {
1521 		ip6mr_cache_resolve(net, mrt, uc, c);
1522 		ip6mr_cache_free(uc);
1523 	}
1524 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1525 				       c, mrt->id);
1526 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1527 	return 0;
1528 }
1529 
1530 /*
1531  *	Close the multicast socket, and clear the vif tables etc
1532  */
1533 
/*
 * Flush parts of @mrt selected by the MRT6_FLUSH_* bits in @flags:
 *
 *  - MRT6_FLUSH_MIFS / MRT6_FLUSH_MIFS_STATIC delete the vifs without /
 *    with VIFF_STATIC set;
 *  - MRT6_FLUSH_MFC / MRT6_FLUSH_MFC_STATIC delete the resolved cache
 *    entries without / with MFC_STATIC set;
 *  - MRT6_FLUSH_MFC additionally destroys every entry on the unresolved
 *    queue.
 */
1534 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1535 {
1536 	struct mr_mfc *c, *tmp;
1537 	LIST_HEAD(list);
1538 	int i;
1539 
1540 	/* Shut down all active vif entries */
1541 	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1542 		for (i = 0; i < mrt->maxvif; i++) {
			/* Skip vifs whose static/non-static class was not requested. */
1543 			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1544 			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1545 			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1546 				continue;
1547 			mif6_delete(mrt, i, 0, &list);
1548 		}
		/* Unregister whatever mif6_delete() collected on the local list. */
1549 		unregister_netdevice_many(&list);
1550 	}
1551 
1552 	/* Wipe the cache */
1553 	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1554 		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			/* Skip entries whose static/non-static class was not requested. */
1555 			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1556 			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1557 				continue;
1558 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1559 			list_del_rcu(&c->list);
1560 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1561 						       FIB_EVENT_ENTRY_DEL,
1562 						       (struct mfc6_cache *)c, mrt->id);
1563 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1564 			mr_cache_put(c);
1565 		}
1566 	}
1567 
	/* Drop pending unresolved entries; the queue is only locked if non-empty. */
1568 	if (flags & MRT6_FLUSH_MFC) {
1569 		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1570 			spin_lock_bh(&mfc_unres_lock);
1571 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1572 				list_del(&c->list);
1573 				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1574 						  RTM_DELROUTE);
1575 				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1576 			}
1577 			spin_unlock_bh(&mfc_unres_lock);
1578 		}
1579 	}
1580 }
1581 
1582 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1583 {
1584 	int err = 0;
1585 	struct net *net = sock_net(sk);
1586 
1587 	rtnl_lock();
1588 	spin_lock(&mrt_lock);
1589 	if (rtnl_dereference(mrt->mroute_sk)) {
1590 		err = -EADDRINUSE;
1591 	} else {
1592 		rcu_assign_pointer(mrt->mroute_sk, sk);
1593 		sock_set_flag(sk, SOCK_RCU_FREE);
1594 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1595 	}
1596 	spin_unlock(&mrt_lock);
1597 
1598 	if (!err)
1599 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1600 					     NETCONFA_MC_FORWARDING,
1601 					     NETCONFA_IFINDEX_ALL,
1602 					     net->ipv6.devconf_all);
1603 	rtnl_unlock();
1604 
1605 	return err;
1606 }
1607 
1608 int ip6mr_sk_done(struct sock *sk)
1609 {
1610 	struct net *net = sock_net(sk);
1611 	struct ipv6_devconf *devconf;
1612 	struct mr_table *mrt;
1613 	int err = -EACCES;
1614 
1615 	if (sk->sk_type != SOCK_RAW ||
1616 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1617 		return err;
1618 
1619 	devconf = net->ipv6.devconf_all;
1620 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1621 		return err;
1622 
1623 	rtnl_lock();
1624 	ip6mr_for_each_table(mrt, net) {
1625 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1626 			spin_lock(&mrt_lock);
1627 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1628 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1629 			 * so the RCU grace period before sk freeing
1630 			 * is guaranteed by sk_destruct()
1631 			 */
1632 			atomic_dec(&devconf->mc_forwarding);
1633 			spin_unlock(&mrt_lock);
1634 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1635 						     NETCONFA_MC_FORWARDING,
1636 						     NETCONFA_IFINDEX_ALL,
1637 						     net->ipv6.devconf_all);
1638 
1639 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1640 			err = 0;
1641 			break;
1642 		}
1643 	}
1644 	rtnl_unlock();
1645 
1646 	return err;
1647 }
1648 
1649 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1650 {
1651 	struct mr_table *mrt;
1652 	struct flowi6 fl6 = {
1653 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1654 		.flowi6_oif	= skb->dev->ifindex,
1655 		.flowi6_mark	= skb->mark,
1656 	};
1657 
1658 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1659 		return NULL;
1660 
1661 	return rcu_access_pointer(mrt->mroute_sk);
1662 }
1663 EXPORT_SYMBOL(mroute6_is_socket);
1664 
1665 /*
1666  *	Socket options and virtual interface manipulation. The whole
1667  *	virtual interface system is a complete heap, but unfortunately
1668  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1669  *	MOSPF/PIM router set up we can clean this up.
1670  */
1671 
/*
 * Handle the MRT6_* socket options on a raw ICMPv6 socket.
 *
 * @sk:      socket the option is set on
 * @optname: one of the MRT6_* option names handled in the switch below
 * @optval:  option payload
 * @optlen:  length of the payload
 *
 * The table operated on is the one selected by the socket's ip6mr_table
 * (RT6_TABLE_DFLT when that is zero).  Every option except MRT6_INIT
 * requires that @sk is the table's mroute socket or that the caller has
 * CAP_NET_ADMIN in the netns' user namespace.
 *
 * Returns 0 or the handler's result on success paths, -EOPNOTSUPP for a
 * socket that is not raw ICMPv6, -ENOENT when the table does not exist,
 * -EACCES on a failed permission check, -EINVAL for a bad option length,
 * -EFAULT when the payload cannot be copied, and -ENOPROTOOPT for an
 * unknown option.
 */
1672 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1673 			  unsigned int optlen)
1674 {
1675 	int ret, parent = 0;
1676 	struct mif6ctl vif;
1677 	struct mf6cctl mfc;
1678 	mifi_t mifi;
1679 	struct net *net = sock_net(sk);
1680 	struct mr_table *mrt;
1681 
1682 	if (sk->sk_type != SOCK_RAW ||
1683 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1684 		return -EOPNOTSUPP;
1685 
1686 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1687 	if (!mrt)
1688 		return -ENOENT;
1689 
	/* Only the owning mroute socket or a CAP_NET_ADMIN caller may go on. */
1690 	if (optname != MRT6_INIT) {
1691 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1692 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1693 			return -EACCES;
1694 	}
1695 
1696 	switch (optname) {
1697 	case MRT6_INIT:
1698 		if (optlen < sizeof(int))
1699 			return -EINVAL;
1700 
1701 		return ip6mr_sk_init(mrt, sk);
1702 
1703 	case MRT6_DONE:
1704 		return ip6mr_sk_done(sk);
1705 
1706 	case MRT6_ADD_MIF:
1707 		if (optlen < sizeof(vif))
1708 			return -EINVAL;
1709 		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1710 			return -EFAULT;
1711 		if (vif.mif6c_mifi >= MAXMIFS)
1712 			return -ENFILE;
1713 		rtnl_lock();
		/* Last argument: whether the request comes from the mroute socket. */
1714 		ret = mif6_add(net, mrt, &vif,
1715 			       sk == rtnl_dereference(mrt->mroute_sk));
1716 		rtnl_unlock();
1717 		return ret;
1718 
1719 	case MRT6_DEL_MIF:
1720 		if (optlen < sizeof(mifi_t))
1721 			return -EINVAL;
1722 		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1723 			return -EFAULT;
1724 		rtnl_lock();
1725 		ret = mif6_delete(mrt, mifi, 0, NULL);
1726 		rtnl_unlock();
1727 		return ret;
1728 
1729 	/*
1730 	 *	Manipulate the forwarding caches. These live
1731 	 *	in a sort of kernel/user symbiosis.
1732 	 */
	/*
	 * The non-proxy variants look entries up with parent == -1; the
	 * _PROXY variants leave parent at 0 here and take it from the
	 * request's mf6cc_parent once the payload has been copied.
	 */
1733 	case MRT6_ADD_MFC:
1734 	case MRT6_DEL_MFC:
1735 		parent = -1;
1736 		fallthrough;
1737 	case MRT6_ADD_MFC_PROXY:
1738 	case MRT6_DEL_MFC_PROXY:
1739 		if (optlen < sizeof(mfc))
1740 			return -EINVAL;
1741 		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1742 			return -EFAULT;
1743 		if (parent == 0)
1744 			parent = mfc.mf6cc_parent;
1745 		rtnl_lock();
1746 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1747 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1748 		else
1749 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1750 					    sk ==
1751 					    rtnl_dereference(mrt->mroute_sk),
1752 					    parent);
1753 		rtnl_unlock();
1754 		return ret;
1755 
	/* Flush vifs and/or cache entries as selected by the MRT6_FLUSH_* bits. */
1756 	case MRT6_FLUSH:
1757 	{
1758 		int flags;
1759 
1760 		if (optlen != sizeof(flags))
1761 			return -EINVAL;
1762 		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1763 			return -EFAULT;
1764 		rtnl_lock();
1765 		mroute_clean_tables(mrt, flags);
1766 		rtnl_unlock();
1767 		return 0;
1768 	}
1769 
1770 	/*
1771 	 *	Control PIM assert (to activate pim will activate assert)
1772 	 */
1773 	case MRT6_ASSERT:
1774 	{
1775 		int v;
1776 
1777 		if (optlen != sizeof(v))
1778 			return -EINVAL;
1779 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1780 			return -EFAULT;
1781 		mrt->mroute_do_assert = v;
1782 		return 0;
1783 	}
1784 
1785 #ifdef CONFIG_IPV6_PIMSM_V2
1786 	case MRT6_PIM:
1787 	{
1788 		bool do_wrmifwhole;
1789 		int v;
1790 
1791 		if (optlen != sizeof(v))
1792 			return -EINVAL;
1793 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1794 			return -EFAULT;
1795 
		/* Remember the WRMIFWHOLE request before v is reduced to 0/1. */
1796 		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1797 		v = !!v;
1798 		rtnl_lock();
1799 		ret = 0;
		/* The PIM-related flags are only rewritten when the on/off state changes. */
1800 		if (v != mrt->mroute_do_pim) {
1801 			mrt->mroute_do_pim = v;
1802 			mrt->mroute_do_assert = v;
1803 			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1804 		}
1805 		rtnl_unlock();
1806 		return ret;
1807 	}
1808 
1809 #endif
1810 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1811 	case MRT6_TABLE:
1812 	{
1813 		u32 v;
1814 
1815 		if (optlen != sizeof(u32))
1816 			return -EINVAL;
1817 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1818 			return -EFAULT;
1819 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1820 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1821 			return -EINVAL;
		/* A socket that already owns a table cannot switch tables. */
1822 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1823 			return -EBUSY;
1824 
1825 		rtnl_lock();
1826 		ret = 0;
1827 		mrt = ip6mr_new_table(net, v);
1828 		if (IS_ERR(mrt))
1829 			ret = PTR_ERR(mrt);
1830 		else
1831 			raw6_sk(sk)->ip6mr_table = v;
1832 		rtnl_unlock();
1833 		return ret;
1834 	}
1835 #endif
1836 	/*
1837 	 *	Spurious command, or MRT6_VERSION which you cannot
1838 	 *	set.
1839 	 */
1840 	default:
1841 		return -ENOPROTOOPT;
1842 	}
1843 }
1844 
1845 /*
1846  *	Getsock opt support for the multicast routing system.
1847  */
1848 
1849 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1850 			  sockptr_t optlen)
1851 {
1852 	int olr;
1853 	int val;
1854 	struct net *net = sock_net(sk);
1855 	struct mr_table *mrt;
1856 
1857 	if (sk->sk_type != SOCK_RAW ||
1858 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1859 		return -EOPNOTSUPP;
1860 
1861 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1862 	if (!mrt)
1863 		return -ENOENT;
1864 
1865 	switch (optname) {
1866 	case MRT6_VERSION:
1867 		val = 0x0305;
1868 		break;
1869 #ifdef CONFIG_IPV6_PIMSM_V2
1870 	case MRT6_PIM:
1871 		val = mrt->mroute_do_pim;
1872 		break;
1873 #endif
1874 	case MRT6_ASSERT:
1875 		val = mrt->mroute_do_assert;
1876 		break;
1877 	default:
1878 		return -ENOPROTOOPT;
1879 	}
1880 
1881 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1882 		return -EFAULT;
1883 
1884 	olr = min_t(int, olr, sizeof(int));
1885 	if (olr < 0)
1886 		return -EINVAL;
1887 
1888 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1889 		return -EFAULT;
1890 	if (copy_to_sockptr(optval, &val, olr))
1891 		return -EFAULT;
1892 	return 0;
1893 }
1894 
1895 /*
1896  *	The IP multicast ioctl support routines.
1897  */
1898 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1899 {
1900 	struct sioc_sg_req6 *sr;
1901 	struct sioc_mif_req6 *vr;
1902 	struct vif_device *vif;
1903 	struct mfc6_cache *c;
1904 	struct net *net = sock_net(sk);
1905 	struct mr_table *mrt;
1906 
1907 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1908 	if (!mrt)
1909 		return -ENOENT;
1910 
1911 	switch (cmd) {
1912 	case SIOCGETMIFCNT_IN6:
1913 		vr = (struct sioc_mif_req6 *)arg;
1914 		if (vr->mifi >= mrt->maxvif)
1915 			return -EINVAL;
1916 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1917 		rcu_read_lock();
1918 		vif = &mrt->vif_table[vr->mifi];
1919 		if (VIF_EXISTS(mrt, vr->mifi)) {
1920 			vr->icount = READ_ONCE(vif->pkt_in);
1921 			vr->ocount = READ_ONCE(vif->pkt_out);
1922 			vr->ibytes = READ_ONCE(vif->bytes_in);
1923 			vr->obytes = READ_ONCE(vif->bytes_out);
1924 			rcu_read_unlock();
1925 			return 0;
1926 		}
1927 		rcu_read_unlock();
1928 		return -EADDRNOTAVAIL;
1929 	case SIOCGETSGCNT_IN6:
1930 		sr = (struct sioc_sg_req6 *)arg;
1931 
1932 		rcu_read_lock();
1933 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1934 				     &sr->grp.sin6_addr);
1935 		if (c) {
1936 			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1937 			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1938 			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1939 			rcu_read_unlock();
1940 			return 0;
1941 		}
1942 		rcu_read_unlock();
1943 		return -EADDRNOTAVAIL;
1944 	default:
1945 		return -ENOIOCTLCMD;
1946 	}
1947 }
1948 
1949 #ifdef CONFIG_COMPAT
/*
 * Compat request used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6: source
 * and group addresses in, counters out as compat_ulong_t.
 */
1950 struct compat_sioc_sg_req6 {
1951 	struct sockaddr_in6 src;
1952 	struct sockaddr_in6 grp;
1953 	compat_ulong_t pktcnt;
1954 	compat_ulong_t bytecnt;
1955 	compat_ulong_t wrong_if;
1956 };
1957 
/*
 * Compat request used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6: mif
 * index in, packet/byte counters out as compat_ulong_t.
 */
1958 struct compat_sioc_mif_req6 {
1959 	mifi_t	mifi;
1960 	compat_ulong_t icount;
1961 	compat_ulong_t ocount;
1962 	compat_ulong_t ibytes;
1963 	compat_ulong_t obytes;
1964 };
1965 
1966 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1967 {
1968 	struct compat_sioc_sg_req6 sr;
1969 	struct compat_sioc_mif_req6 vr;
1970 	struct vif_device *vif;
1971 	struct mfc6_cache *c;
1972 	struct net *net = sock_net(sk);
1973 	struct mr_table *mrt;
1974 
1975 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1976 	if (!mrt)
1977 		return -ENOENT;
1978 
1979 	switch (cmd) {
1980 	case SIOCGETMIFCNT_IN6:
1981 		if (copy_from_user(&vr, arg, sizeof(vr)))
1982 			return -EFAULT;
1983 		if (vr.mifi >= mrt->maxvif)
1984 			return -EINVAL;
1985 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1986 		rcu_read_lock();
1987 		vif = &mrt->vif_table[vr.mifi];
1988 		if (VIF_EXISTS(mrt, vr.mifi)) {
1989 			vr.icount = READ_ONCE(vif->pkt_in);
1990 			vr.ocount = READ_ONCE(vif->pkt_out);
1991 			vr.ibytes = READ_ONCE(vif->bytes_in);
1992 			vr.obytes = READ_ONCE(vif->bytes_out);
1993 			rcu_read_unlock();
1994 
1995 			if (copy_to_user(arg, &vr, sizeof(vr)))
1996 				return -EFAULT;
1997 			return 0;
1998 		}
1999 		rcu_read_unlock();
2000 		return -EADDRNOTAVAIL;
2001 	case SIOCGETSGCNT_IN6:
2002 		if (copy_from_user(&sr, arg, sizeof(sr)))
2003 			return -EFAULT;
2004 
2005 		rcu_read_lock();
2006 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2007 		if (c) {
2008 			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2009 			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2010 			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2011 			rcu_read_unlock();
2012 
2013 			if (copy_to_user(arg, &sr, sizeof(sr)))
2014 				return -EFAULT;
2015 			return 0;
2016 		}
2017 		rcu_read_unlock();
2018 		return -EADDRNOTAVAIL;
2019 	default:
2020 		return -ENOIOCTLCMD;
2021 	}
2022 }
2023 #endif
2024 
2025 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2026 {
2027 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2028 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2029 	return dst_output(net, sk, skb);
2030 }
2031 
2032 /*
2033  *	Processing handlers for ip6mr_forward
2034  */
2035 
/*
 * Prepare @skb for transmission through mif @vifi of @mrt.
 *
 * Routes the packet out of the mif's link, attaches the resulting dst,
 * points skb->dev at the mif's device, updates the mif's output counters,
 * makes the header writable and decrements the hop limit.
 *
 * For a register mif (CONFIG_IPV6_PIMSM_V2) the packet is instead counted
 * and reported to user space as MRT6MSG_WHOLEPKT, and -1 is returned.
 *
 * Returns 0 when @skb is ready to be sent, -1 otherwise.  @skb is never
 * freed here; the callers in this file free it on a non-zero return.
 */
2036 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
2037 			      struct sk_buff *skb, int vifi)
2038 {
2039 	struct vif_device *vif = &mrt->vif_table[vifi];
2040 	struct net_device *vif_dev;
2041 	struct ipv6hdr *ipv6h;
2042 	struct dst_entry *dst;
2043 	struct flowi6 fl6;
2044 
	/* A mif without a device cannot transmit. */
2045 	vif_dev = vif_dev_read(vif);
2046 	if (!vif_dev)
2047 		return -1;
2048 
2049 #ifdef CONFIG_IPV6_PIMSM_V2
2050 	if (vif->flags & MIFF_REGISTER) {
2051 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2052 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2053 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2054 		DEV_STATS_INC(vif_dev, tx_packets);
2055 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2056 		return -1;
2057 	}
2058 #endif
2059 
2060 	ipv6h = ipv6_hdr(skb);
2061 
	/* Route towards the packet's destination through the mif's link. */
2062 	fl6 = (struct flowi6) {
2063 		.flowi6_oif = vif->link,
2064 		.daddr = ipv6h->daddr,
2065 	};
2066 
2067 	dst = ip6_route_output(net, NULL, &fl6);
2068 	if (dst->error) {
2069 		dst_release(dst);
2070 		return -1;
2071 	}
2072 
	/* Replace whatever dst the skb carried with the new route. */
2073 	skb_dst_drop(skb);
2074 	skb_dst_set(skb, dst);
2075 
2076 	/*
2077 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2078 	 * not only before forwarding, but after forwarding on all output
2079 	 * interfaces. It is clear, if mrouter runs a multicasting
2080 	 * program, it should receive packets not depending to what interface
2081 	 * program is joined.
2082 	 * If we will not make it, the program will have to join on all
2083 	 * interfaces. On the other hand, multihoming host (or router, but
2084 	 * not mrouter) cannot join to more than one interface - it will
2085 	 * result in receiving multiple packets.
2086 	 */
2087 	skb->dev = vif_dev;
2088 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2089 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2090 
2091 	/* We are about to write */
2092 	/* XXX: extension headers? */
2093 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2094 		return -1;
2095 
	/* Re-read the header pointer after skb_cow() before writing to it. */
2096 	ipv6h = ipv6_hdr(skb);
2097 	ipv6h->hop_limit--;
2098 	return 0;
2099 }
2100 
2101 static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
2102 			   struct sk_buff *skb, int vifi)
2103 {
2104 	struct net_device *indev = skb->dev;
2105 
2106 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2107 		goto out_free;
2108 
2109 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2110 
2111 	NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2112 		net, NULL, skb, indev, skb->dev,
2113 		ip6mr_forward2_finish);
2114 	return;
2115 
2116 out_free:
2117 	kfree_skb(skb);
2118 }
2119 
2120 static void ip6mr_output2(struct net *net, struct mr_table *mrt,
2121 			  struct sk_buff *skb, int vifi)
2122 {
2123 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2124 		goto out_free;
2125 
2126 	ip6_output(net, NULL, skb);
2127 	return;
2128 
2129 out_free:
2130 	kfree_skb(skb);
2131 }
2132 
2133 /* Called with rcu_read_lock() */
2134 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2135 {
2136 	int ct;
2137 
2138 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2139 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2140 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2141 			break;
2142 	}
2143 	return ct;
2144 }
2145 
2146 /* Called under rcu_read_lock() */
/*
 * Forward @skb, received on @dev, according to the resolved cache entry @c.
 *
 * Updates the entry's packet/byte counters and last-use time, checks that
 * the packet arrived on the entry's parent mif (counting a wrong-interface
 * hit and possibly reporting to user space when it did not), and then sends
 * a copy out of every mif whose threshold is below the packet's hop limit.
 *
 * @skb is consumed: it is either passed to ip6mr_forward2() for the last
 * selected mif or freed.
 */
2147 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2148 			   struct net_device *dev, struct sk_buff *skb,
2149 			   struct mfc6_cache *c)
2150 {
	/* psend: mif chosen for the next transmission, -1 while none is. */
2151 	int psend = -1;
2152 	int vif, ct;
	/* true_vifi: mif index matching @dev, negative if there is none. */
2153 	int true_vifi = ip6mr_find_vif(mrt, dev);
2154 
2155 	vif = c->_c.mfc_parent;
2156 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2157 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2158 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2159 
2160 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2161 		struct mfc6_cache *cache_proxy;
2162 
2163 		/* For an (*,G) entry, we only check that the incoming
2164 		 * interface is part of the static tree.
2165 		 */
2166 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2167 		if (cache_proxy &&
2168 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2169 			goto forward;
2170 	}
2171 
2172 	/*
2173 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2174 	 */
2175 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2176 		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2177 
		/* Reports are sent at most once per MFC_ASSERT_THRESH per entry. */
2178 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2179 		    /* pimsm uses asserts, when switching from RPT to SPT,
2180 		       so that we cannot check that packet arrived on an oif.
2181 		       It is bad, but otherwise we would need to move pretty
2182 		       large chunk of pimd to kernel. Ough... --ANK
2183 		     */
2184 		    (mrt->mroute_do_pim ||
2185 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2186 		    time_after(jiffies,
2187 			       c->_c.mfc_un.res.last_assert +
2188 			       MFC_ASSERT_THRESH)) {
2189 			c->_c.mfc_un.res.last_assert = jiffies;
2190 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2191 			if (mrt->mroute_do_wrvifwhole)
2192 				ip6mr_cache_report(mrt, skb, true_vifi,
2193 						   MRT6MSG_WRMIFWHOLE);
2194 		}
2195 		goto dont_forward;
2196 	}
2197 
2198 forward:
	/* Account the packet as input on the parent mif. */
2199 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2200 		   mrt->vif_table[vif].pkt_in + 1);
2201 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2202 		   mrt->vif_table[vif].bytes_in + skb->len);
2203 
2204 	/*
2205 	 *	Forward the frame
2206 	 */
2207 	if (ipv6_addr_any(&c->mf6c_origin) &&
2208 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2209 		if (true_vifi >= 0 &&
2210 		    true_vifi != c->_c.mfc_parent &&
2211 		    ipv6_hdr(skb)->hop_limit >
2212 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2213 			/* It's an (*,*) entry and the packet is not coming from
2214 			 * the upstream: forward the packet to the upstream
2215 			 * only.
2216 			 */
2217 			psend = c->_c.mfc_parent;
2218 			goto last_forward;
2219 		}
2220 		goto dont_forward;
2221 	}
	/*
	 * Walk the entry's mif range from high to low.  Each newly selected
	 * mif triggers transmission of a clone on the previously selected
	 * one, so the original skb is kept for the last selected mif.
	 */
2222 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2223 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2224 		/* For (*,G) entry, don't forward to the incoming interface */
2225 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2226 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2227 			if (psend != -1) {
2228 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2229 				if (skb2)
2230 					ip6mr_forward2(net, mrt, skb2, psend);
2231 			}
2232 			psend = ct;
2233 		}
2234 	}
2235 last_forward:
	/* The original skb goes out on the last selected mif, if any. */
2236 	if (psend != -1) {
2237 		ip6mr_forward2(net, mrt, skb, psend);
2238 		return;
2239 	}
2240 
2241 dont_forward:
2242 	kfree_skb(skb);
2243 }
2244 
2245 /* Called under rcu_read_lock() */
2246 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
2247 				 struct net_device *dev, struct sk_buff *skb,
2248 				 struct mfc6_cache *c)
2249 {
2250 	int psend = -1;
2251 	int ct;
2252 
2253 	WARN_ON_ONCE(!rcu_read_lock_held());
2254 
2255 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2256 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2257 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2258 
2259 	/* Forward the frame */
2260 	if (ipv6_addr_any(&c->mf6c_origin) &&
2261 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2262 		if (ipv6_hdr(skb)->hop_limit >
2263 		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2264 			/* It's an (*,*) entry and the packet is not coming from
2265 			 * the upstream: forward the packet to the upstream
2266 			 * only.
2267 			 */
2268 			psend = c->_c.mfc_parent;
2269 			goto last_forward;
2270 		}
2271 		goto dont_forward;
2272 	}
2273 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2274 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2275 		if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2276 			if (psend != -1) {
2277 				struct sk_buff *skb2;
2278 
2279 				skb2 = skb_clone(skb, GFP_ATOMIC);
2280 				if (skb2)
2281 					ip6mr_output2(net, mrt, skb2, psend);
2282 			}
2283 			psend = ct;
2284 		}
2285 	}
2286 last_forward:
2287 	if (psend != -1) {
2288 		ip6mr_output2(net, mrt, skb, psend);
2289 		return;
2290 	}
2291 
2292 dont_forward:
2293 	kfree_skb(skb);
2294 }
2295 
2296 /*
2297  *	Multicast packets for forwarding arrive here
2298  */
2299 
2300 int ip6_mr_input(struct sk_buff *skb)
2301 {
2302 	struct net_device *dev = skb->dev;
2303 	struct net *net = dev_net_rcu(dev);
2304 	struct mfc6_cache *cache;
2305 	struct mr_table *mrt;
2306 	struct flowi6 fl6 = {
2307 		.flowi6_iif	= dev->ifindex,
2308 		.flowi6_mark	= skb->mark,
2309 	};
2310 	int err;
2311 
2312 	/* skb->dev passed in is the master dev for vrfs.
2313 	 * Get the proper interface that does have a vif associated with it.
2314 	 */
2315 	if (netif_is_l3_master(dev)) {
2316 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2317 		if (!dev) {
2318 			kfree_skb(skb);
2319 			return -ENODEV;
2320 		}
2321 	}
2322 
2323 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2324 	if (err < 0) {
2325 		kfree_skb(skb);
2326 		return err;
2327 	}
2328 
2329 	cache = ip6mr_cache_find(mrt,
2330 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2331 	if (!cache) {
2332 		int vif = ip6mr_find_vif(mrt, dev);
2333 
2334 		if (vif >= 0)
2335 			cache = ip6mr_cache_find_any(mrt,
2336 						     &ipv6_hdr(skb)->daddr,
2337 						     vif);
2338 	}
2339 
2340 	/*
2341 	 *	No usable cache entry
2342 	 */
2343 	if (!cache) {
2344 		int vif;
2345 
2346 		vif = ip6mr_find_vif(mrt, dev);
2347 		if (vif >= 0) {
2348 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2349 
2350 			return err;
2351 		}
2352 		kfree_skb(skb);
2353 		return -ENODEV;
2354 	}
2355 
2356 	ip6_mr_forward(net, mrt, dev, skb, cache);
2357 
2358 	return 0;
2359 }
2360 
2361 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2362 {
2363 	struct net_device *dev = skb_dst(skb)->dev;
2364 	struct flowi6 fl6 = (struct flowi6) {
2365 		.flowi6_iif = LOOPBACK_IFINDEX,
2366 		.flowi6_mark = skb->mark,
2367 	};
2368 	struct mfc6_cache *cache;
2369 	struct mr_table *mrt;
2370 	int err;
2371 	int vif;
2372 
2373 	guard(rcu)();
2374 
2375 	if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
2376 		goto ip6_output;
2377 	if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
2378 		goto ip6_output;
2379 
2380 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2381 	if (err < 0) {
2382 		kfree_skb(skb);
2383 		return err;
2384 	}
2385 
2386 	cache = ip6mr_cache_find(mrt,
2387 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2388 	if (!cache) {
2389 		vif = ip6mr_find_vif(mrt, dev);
2390 		if (vif >= 0)
2391 			cache = ip6mr_cache_find_any(mrt,
2392 						     &ipv6_hdr(skb)->daddr,
2393 						     vif);
2394 	}
2395 
2396 	/* No usable cache entry */
2397 	if (!cache) {
2398 		vif = ip6mr_find_vif(mrt, dev);
2399 		if (vif >= 0)
2400 			return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2401 		goto ip6_output;
2402 	}
2403 
2404 	/* Wrong interface */
2405 	vif = cache->_c.mfc_parent;
2406 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2407 		goto ip6_output;
2408 
2409 	ip6_mr_output_finish(net, mrt, dev, skb, cache);
2410 	return 0;
2411 
2412 ip6_output:
2413 	return ip6_output(net, sk, skb);
2414 }
2415 
2416 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2417 		    u32 portid)
2418 {
2419 	int err;
2420 	struct mr_table *mrt;
2421 	struct mfc6_cache *cache;
2422 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2423 
2424 	rcu_read_lock();
2425 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2426 	if (!mrt) {
2427 		rcu_read_unlock();
2428 		return -ENOENT;
2429 	}
2430 
2431 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2432 	if (!cache && skb->dev) {
2433 		int vif = ip6mr_find_vif(mrt, skb->dev);
2434 
2435 		if (vif >= 0)
2436 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2437 						     vif);
2438 	}
2439 
2440 	if (!cache) {
2441 		struct sk_buff *skb2;
2442 		struct ipv6hdr *iph;
2443 		struct net_device *dev;
2444 		int vif;
2445 
2446 		dev = skb->dev;
2447 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2448 			rcu_read_unlock();
2449 			return -ENODEV;
2450 		}
2451 
2452 		/* really correct? */
2453 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2454 		if (!skb2) {
2455 			rcu_read_unlock();
2456 			return -ENOMEM;
2457 		}
2458 
2459 		NETLINK_CB(skb2).portid = portid;
2460 		skb_reset_transport_header(skb2);
2461 
2462 		skb_put(skb2, sizeof(struct ipv6hdr));
2463 		skb_reset_network_header(skb2);
2464 
2465 		iph = ipv6_hdr(skb2);
2466 		iph->version = 0;
2467 		iph->priority = 0;
2468 		iph->flow_lbl[0] = 0;
2469 		iph->flow_lbl[1] = 0;
2470 		iph->flow_lbl[2] = 0;
2471 		iph->payload_len = 0;
2472 		iph->nexthdr = IPPROTO_NONE;
2473 		iph->hop_limit = 0;
2474 		iph->saddr = rt->rt6i_src.addr;
2475 		iph->daddr = rt->rt6i_dst.addr;
2476 
2477 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2478 		rcu_read_unlock();
2479 
2480 		return err;
2481 	}
2482 
2483 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2484 	rcu_read_unlock();
2485 	return err;
2486 }
2487 
2488 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2489 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2490 			     int flags)
2491 {
2492 	struct nlmsghdr *nlh;
2493 	struct rtmsg *rtm;
2494 	int err;
2495 
2496 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2497 	if (!nlh)
2498 		return -EMSGSIZE;
2499 
2500 	rtm = nlmsg_data(nlh);
2501 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2502 	rtm->rtm_dst_len  = 128;
2503 	rtm->rtm_src_len  = 128;
2504 	rtm->rtm_tos      = 0;
2505 	rtm->rtm_table    = mrt->id;
2506 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2507 		goto nla_put_failure;
2508 	rtm->rtm_type = RTN_MULTICAST;
2509 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2510 	if (c->_c.mfc_flags & MFC_STATIC)
2511 		rtm->rtm_protocol = RTPROT_STATIC;
2512 	else
2513 		rtm->rtm_protocol = RTPROT_MROUTED;
2514 	rtm->rtm_flags    = 0;
2515 
2516 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2517 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2518 		goto nla_put_failure;
2519 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2520 	/* do not break the dump if cache is unresolved */
2521 	if (err < 0 && err != -ENOENT)
2522 		goto nla_put_failure;
2523 
2524 	nlmsg_end(skb, nlh);
2525 	return 0;
2526 
2527 nla_put_failure:
2528 	nlmsg_cancel(skb, nlh);
2529 	return -EMSGSIZE;
2530 }
2531 
2532 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2533 			      u32 portid, u32 seq, struct mr_mfc *c,
2534 			      int cmd, int flags)
2535 {
2536 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2537 				 cmd, flags);
2538 }
2539 
2540 static int mr6_msgsize(bool unresolved, int maxvif)
2541 {
2542 	size_t len =
2543 		NLMSG_ALIGN(sizeof(struct rtmsg))
2544 		+ nla_total_size(4)	/* RTA_TABLE */
2545 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2546 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2547 		;
2548 
2549 	if (!unresolved)
2550 		len = len
2551 		      + nla_total_size(4)	/* RTA_IIF */
2552 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2553 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2554 						/* RTA_MFC_STATS */
2555 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2556 		;
2557 
2558 	return len;
2559 }
2560 
2561 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2562 			      int cmd)
2563 {
2564 	struct net *net = read_pnet(&mrt->net);
2565 	struct sk_buff *skb;
2566 	int err = -ENOBUFS;
2567 
2568 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2569 			GFP_ATOMIC);
2570 	if (!skb)
2571 		goto errout;
2572 
2573 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2574 	if (err < 0)
2575 		goto errout;
2576 
2577 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2578 	return;
2579 
2580 errout:
2581 	kfree_skb(skb);
2582 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2583 }
2584 
2585 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2586 {
2587 	size_t len =
2588 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2589 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2590 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2591 					/* IP6MRA_CREPORT_SRC_ADDR */
2592 		+ nla_total_size(sizeof(struct in6_addr))
2593 					/* IP6MRA_CREPORT_DST_ADDR */
2594 		+ nla_total_size(sizeof(struct in6_addr))
2595 					/* IP6MRA_CREPORT_PKT */
2596 		+ nla_total_size(payloadlen)
2597 		;
2598 
2599 	return len;
2600 }
2601 
2602 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2603 {
2604 	struct net *net = read_pnet(&mrt->net);
2605 	struct nlmsghdr *nlh;
2606 	struct rtgenmsg *rtgenm;
2607 	struct mrt6msg *msg;
2608 	struct sk_buff *skb;
2609 	struct nlattr *nla;
2610 	int payloadlen;
2611 
2612 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2613 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2614 
2615 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2616 	if (!skb)
2617 		goto errout;
2618 
2619 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2620 			sizeof(struct rtgenmsg), 0);
2621 	if (!nlh)
2622 		goto errout;
2623 	rtgenm = nlmsg_data(nlh);
2624 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2625 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2626 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2627 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2628 			     &msg->im6_src) ||
2629 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2630 			     &msg->im6_dst))
2631 		goto nla_put_failure;
2632 
2633 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2634 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2635 				  nla_data(nla), payloadlen))
2636 		goto nla_put_failure;
2637 
2638 	nlmsg_end(skb, nlh);
2639 
2640 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2641 	return;
2642 
2643 nla_put_failure:
2644 	nlmsg_cancel(skb, nlh);
2645 errout:
2646 	kfree_skb(skb);
2647 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2648 }
2649 
2650 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2651 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2652 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2653 	[RTA_TABLE]		= { .type = NLA_U32 },
2654 };
2655 
2656 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2657 					const struct nlmsghdr *nlh,
2658 					struct nlattr **tb,
2659 					struct netlink_ext_ack *extack)
2660 {
2661 	struct rtmsg *rtm;
2662 	int err;
2663 
2664 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2665 			  extack);
2666 	if (err)
2667 		return err;
2668 
2669 	rtm = nlmsg_data(nlh);
2670 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2671 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2672 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2673 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2674 		NL_SET_ERR_MSG_MOD(extack,
2675 				   "Invalid values in header for multicast route get request");
2676 		return -EINVAL;
2677 	}
2678 
2679 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2680 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2681 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2682 		return -EINVAL;
2683 	}
2684 
2685 	return 0;
2686 }
2687 
2688 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2689 			      struct netlink_ext_ack *extack)
2690 {
2691 	struct net *net = sock_net(in_skb->sk);
2692 	struct in6_addr src = {}, grp = {};
2693 	struct nlattr *tb[RTA_MAX + 1];
2694 	struct mfc6_cache *cache;
2695 	struct mr_table *mrt;
2696 	struct sk_buff *skb;
2697 	u32 tableid;
2698 	int err;
2699 
2700 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2701 	if (err < 0)
2702 		return err;
2703 
2704 	if (tb[RTA_SRC])
2705 		src = nla_get_in6_addr(tb[RTA_SRC]);
2706 	if (tb[RTA_DST])
2707 		grp = nla_get_in6_addr(tb[RTA_DST]);
2708 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2709 
2710 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2711 	if (!mrt) {
2712 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2713 		return -ENOENT;
2714 	}
2715 
2716 	/* entries are added/deleted only under RTNL */
2717 	rcu_read_lock();
2718 	cache = ip6mr_cache_find(mrt, &src, &grp);
2719 	rcu_read_unlock();
2720 	if (!cache) {
2721 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2722 		return -ENOENT;
2723 	}
2724 
2725 	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2726 	if (!skb)
2727 		return -ENOBUFS;
2728 
2729 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2730 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2731 	if (err < 0) {
2732 		kfree_skb(skb);
2733 		return err;
2734 	}
2735 
2736 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2737 }
2738 
2739 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2740 {
2741 	const struct nlmsghdr *nlh = cb->nlh;
2742 	struct fib_dump_filter filter = {
2743 		.rtnl_held = true,
2744 	};
2745 	int err;
2746 
2747 	if (cb->strict_check) {
2748 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2749 					    &filter, cb);
2750 		if (err < 0)
2751 			return err;
2752 	}
2753 
2754 	if (filter.table_id) {
2755 		struct mr_table *mrt;
2756 
2757 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2758 		if (!mrt) {
2759 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2760 				return skb->len;
2761 
2762 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2763 			return -ENOENT;
2764 		}
2765 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2766 				    &mfc_unres_lock, &filter);
2767 		return skb->len ? : err;
2768 	}
2769 
2770 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2771 				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2772 }
2773