xref: /linux/net/ipv6/ip6mr.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
53 struct ip6mr_rule {
54 	struct fib_rule		common;
55 };
56 
57 struct ip6mr_result {
58 	struct mr_table	*mrt;
59 };
60 
61 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
62    Note that changes are serialized via rtnl_lock.
63  */
64 
65 static DEFINE_SPINLOCK(mrt_lock);
66 
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76 
77 /* We have returned to Alan's original scheme. The hash table of resolved
78    entries is changed only in process context and is protected by the
79    weak lock mrt_lock. The queue of unresolved entries is protected by
80    the strong spinlock mfc_unres_lock.
81 
82    As a result, the data path is entirely free of exclusive locks.
83  */
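/* Illustrative sketch of the resulting locking pattern, derived from the
 * code below (not a new API):
 *
 *	reader (data path):
 *		rcu_read_lock();
 *		dev = vif_dev_read(vif);	-- rcu_dereference()
 *		...
 *		rcu_read_unlock();
 *
 *	writer (process context, under RTNL):
 *		spin_lock(&mrt_lock);
 *		rcu_assign_pointer(v->dev, dev);
 *		...
 *		spin_unlock(&mrt_lock);
 */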
84 
85 static struct kmem_cache *mrt_cachep __read_mostly;
86 
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89 
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 			   struct net_device *dev, struct sk_buff *skb,
92 			   struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 			      mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 			      int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 			      struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 			       struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104 
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
106 #define ip6mr_for_each_table(mrt, net) \
107 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
108 				lockdep_rtnl_is_held() || \
109 				list_empty(&net->ipv6.mr6_tables))
110 
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 					    struct mr_table *mrt)
113 {
114 	struct mr_table *ret;
115 
116 	if (!mrt)
117 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 				     struct mr_table, list);
119 	else
120 		ret = list_entry_rcu(mrt->list.next,
121 				     struct mr_table, list);
122 
123 	if (&ret->list == &net->ipv6.mr6_tables)
124 		return NULL;
125 	return ret;
126 }
127 
128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
140 {
141 	struct mr_table *mrt;
142 
143 	rcu_read_lock();
144 	mrt = __ip6mr_get_table(net, id);
145 	rcu_read_unlock();
146 	return mrt;
147 }
148 
149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 			    struct mr_table **mrt)
151 {
152 	int err;
153 	struct ip6mr_result res;
154 	struct fib_lookup_arg arg = {
155 		.result = &res,
156 		.flags = FIB_LOOKUP_NOREF,
157 	};
158 
159 	/* update flow if oif or iif point to device enslaved to l3mdev */
160 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161 
162 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 			       flowi6_to_flowi(flp6), 0, &arg);
164 	if (err < 0)
165 		return err;
166 	*mrt = res.mrt;
167 	return 0;
168 }
169 
170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 			     int flags, struct fib_lookup_arg *arg)
172 {
173 	struct ip6mr_result *res = arg->result;
174 	struct mr_table *mrt;
175 
176 	switch (rule->action) {
177 	case FR_ACT_TO_TBL:
178 		break;
179 	case FR_ACT_UNREACHABLE:
180 		return -ENETUNREACH;
181 	case FR_ACT_PROHIBIT:
182 		return -EACCES;
183 	case FR_ACT_BLACKHOLE:
184 	default:
185 		return -EINVAL;
186 	}
187 
188 	arg->table = fib_rule_get_table(rule, arg);
189 
190 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 	if (!mrt)
192 		return -EAGAIN;
193 	res->mrt = mrt;
194 	return 0;
195 }
196 
197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
198 {
199 	return 1;
200 }
201 
202 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
203 				struct fib_rule_hdr *frh, struct nlattr **tb,
204 				struct netlink_ext_ack *extack)
205 {
206 	return 0;
207 }
208 
209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
210 			      struct nlattr **tb)
211 {
212 	return 1;
213 }
214 
215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 			   struct fib_rule_hdr *frh)
217 {
218 	frh->dst_len = 0;
219 	frh->src_len = 0;
220 	frh->tos     = 0;
221 	return 0;
222 }
223 
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 	.family		= RTNL_FAMILY_IP6MR,
226 	.rule_size	= sizeof(struct ip6mr_rule),
227 	.addr_size	= sizeof(struct in6_addr),
228 	.action		= ip6mr_rule_action,
229 	.match		= ip6mr_rule_match,
230 	.configure	= ip6mr_rule_configure,
231 	.compare	= ip6mr_rule_compare,
232 	.fill		= ip6mr_rule_fill,
233 	.nlgroup	= RTNLGRP_IPV6_RULE,
234 	.owner		= THIS_MODULE,
235 };
236 
237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 	struct fib_rules_ops *ops;
240 	struct mr_table *mrt;
241 	int err;
242 
243 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 	if (IS_ERR(ops))
245 		return PTR_ERR(ops);
246 
247 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248 
249 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 	if (IS_ERR(mrt)) {
251 		err = PTR_ERR(mrt);
252 		goto err1;
253 	}
254 
255 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
256 	if (err < 0)
257 		goto err2;
258 
259 	net->ipv6.mr6_rules_ops = ops;
260 	return 0;
261 
262 err2:
263 	rtnl_lock();
264 	ip6mr_free_table(mrt);
265 	rtnl_unlock();
266 err1:
267 	fib_rules_unregister(ops);
268 	return err;
269 }
270 
271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 	struct mr_table *mrt, *next;
274 
275 	ASSERT_RTNL();
276 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 		list_del(&mrt->list);
278 		ip6mr_free_table(mrt);
279 	}
280 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282 
283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 			    struct netlink_ext_ack *extack)
285 {
286 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288 
289 static unsigned int ip6mr_rules_seq_read(const struct net *net)
290 {
291 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293 
294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
301 #define ip6mr_for_each_table(mrt, net) \
302 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303 
304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 					    struct mr_table *mrt)
306 {
307 	if (!mrt)
308 		return net->ipv6.mrt6;
309 	return NULL;
310 }
311 
312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
313 {
314 	return net->ipv6.mrt6;
315 }
316 
317 #define __ip6mr_get_table ip6mr_get_table
318 
319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 			    struct mr_table **mrt)
321 {
322 	*mrt = net->ipv6.mrt6;
323 	return 0;
324 }
325 
326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 	struct mr_table *mrt;
329 
330 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 	if (IS_ERR(mrt))
332 		return PTR_ERR(mrt);
333 	net->ipv6.mrt6 = mrt;
334 	return 0;
335 }
336 
337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 	ASSERT_RTNL();
340 	ip6mr_free_table(net->ipv6.mrt6);
341 	net->ipv6.mrt6 = NULL;
342 }
343 
344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
345 			    struct netlink_ext_ack *extack)
346 {
347 	return 0;
348 }
349 
350 static unsigned int ip6mr_rules_seq_read(const struct net *net)
351 {
352 	return 0;
353 }
354 #endif
355 
356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 			  const void *ptr)
358 {
359 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361 
362 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365 
366 static const struct rhashtable_params ip6mr_rht_params = {
367 	.head_offset = offsetof(struct mr_mfc, mnode),
368 	.key_offset = offsetof(struct mfc6_cache, cmparg),
369 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
370 	.nelem_hint = 3,
371 	.obj_cmpfn = ip6mr_hash_cmp,
372 	.automatic_shrinking = true,
373 };
374 
375 static void ip6mr_new_table_set(struct mr_table *mrt,
376 				struct net *net)
377 {
378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
379 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
380 #endif
381 }
382 
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 	.mf6c_origin = IN6ADDR_ANY_INIT,
385 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387 
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 	.rht_params = &ip6mr_rht_params,
390 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392 
393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
394 {
395 	struct mr_table *mrt;
396 
397 	mrt = __ip6mr_get_table(net, id);
398 	if (mrt)
399 		return mrt;
400 
401 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
402 			      ipmr_expire_process, ip6mr_new_table_set);
403 }
404 
405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 	struct net *net = read_pnet(&mrt->net);
408 
409 	WARN_ON_ONCE(!mr_can_free_table(net));
410 
411 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
412 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
413 				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
414 	rhltable_destroy(&mrt->mfc_hash);
415 	kfree(mrt);
416 }
417 
418 #ifdef CONFIG_PROC_FS
419 /* The /proc interfaces to multicast routing
420  * /proc/ip6_mr_cache /proc/ip6_mr_vif
421  */
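/* Example output (illustrative values; the formats match the seq_printf()
 * calls below):
 *
 *	$ cat /proc/net/ip6_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0            9540      62      4120      31 00000
 *
 *	$ cat /proc/net/ip6_mr_cache
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff3e::1                          2001:db8::1                      0          62     9540        0  1:1
 */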
422 
423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
424 	__acquires(RCU)
425 {
426 	struct mr_vif_iter *iter = seq->private;
427 	struct net *net = seq_file_net(seq);
428 	struct mr_table *mrt;
429 
430 	rcu_read_lock();
431 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
432 	if (!mrt) {
433 		rcu_read_unlock();
434 		return ERR_PTR(-ENOENT);
435 	}
436 
437 	iter->mrt = mrt;
438 
439 	return mr_vif_seq_start(seq, pos);
440 }
441 
442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
443 	__releases(RCU)
444 {
445 	rcu_read_unlock();
446 }
447 
448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
449 {
450 	struct mr_vif_iter *iter = seq->private;
451 	struct mr_table *mrt = iter->mrt;
452 
453 	if (v == SEQ_START_TOKEN) {
454 		seq_puts(seq,
455 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
456 	} else {
457 		const struct vif_device *vif = v;
458 		const struct net_device *vif_dev;
459 		const char *name;
460 
461 		vif_dev = vif_dev_read(vif);
462 		name = vif_dev ? vif_dev->name : "none";
463 
464 		seq_printf(seq,
465 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
466 			   vif - mrt->vif_table,
467 			   name, vif->bytes_in, vif->pkt_in,
468 			   vif->bytes_out, vif->pkt_out,
469 			   vif->flags);
470 	}
471 	return 0;
472 }
473 
474 static const struct seq_operations ip6mr_vif_seq_ops = {
475 	.start = ip6mr_vif_seq_start,
476 	.next  = mr_vif_seq_next,
477 	.stop  = ip6mr_vif_seq_stop,
478 	.show  = ip6mr_vif_seq_show,
479 };
480 
481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
482 {
483 	struct net *net = seq_file_net(seq);
484 	struct mr_table *mrt;
485 
486 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
487 	if (!mrt)
488 		return ERR_PTR(-ENOENT);
489 
490 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
491 }
492 
493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
494 {
495 	int n;
496 
497 	if (v == SEQ_START_TOKEN) {
498 		seq_puts(seq,
499 			 "Group                            "
500 			 "Origin                           "
501 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
502 	} else {
503 		const struct mfc6_cache *mfc = v;
504 		const struct mr_mfc_iter *it = seq->private;
505 		struct mr_table *mrt = it->mrt;
506 
507 		seq_printf(seq, "%pI6 %pI6 %-3hd",
508 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
509 			   mfc->_c.mfc_parent);
510 
511 		if (it->cache != &mrt->mfc_unres_queue) {
512 			seq_printf(seq, " %8lu %8lu %8lu",
513 				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
514 				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
515 				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
516 			for (n = mfc->_c.mfc_un.res.minvif;
517 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
518 				if (VIF_EXISTS(mrt, n) &&
519 				    mfc->_c.mfc_un.res.ttls[n] < 255)
520 					seq_printf(seq,
521 						   " %2d:%-3d", n,
522 						   mfc->_c.mfc_un.res.ttls[n]);
523 			}
524 		} else {
525 			/* unresolved mfc_caches don't contain
526 			 * pkt, bytes and wrong_if values
527 			 */
528 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
529 		}
530 		seq_putc(seq, '\n');
531 	}
532 	return 0;
533 }
534 
535 static const struct seq_operations ipmr_mfc_seq_ops = {
536 	.start = ipmr_mfc_seq_start,
537 	.next  = mr_mfc_seq_next,
538 	.stop  = mr_mfc_seq_stop,
539 	.show  = ipmr_mfc_seq_show,
540 };
541 #endif
542 
543 #ifdef CONFIG_IPV6_PIMSM_V2
544 
545 static int pim6_rcv(struct sk_buff *skb)
546 {
547 	struct pimreghdr *pim;
548 	struct ipv6hdr   *encap;
549 	struct net_device  *reg_dev = NULL;
550 	struct net *net = dev_net(skb->dev);
551 	struct mr_table *mrt;
552 	struct flowi6 fl6 = {
553 		.flowi6_iif	= skb->dev->ifindex,
554 		.flowi6_mark	= skb->mark,
555 	};
556 	int reg_vif_num;
557 
558 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
559 		goto drop;
560 
561 	pim = (struct pimreghdr *)skb_transport_header(skb);
562 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
563 	    (pim->flags & PIM_NULL_REGISTER) ||
564 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
565 			     sizeof(*pim), IPPROTO_PIM,
566 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
567 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
568 		goto drop;
569 
570 	/* check if the inner packet is destined to a multicast group */
571 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
572 				   sizeof(*pim));
573 
574 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
575 	    encap->payload_len == 0 ||
576 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
577 		goto drop;
578 
579 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
580 		goto drop;
581 
582 	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
583 	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
584 	if (reg_vif_num >= 0)
585 		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
586 
587 	if (!reg_dev)
588 		goto drop;
589 
590 	skb->mac_header = skb->network_header;
591 	skb_pull(skb, (u8 *)encap - skb->data);
592 	skb_reset_network_header(skb);
593 	skb->protocol = htons(ETH_P_IPV6);
594 	skb->ip_summed = CHECKSUM_NONE;
595 
596 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
597 
598 	netif_rx(skb);
599 
600 	return 0;
601  drop:
602 	kfree_skb(skb);
603 	return 0;
604 }
605 
606 static const struct inet6_protocol pim6_protocol = {
607 	.handler	=	pim6_rcv,
608 };
609 
610 /* Service routines creating virtual interfaces: PIMREG */
611 
612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
613 				      struct net_device *dev)
614 {
615 	struct net *net = dev_net(dev);
616 	struct mr_table *mrt;
617 	struct flowi6 fl6 = {
618 		.flowi6_oif	= dev->ifindex,
619 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
620 		.flowi6_mark	= skb->mark,
621 	};
622 
623 	if (!pskb_inet_may_pull(skb))
624 		goto tx_err;
625 
626 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
627 		goto tx_err;
628 
629 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
630 	DEV_STATS_INC(dev, tx_packets);
631 	rcu_read_lock();
632 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
633 			   MRT6MSG_WHOLEPKT);
634 	rcu_read_unlock();
635 	kfree_skb(skb);
636 	return NETDEV_TX_OK;
637 
638 tx_err:
639 	DEV_STATS_INC(dev, tx_errors);
640 	kfree_skb(skb);
641 	return NETDEV_TX_OK;
642 }
643 
644 static int reg_vif_get_iflink(const struct net_device *dev)
645 {
646 	return 0;
647 }
648 
649 static const struct net_device_ops reg_vif_netdev_ops = {
650 	.ndo_start_xmit	= reg_vif_xmit,
651 	.ndo_get_iflink = reg_vif_get_iflink,
652 };
653 
654 static void reg_vif_setup(struct net_device *dev)
655 {
656 	dev->type		= ARPHRD_PIMREG;
657 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
658 	dev->flags		= IFF_NOARP;
659 	dev->netdev_ops		= &reg_vif_netdev_ops;
660 	dev->needs_free_netdev	= true;
661 	dev->netns_immutable	= true;
662 }
663 
664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
665 {
666 	struct net_device *dev;
667 	char name[IFNAMSIZ];
668 
669 	if (mrt->id == RT6_TABLE_DFLT)
670 		sprintf(name, "pim6reg");
671 	else
672 		sprintf(name, "pim6reg%u", mrt->id);
673 
674 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
675 	if (!dev)
676 		return NULL;
677 
678 	dev_net_set(dev, net);
679 
680 	if (register_netdevice(dev)) {
681 		free_netdev(dev);
682 		return NULL;
683 	}
684 
685 	if (dev_open(dev, NULL))
686 		goto failure;
687 
688 	dev_hold(dev);
689 	return dev;
690 
691 failure:
692 	unregister_netdevice(dev);
693 	return NULL;
694 }
695 #endif
696 
697 static int call_ip6mr_vif_entry_notifiers(struct net *net,
698 					  enum fib_event_type event_type,
699 					  struct vif_device *vif,
700 					  struct net_device *vif_dev,
701 					  mifi_t vif_index, u32 tb_id)
702 {
703 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
704 				     vif, vif_dev, vif_index, tb_id,
705 				     &net->ipv6.ipmr_seq);
706 }
707 
708 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
709 					  enum fib_event_type event_type,
710 					  struct mfc6_cache *mfc, u32 tb_id)
711 {
712 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
713 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
714 }
715 
716 /* Delete a VIF entry */
717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
718 		       struct list_head *head)
719 {
720 	struct vif_device *v;
721 	struct net_device *dev;
722 	struct inet6_dev *in6_dev;
723 
724 	if (vifi < 0 || vifi >= mrt->maxvif)
725 		return -EADDRNOTAVAIL;
726 
727 	v = &mrt->vif_table[vifi];
728 
729 	dev = rtnl_dereference(v->dev);
730 	if (!dev)
731 		return -EADDRNOTAVAIL;
732 
733 	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
734 				       FIB_EVENT_VIF_DEL, v, dev,
735 				       vifi, mrt->id);
736 	spin_lock(&mrt_lock);
737 	RCU_INIT_POINTER(v->dev, NULL);
738 
739 #ifdef CONFIG_IPV6_PIMSM_V2
740 	if (vifi == mrt->mroute_reg_vif_num) {
741 		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
742 		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
743 	}
744 #endif
745 
746 	if (vifi + 1 == mrt->maxvif) {
747 		int tmp;
748 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
749 			if (VIF_EXISTS(mrt, tmp))
750 				break;
751 		}
752 		WRITE_ONCE(mrt->maxvif, tmp + 1);
753 	}
754 
755 	spin_unlock(&mrt_lock);
756 
757 	dev_set_allmulti(dev, -1);
758 
759 	in6_dev = __in6_dev_get(dev);
760 	if (in6_dev) {
761 		atomic_dec(&in6_dev->cnf.mc_forwarding);
762 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
763 					     NETCONFA_MC_FORWARDING,
764 					     dev->ifindex, &in6_dev->cnf);
765 	}
766 
767 	if ((v->flags & MIFF_REGISTER) && !notify)
768 		unregister_netdevice_queue(dev, head);
769 
770 	netdev_put(dev, &v->dev_tracker);
771 	return 0;
772 }
773 
774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
775 {
776 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
777 
778 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
779 }
780 
781 static inline void ip6mr_cache_free(struct mfc6_cache *c)
782 {
783 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
784 }
785 
786 /* Destroy an unresolved cache entry, killing queued skbs
787    and reporting error to netlink readers.
788  */
789 
790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
791 {
792 	struct net *net = read_pnet(&mrt->net);
793 	struct sk_buff *skb;
794 
795 	atomic_dec(&mrt->cache_resolve_queue_len);
796 
797 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
798 		if (ipv6_hdr(skb)->version == 0) {
799 			struct nlmsghdr *nlh = skb_pull(skb,
800 							sizeof(struct ipv6hdr));
801 			nlh->nlmsg_type = NLMSG_ERROR;
802 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
803 			skb_trim(skb, nlh->nlmsg_len);
804 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
805 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
806 		} else
807 			kfree_skb(skb);
808 	}
809 
810 	ip6mr_cache_free(c);
811 }
812 
813 
814 /* Timer process for the unresolved queue. */
815 
816 static void ipmr_do_expire_process(struct mr_table *mrt)
817 {
818 	unsigned long now = jiffies;
819 	unsigned long expires = 10 * HZ;
820 	struct mr_mfc *c, *next;
821 
822 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
823 		if (time_after(c->mfc_un.unres.expires, now)) {
824 			/* not yet... */
825 			unsigned long interval = c->mfc_un.unres.expires - now;
826 			if (interval < expires)
827 				expires = interval;
828 			continue;
829 		}
830 
831 		list_del(&c->list);
832 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
833 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
834 	}
835 
836 	if (!list_empty(&mrt->mfc_unres_queue))
837 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
838 }
839 
840 static void ipmr_expire_process(struct timer_list *t)
841 {
842 	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
843 
844 	if (!spin_trylock(&mfc_unres_lock)) {
845 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
846 		return;
847 	}
848 
849 	if (!list_empty(&mrt->mfc_unres_queue))
850 		ipmr_do_expire_process(mrt);
851 
852 	spin_unlock(&mfc_unres_lock);
853 }
854 
855 /* Fill the oifs list. Called with mrt_lock held. */
856 
857 static void ip6mr_update_thresholds(struct mr_table *mrt,
858 				    struct mr_mfc *cache,
859 				    unsigned char *ttls)
860 {
861 	int vifi;
862 
863 	cache->mfc_un.res.minvif = MAXMIFS;
864 	cache->mfc_un.res.maxvif = 0;
865 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
866 
867 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
868 		if (VIF_EXISTS(mrt, vifi) &&
869 		    ttls[vifi] && ttls[vifi] < 255) {
870 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
871 			if (cache->mfc_un.res.minvif > vifi)
872 				cache->mfc_un.res.minvif = vifi;
873 			if (cache->mfc_un.res.maxvif <= vifi)
874 				cache->mfc_un.res.maxvif = vifi + 1;
875 		}
876 	}
877 	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
878 }
879 
880 static int mif6_add(struct net *net, struct mr_table *mrt,
881 		    struct mif6ctl *vifc, int mrtsock)
882 {
883 	int vifi = vifc->mif6c_mifi;
884 	struct vif_device *v = &mrt->vif_table[vifi];
885 	struct net_device *dev;
886 	struct inet6_dev *in6_dev;
887 	int err;
888 
889 	/* Is the vif busy? */
890 	if (VIF_EXISTS(mrt, vifi))
891 		return -EADDRINUSE;
892 
893 	switch (vifc->mif6c_flags) {
894 #ifdef CONFIG_IPV6_PIMSM_V2
895 	case MIFF_REGISTER:
896 		/*
897 		 * Special-purpose VIF in PIM:
898 		 * all packets are sent to the daemon.
899 		 */
900 		if (mrt->mroute_reg_vif_num >= 0)
901 			return -EADDRINUSE;
902 		dev = ip6mr_reg_vif(net, mrt);
903 		if (!dev)
904 			return -ENOBUFS;
905 		err = dev_set_allmulti(dev, 1);
906 		if (err) {
907 			unregister_netdevice(dev);
908 			dev_put(dev);
909 			return err;
910 		}
911 		break;
912 #endif
913 	case 0:
914 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
915 		if (!dev)
916 			return -EADDRNOTAVAIL;
917 		err = dev_set_allmulti(dev, 1);
918 		if (err) {
919 			dev_put(dev);
920 			return err;
921 		}
922 		break;
923 	default:
924 		return -EINVAL;
925 	}
926 
927 	in6_dev = __in6_dev_get(dev);
928 	if (in6_dev) {
929 		atomic_inc(&in6_dev->cnf.mc_forwarding);
930 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
931 					     NETCONFA_MC_FORWARDING,
932 					     dev->ifindex, &in6_dev->cnf);
933 	}
934 
935 	/* Fill in the VIF structures */
936 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
937 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
938 			MIFF_REGISTER);
939 
940 	/* And finish the update by writing the critical data */
941 	spin_lock(&mrt_lock);
942 	rcu_assign_pointer(v->dev, dev);
943 	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
944 #ifdef CONFIG_IPV6_PIMSM_V2
945 	if (v->flags & MIFF_REGISTER)
946 		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
947 #endif
948 	if (vifi + 1 > mrt->maxvif)
949 		WRITE_ONCE(mrt->maxvif, vifi + 1);
950 	spin_unlock(&mrt_lock);
951 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
952 				       v, dev, vifi, mrt->id);
953 	return 0;
954 }
955 
956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
957 					   const struct in6_addr *origin,
958 					   const struct in6_addr *mcastgrp)
959 {
960 	struct mfc6_cache_cmp_arg arg = {
961 		.mf6c_origin = *origin,
962 		.mf6c_mcastgrp = *mcastgrp,
963 	};
964 
965 	return mr_mfc_find(mrt, &arg);
966 }
967 
968 /* Look for a (*,G) entry */
969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
970 					       struct in6_addr *mcastgrp,
971 					       mifi_t mifi)
972 {
973 	struct mfc6_cache_cmp_arg arg = {
974 		.mf6c_origin = in6addr_any,
975 		.mf6c_mcastgrp = *mcastgrp,
976 	};
977 
978 	if (ipv6_addr_any(mcastgrp))
979 		return mr_mfc_find_any_parent(mrt, mifi);
980 	return mr_mfc_find_any(mrt, mifi, &arg);
981 }
982 
983 /* Look for a (S,G,iif) entry if parent != -1 */
984 static struct mfc6_cache *
985 ip6mr_cache_find_parent(struct mr_table *mrt,
986 			const struct in6_addr *origin,
987 			const struct in6_addr *mcastgrp,
988 			int parent)
989 {
990 	struct mfc6_cache_cmp_arg arg = {
991 		.mf6c_origin = *origin,
992 		.mf6c_mcastgrp = *mcastgrp,
993 	};
994 
995 	return mr_mfc_find_parent(mrt, &arg, parent);
996 }
997 
998 /* Allocate a multicast cache entry */
999 static struct mfc6_cache *ip6mr_cache_alloc(void)
1000 {
1001 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1002 	if (!c)
1003 		return NULL;
1004 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1005 	c->_c.mfc_un.res.minvif = MAXMIFS;
1006 	c->_c.free = ip6mr_cache_free_rcu;
1007 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1008 	return c;
1009 }
1010 
1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1012 {
1013 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1014 	if (!c)
1015 		return NULL;
1016 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1017 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1018 	return c;
1019 }
1020 
1021 /*
1022  *	A cache entry has moved from the unresolved queue to the resolved state
1023  */
1024 
1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1026 				struct mfc6_cache *uc, struct mfc6_cache *c)
1027 {
1028 	struct sk_buff *skb;
1029 
1030 	/*
1031 	 *	Play the pending entries through our router
1032 	 */
1033 
1034 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1035 		if (ipv6_hdr(skb)->version == 0) {
1036 			struct nlmsghdr *nlh = skb_pull(skb,
1037 							sizeof(struct ipv6hdr));
1038 
1039 			if (mr_fill_mroute(mrt, skb, &c->_c,
1040 					   nlmsg_data(nlh)) > 0) {
1041 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1042 			} else {
1043 				nlh->nlmsg_type = NLMSG_ERROR;
1044 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1045 				skb_trim(skb, nlh->nlmsg_len);
1046 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1047 			}
1048 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1049 		} else {
1050 			rcu_read_lock();
1051 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1052 			rcu_read_unlock();
1053 		}
1054 	}
1055 }
1056 
1057 /*
1058  *	Bounce a cache query up to pim6sd and netlink.
1059  *
1060  *	Called under rcu_read_lock()
1061  */
1062 
1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1064 			      mifi_t mifi, int assert)
1065 {
1066 	struct sock *mroute6_sk;
1067 	struct sk_buff *skb;
1068 	struct mrt6msg *msg;
1069 	int ret;
1070 
1071 #ifdef CONFIG_IPV6_PIMSM_V2
1072 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1073 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 						+sizeof(*msg));
1075 	else
1076 #endif
1077 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078 
1079 	if (!skb)
1080 		return -ENOBUFS;
1081 
1082 	/* I suppose that internal messages
1083 	 * do not require checksums */
1084 
1085 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1086 
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1089 		/* Ugly, but we have no choice with this interface.
1090 		   Duplicate old header, fix length etc.
1091 		   And all this only to mangle msg->im6_msgtype and
1092 		   to set msg->im6_mbz to "mbz" :-)
1093 		 */
1094 		__skb_pull(skb, skb_network_offset(pkt));
1095 
1096 		skb_push(skb, sizeof(*msg));
1097 		skb_reset_transport_header(skb);
1098 		msg = (struct mrt6msg *)skb_transport_header(skb);
1099 		msg->im6_mbz = 0;
1100 		msg->im6_msgtype = assert;
1101 		if (assert == MRT6MSG_WRMIFWHOLE)
1102 			msg->im6_mif = mifi;
1103 		else
1104 			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1105 		msg->im6_pad = 0;
1106 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1107 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1108 
1109 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1110 	} else
1111 #endif
1112 	{
1113 	/*
1114 	 *	Copy the IP header
1115 	 */
1116 
1117 	skb_put(skb, sizeof(struct ipv6hdr));
1118 	skb_reset_network_header(skb);
1119 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1120 
1121 	/*
1122 	 *	Add our header
1123 	 */
1124 	skb_put(skb, sizeof(*msg));
1125 	skb_reset_transport_header(skb);
1126 	msg = (struct mrt6msg *)skb_transport_header(skb);
1127 
1128 	msg->im6_mbz = 0;
1129 	msg->im6_msgtype = assert;
1130 	msg->im6_mif = mifi;
1131 	msg->im6_pad = 0;
1132 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1133 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1134 
1135 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1136 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1137 	}
1138 
1139 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1140 	if (!mroute6_sk) {
1141 		kfree_skb(skb);
1142 		return -EINVAL;
1143 	}
1144 
1145 	mrt6msg_netlink_event(mrt, skb);
1146 
1147 	/* Deliver to user space multicast routing algorithms */
1148 	ret = sock_queue_rcv_skb(mroute6_sk, skb);
1149 
1150 	if (ret < 0) {
1151 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1152 		kfree_skb(skb);
1153 	}
1154 
1155 	return ret;
1156 }
1157 
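/* Userspace sketch (illustrative, not kernel code): a daemon such as
 * pim6sd receives these upcalls by reading its mroute socket; an
 * MRT6MSG_NOCACHE message starts with struct mrt6msg:
 *
 *	char buf[8192];
 *	ssize_t len = read(mrt_fd, buf, sizeof(buf));
 *
 *	if (len >= (ssize_t)sizeof(struct mrt6msg)) {
 *		struct mrt6msg *m = (struct mrt6msg *)buf;
 *
 *		if (m->im6_msgtype == MRT6MSG_NOCACHE) {
 *			-- resolve (im6_src, im6_dst) seen on mif m->im6_mif,
 *			-- then install the route with setsockopt(mrt_fd,
 *			-- IPPROTO_IPV6, MRT6_ADD_MFC, ...)
 *		}
 *	}
 */
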
1158 /* Queue a packet for resolution. It operates on a locked cache entry! */
1159 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1160 				  struct sk_buff *skb, struct net_device *dev)
1161 {
1162 	struct mfc6_cache *c;
1163 	bool found = false;
1164 	int err;
1165 
1166 	spin_lock_bh(&mfc_unres_lock);
1167 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1168 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1169 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1170 			found = true;
1171 			break;
1172 		}
1173 	}
1174 
1175 	if (!found) {
1176 		/*
1177 		 *	Create a new entry if allowable
1178 		 */
1179 
1180 		c = ip6mr_cache_alloc_unres();
1181 		if (!c) {
1182 			spin_unlock_bh(&mfc_unres_lock);
1183 
1184 			kfree_skb(skb);
1185 			return -ENOBUFS;
1186 		}
1187 
1188 		/* Fill in the new cache entry */
1189 		c->_c.mfc_parent = -1;
1190 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1191 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1192 
1193 		/*
1194 		 *	Reflect first query at pim6sd
1195 		 */
1196 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1197 		if (err < 0) {
1198 			/* If the report failed throw the cache entry
1199 			   out - Brad Parker
1200 			 */
1201 			spin_unlock_bh(&mfc_unres_lock);
1202 
1203 			ip6mr_cache_free(c);
1204 			kfree_skb(skb);
1205 			return err;
1206 		}
1207 
1208 		atomic_inc(&mrt->cache_resolve_queue_len);
1209 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1210 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1211 
1212 		ipmr_do_expire_process(mrt);
1213 	}
1214 
1215 	/* See if we can append the packet */
1216 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1217 		kfree_skb(skb);
1218 		err = -ENOBUFS;
1219 	} else {
1220 		if (dev) {
1221 			skb->dev = dev;
1222 			skb->skb_iif = dev->ifindex;
1223 		}
1224 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1225 		err = 0;
1226 	}
1227 
1228 	spin_unlock_bh(&mfc_unres_lock);
1229 	return err;
1230 }
1231 
1232 /*
1233  *	MFC6 cache manipulation by user space
1234  */
1235 
1236 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1237 			    int parent)
1238 {
1239 	struct mfc6_cache *c;
1240 
1241 	/* The entries are added/deleted only under RTNL */
1242 	rcu_read_lock();
1243 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1244 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1245 	rcu_read_unlock();
1246 	if (!c)
1247 		return -ENOENT;
1248 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1249 	list_del_rcu(&c->_c.list);
1250 
1251 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1252 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1253 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1254 	mr_cache_put(&c->_c);
1255 	return 0;
1256 }
1257 
1258 static int ip6mr_device_event(struct notifier_block *this,
1259 			      unsigned long event, void *ptr)
1260 {
1261 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1262 	struct net *net = dev_net(dev);
1263 	struct mr_table *mrt;
1264 	struct vif_device *v;
1265 	int ct;
1266 
1267 	if (event != NETDEV_UNREGISTER)
1268 		return NOTIFY_DONE;
1269 
1270 	ip6mr_for_each_table(mrt, net) {
1271 		v = &mrt->vif_table[0];
1272 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 			if (rcu_access_pointer(v->dev) == dev)
1274 				mif6_delete(mrt, ct, 1, NULL);
1275 		}
1276 	}
1277 
1278 	return NOTIFY_DONE;
1279 }
1280 
1281 static unsigned int ip6mr_seq_read(const struct net *net)
1282 {
1283 	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1284 }
1285 
1286 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1287 		      struct netlink_ext_ack *extack)
1288 {
1289 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1290 		       ip6mr_mr_table_iter, extack);
1291 }
1292 
1293 static struct notifier_block ip6_mr_notifier = {
1294 	.notifier_call = ip6mr_device_event
1295 };
1296 
1297 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1298 	.family		= RTNL_FAMILY_IP6MR,
1299 	.fib_seq_read	= ip6mr_seq_read,
1300 	.fib_dump	= ip6mr_dump,
1301 	.owner		= THIS_MODULE,
1302 };
1303 
1304 static int __net_init ip6mr_notifier_init(struct net *net)
1305 {
1306 	struct fib_notifier_ops *ops;
1307 
1308 	net->ipv6.ipmr_seq = 0;
1309 
1310 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1311 	if (IS_ERR(ops))
1312 		return PTR_ERR(ops);
1313 
1314 	net->ipv6.ip6mr_notifier_ops = ops;
1315 
1316 	return 0;
1317 }
1318 
1319 static void __net_exit ip6mr_notifier_exit(struct net *net)
1320 {
1321 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1322 	net->ipv6.ip6mr_notifier_ops = NULL;
1323 }
1324 
1325 /* Setup for IPv6 multicast routing */
1326 static int __net_init ip6mr_net_init(struct net *net)
1327 {
1328 	int err;
1329 
1330 	err = ip6mr_notifier_init(net);
1331 	if (err)
1332 		return err;
1333 
1334 	err = ip6mr_rules_init(net);
1335 	if (err < 0)
1336 		goto ip6mr_rules_fail;
1337 
1338 #ifdef CONFIG_PROC_FS
1339 	err = -ENOMEM;
1340 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1341 			sizeof(struct mr_vif_iter)))
1342 		goto proc_vif_fail;
1343 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1344 			sizeof(struct mr_mfc_iter)))
1345 		goto proc_cache_fail;
1346 #endif
1347 
1348 	return 0;
1349 
1350 #ifdef CONFIG_PROC_FS
1351 proc_cache_fail:
1352 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1353 proc_vif_fail:
1354 	rtnl_lock();
1355 	ip6mr_rules_exit(net);
1356 	rtnl_unlock();
1357 #endif
1358 ip6mr_rules_fail:
1359 	ip6mr_notifier_exit(net);
1360 	return err;
1361 }
1362 
1363 static void __net_exit ip6mr_net_exit(struct net *net)
1364 {
1365 #ifdef CONFIG_PROC_FS
1366 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1367 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1368 #endif
1369 	ip6mr_notifier_exit(net);
1370 }
1371 
1372 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1373 {
1374 	struct net *net;
1375 
1376 	rtnl_lock();
1377 	list_for_each_entry(net, net_list, exit_list)
1378 		ip6mr_rules_exit(net);
1379 	rtnl_unlock();
1380 }
1381 
1382 static struct pernet_operations ip6mr_net_ops = {
1383 	.init = ip6mr_net_init,
1384 	.exit = ip6mr_net_exit,
1385 	.exit_batch = ip6mr_net_exit_batch,
1386 };
1387 
1388 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1389 	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1390 	 .msgtype = RTM_GETROUTE,
1391 	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
1392 };
1393 
1394 int __init ip6_mr_init(void)
1395 {
1396 	int err;
1397 
1398 	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1399 	if (!mrt_cachep)
1400 		return -ENOMEM;
1401 
1402 	err = register_pernet_subsys(&ip6mr_net_ops);
1403 	if (err)
1404 		goto reg_pernet_fail;
1405 
1406 	err = register_netdevice_notifier(&ip6_mr_notifier);
1407 	if (err)
1408 		goto reg_notif_fail;
1409 #ifdef CONFIG_IPV6_PIMSM_V2
1410 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1411 		pr_err("%s: can't add PIM protocol\n", __func__);
1412 		err = -EAGAIN;
1413 		goto add_proto_fail;
1414 	}
1415 #endif
1416 	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1417 	if (!err)
1418 		return 0;
1419 
1420 #ifdef CONFIG_IPV6_PIMSM_V2
1421 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1422 add_proto_fail:
1423 	unregister_netdevice_notifier(&ip6_mr_notifier);
1424 #endif
1425 reg_notif_fail:
1426 	unregister_pernet_subsys(&ip6mr_net_ops);
1427 reg_pernet_fail:
1428 	kmem_cache_destroy(mrt_cachep);
1429 	return err;
1430 }
1431 
1432 void __init ip6_mr_cleanup(void)
1433 {
1434 	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
1435 #ifdef CONFIG_IPV6_PIMSM_V2
1436 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1437 #endif
1438 	unregister_netdevice_notifier(&ip6_mr_notifier);
1439 	unregister_pernet_subsys(&ip6mr_net_ops);
1440 	kmem_cache_destroy(mrt_cachep);
1441 }
1442 
1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1444 			 struct mf6cctl *mfc, int mrtsock, int parent)
1445 {
1446 	unsigned char ttls[MAXMIFS];
1447 	struct mfc6_cache *uc, *c;
1448 	struct mr_mfc *_uc;
1449 	bool found;
1450 	int i, err;
1451 
1452 	if (mfc->mf6cc_parent >= MAXMIFS)
1453 		return -ENFILE;
1454 
1455 	memset(ttls, 255, MAXMIFS);
1456 	for (i = 0; i < MAXMIFS; i++) {
1457 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1458 			ttls[i] = 1;
1459 	}
1460 
1461 	/* The entries are added/deleted only under RTNL */
1462 	rcu_read_lock();
1463 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1464 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1465 	rcu_read_unlock();
1466 	if (c) {
1467 		spin_lock(&mrt_lock);
1468 		c->_c.mfc_parent = mfc->mf6cc_parent;
1469 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1470 		if (!mrtsock)
1471 			c->_c.mfc_flags |= MFC_STATIC;
1472 		spin_unlock(&mrt_lock);
1473 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1474 					       c, mrt->id);
1475 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1476 		return 0;
1477 	}
1478 
1479 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1480 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1481 		return -EINVAL;
1482 
1483 	c = ip6mr_cache_alloc();
1484 	if (!c)
1485 		return -ENOMEM;
1486 
1487 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1488 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1489 	c->_c.mfc_parent = mfc->mf6cc_parent;
1490 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1491 	if (!mrtsock)
1492 		c->_c.mfc_flags |= MFC_STATIC;
1493 
1494 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1495 				  ip6mr_rht_params);
1496 	if (err) {
1497 		pr_err("ip6mr: rhtable insert error %d\n", err);
1498 		ip6mr_cache_free(c);
1499 		return err;
1500 	}
1501 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1502 
1503 	/* Check whether we resolved a queued entry. If so, we
1504 	 * need to send on the queued frames and tidy up.
1505 	 */
1506 	found = false;
1507 	spin_lock_bh(&mfc_unres_lock);
1508 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1509 		uc = (struct mfc6_cache *)_uc;
1510 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1511 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1512 			list_del(&_uc->list);
1513 			atomic_dec(&mrt->cache_resolve_queue_len);
1514 			found = true;
1515 			break;
1516 		}
1517 	}
1518 	if (list_empty(&mrt->mfc_unres_queue))
1519 		timer_delete(&mrt->ipmr_expire_timer);
1520 	spin_unlock_bh(&mfc_unres_lock);
1521 
1522 	if (found) {
1523 		ip6mr_cache_resolve(net, mrt, uc, c);
1524 		ip6mr_cache_free(uc);
1525 	}
1526 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1527 				       c, mrt->id);
1528 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1529 	return 0;
1530 }
1531 
1532 /*
1533  *	Close the multicast socket, and clear the vif tables, etc.
1534  */
1535 
1536 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1537 {
1538 	struct mr_mfc *c, *tmp;
1539 	LIST_HEAD(list);
1540 	int i;
1541 
1542 	/* Shut down all active vif entries */
1543 	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1544 		for (i = 0; i < mrt->maxvif; i++) {
1545 			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1546 			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1547 			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1548 				continue;
1549 			mif6_delete(mrt, i, 0, &list);
1550 		}
1551 		unregister_netdevice_many(&list);
1552 	}
1553 
1554 	/* Wipe the cache */
1555 	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1556 		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1557 			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1558 			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1559 				continue;
1560 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1561 			list_del_rcu(&c->list);
1562 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1563 						       FIB_EVENT_ENTRY_DEL,
1564 						       (struct mfc6_cache *)c, mrt->id);
1565 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1566 			mr_cache_put(c);
1567 		}
1568 	}
1569 
1570 	if (flags & MRT6_FLUSH_MFC) {
1571 		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1572 			spin_lock_bh(&mfc_unres_lock);
1573 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1574 				list_del(&c->list);
1575 				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1576 						  RTM_DELROUTE);
1577 				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1578 			}
1579 			spin_unlock_bh(&mfc_unres_lock);
1580 		}
1581 	}
1582 }
1583 
1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1585 {
1586 	int err = 0;
1587 	struct net *net = sock_net(sk);
1588 
1589 	rtnl_lock();
1590 	spin_lock(&mrt_lock);
1591 	if (rtnl_dereference(mrt->mroute_sk)) {
1592 		err = -EADDRINUSE;
1593 	} else {
1594 		rcu_assign_pointer(mrt->mroute_sk, sk);
1595 		sock_set_flag(sk, SOCK_RCU_FREE);
1596 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1597 	}
1598 	spin_unlock(&mrt_lock);
1599 
1600 	if (!err)
1601 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1602 					     NETCONFA_MC_FORWARDING,
1603 					     NETCONFA_IFINDEX_ALL,
1604 					     net->ipv6.devconf_all);
1605 	rtnl_unlock();
1606 
1607 	return err;
1608 }
1609 
1610 int ip6mr_sk_done(struct sock *sk)
1611 {
1612 	struct net *net = sock_net(sk);
1613 	struct ipv6_devconf *devconf;
1614 	struct mr_table *mrt;
1615 	int err = -EACCES;
1616 
1617 	if (sk->sk_type != SOCK_RAW ||
1618 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 		return err;
1620 
1621 	devconf = net->ipv6.devconf_all;
1622 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1623 		return err;
1624 
1625 	rtnl_lock();
1626 	ip6mr_for_each_table(mrt, net) {
1627 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1628 			spin_lock(&mrt_lock);
1629 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1630 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1631 			 * so the RCU grace period before sk freeing
1632 			 * is guaranteed by sk_destruct()
1633 			 */
1634 			atomic_dec(&devconf->mc_forwarding);
1635 			spin_unlock(&mrt_lock);
1636 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1637 						     NETCONFA_MC_FORWARDING,
1638 						     NETCONFA_IFINDEX_ALL,
1639 						     net->ipv6.devconf_all);
1640 
1641 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1642 			err = 0;
1643 			break;
1644 		}
1645 	}
1646 	rtnl_unlock();
1647 
1648 	return err;
1649 }
1650 
1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1652 {
1653 	struct mr_table *mrt;
1654 	struct flowi6 fl6 = {
1655 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1656 		.flowi6_oif	= skb->dev->ifindex,
1657 		.flowi6_mark	= skb->mark,
1658 	};
1659 
1660 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1661 		return NULL;
1662 
1663 	return rcu_access_pointer(mrt->mroute_sk);
1664 }
1665 EXPORT_SYMBOL(mroute6_is_socket);
1666 
1667 /*
1668  *	Socket options and virtual interface manipulation. The whole
1669  *	virtual interface system is a complete heap, but unfortunately
1670  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1671  *	MOSPF/PIM router set up we can clean this up.
1672  */
1673 
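/* Userspace sketch (illustrative, not kernel code): the checks below
 * require a raw ICMPv6 socket, so a routing daemon typically does:
 *
 *	int mrt_fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	struct mif6ctl vc = {
 *		.mif6c_mifi = 0,			-- MIF index, < MAXMIFS
 *		.mif6c_pifi = if_nametoindex("eth0"),	-- physical ifindex
 *	};
 *
 *	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_ADD_MIF, &vc, sizeof(vc));
 */
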
1674 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1675 			  unsigned int optlen)
1676 {
1677 	int ret, parent = 0;
1678 	struct mif6ctl vif;
1679 	struct mf6cctl mfc;
1680 	mifi_t mifi;
1681 	struct net *net = sock_net(sk);
1682 	struct mr_table *mrt;
1683 
1684 	if (sk->sk_type != SOCK_RAW ||
1685 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1686 		return -EOPNOTSUPP;
1687 
1688 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1689 	if (!mrt)
1690 		return -ENOENT;
1691 
1692 	if (optname != MRT6_INIT) {
1693 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1694 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1695 			return -EACCES;
1696 	}
1697 
1698 	switch (optname) {
1699 	case MRT6_INIT:
1700 		if (optlen < sizeof(int))
1701 			return -EINVAL;
1702 
1703 		return ip6mr_sk_init(mrt, sk);
1704 
1705 	case MRT6_DONE:
1706 		return ip6mr_sk_done(sk);
1707 
1708 	case MRT6_ADD_MIF:
1709 		if (optlen < sizeof(vif))
1710 			return -EINVAL;
1711 		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1712 			return -EFAULT;
1713 		if (vif.mif6c_mifi >= MAXMIFS)
1714 			return -ENFILE;
1715 		rtnl_lock();
1716 		ret = mif6_add(net, mrt, &vif,
1717 			       sk == rtnl_dereference(mrt->mroute_sk));
1718 		rtnl_unlock();
1719 		return ret;
1720 
1721 	case MRT6_DEL_MIF:
1722 		if (optlen < sizeof(mifi_t))
1723 			return -EINVAL;
1724 		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1725 			return -EFAULT;
1726 		rtnl_lock();
1727 		ret = mif6_delete(mrt, mifi, 0, NULL);
1728 		rtnl_unlock();
1729 		return ret;
1730 
1731 	/*
1732 	 *	Manipulate the forwarding caches. These live
1733 	 *	in a sort of kernel/user symbiosis.
1734 	 */
1735 	case MRT6_ADD_MFC:
1736 	case MRT6_DEL_MFC:
1737 		parent = -1;
1738 		fallthrough;
1739 	case MRT6_ADD_MFC_PROXY:
1740 	case MRT6_DEL_MFC_PROXY:
1741 		if (optlen < sizeof(mfc))
1742 			return -EINVAL;
1743 		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1744 			return -EFAULT;
1745 		if (parent == 0)
1746 			parent = mfc.mf6cc_parent;
1747 		rtnl_lock();
1748 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1749 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1750 		else
1751 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1752 					    sk ==
1753 					    rtnl_dereference(mrt->mroute_sk),
1754 					    parent);
1755 		rtnl_unlock();
1756 		return ret;
1757 
1758 	case MRT6_FLUSH:
1759 	{
1760 		int flags;
1761 
1762 		if (optlen != sizeof(flags))
1763 			return -EINVAL;
1764 		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1765 			return -EFAULT;
1766 		rtnl_lock();
1767 		mroute_clean_tables(mrt, flags);
1768 		rtnl_unlock();
1769 		return 0;
1770 	}
1771 
1772 	/*
1773 	 *	Control PIM assert (activating PIM also activates assert)
1774 	 */
1775 	case MRT6_ASSERT:
1776 	{
1777 		int v;
1778 
1779 		if (optlen != sizeof(v))
1780 			return -EINVAL;
1781 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1782 			return -EFAULT;
1783 		mrt->mroute_do_assert = v;
1784 		return 0;
1785 	}
1786 
1787 #ifdef CONFIG_IPV6_PIMSM_V2
1788 	case MRT6_PIM:
1789 	{
1790 		bool do_wrmifwhole;
1791 		int v;
1792 
1793 		if (optlen != sizeof(v))
1794 			return -EINVAL;
1795 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1796 			return -EFAULT;
1797 
1798 		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1799 		v = !!v;
1800 		rtnl_lock();
1801 		ret = 0;
1802 		if (v != mrt->mroute_do_pim) {
1803 			mrt->mroute_do_pim = v;
1804 			mrt->mroute_do_assert = v;
1805 			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1806 		}
1807 		rtnl_unlock();
1808 		return ret;
1809 	}
1810 
1811 #endif
1812 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1813 	case MRT6_TABLE:
1814 	{
1815 		u32 v;
1816 
1817 		if (optlen != sizeof(u32))
1818 			return -EINVAL;
1819 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1820 			return -EFAULT;
1821 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1822 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1823 			return -EINVAL;
1824 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1825 			return -EBUSY;
1826 
1827 		rtnl_lock();
1828 		ret = 0;
1829 		mrt = ip6mr_new_table(net, v);
1830 		if (IS_ERR(mrt))
1831 			ret = PTR_ERR(mrt);
1832 		else
1833 			raw6_sk(sk)->ip6mr_table = v;
1834 		rtnl_unlock();
1835 		return ret;
1836 	}
1837 #endif
1838 	/*
1839 	 *	Spurious command, or MRT6_VERSION which you cannot
1840 	 *	set.
1841 	 */
1842 	default:
1843 		return -ENOPROTOOPT;
1844 	}
1845 }
1846 
1847 /*
1848  *	Getsockopt support for the multicast routing system.
1849  */
1850 
1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1852 			  sockptr_t optlen)
1853 {
1854 	int olr;
1855 	int val;
1856 	struct net *net = sock_net(sk);
1857 	struct mr_table *mrt;
1858 
1859 	if (sk->sk_type != SOCK_RAW ||
1860 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1861 		return -EOPNOTSUPP;
1862 
1863 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 	if (!mrt)
1865 		return -ENOENT;
1866 
1867 	switch (optname) {
1868 	case MRT6_VERSION:
1869 		val = 0x0305;
1870 		break;
1871 #ifdef CONFIG_IPV6_PIMSM_V2
1872 	case MRT6_PIM:
1873 		val = mrt->mroute_do_pim;
1874 		break;
1875 #endif
1876 	case MRT6_ASSERT:
1877 		val = mrt->mroute_do_assert;
1878 		break;
1879 	default:
1880 		return -ENOPROTOOPT;
1881 	}
1882 
1883 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1884 		return -EFAULT;
1885 
1886 	olr = min_t(int, olr, sizeof(int));
1887 	if (olr < 0)
1888 		return -EINVAL;
1889 
1890 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1891 		return -EFAULT;
1892 	if (copy_to_sockptr(optval, &val, olr))
1893 		return -EFAULT;
1894 	return 0;
1895 }
1896 
1897 /*
1898  *	The IP multicast ioctl support routines.
1899  */
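/* SIOCGETMIFCNT_IN6 reads per-interface packet/byte counters;
 * SIOCGETSGCNT_IN6 reads per-(S,G) counters from the MFC.
 */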
1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1901 {
1902 	struct sioc_sg_req6 *sr;
1903 	struct sioc_mif_req6 *vr;
1904 	struct vif_device *vif;
1905 	struct mfc6_cache *c;
1906 	struct net *net = sock_net(sk);
1907 	struct mr_table *mrt;
1908 
1909 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1910 	if (!mrt)
1911 		return -ENOENT;
1912 
1913 	switch (cmd) {
1914 	case SIOCGETMIFCNT_IN6:
1915 		vr = (struct sioc_mif_req6 *)arg;
1916 		if (vr->mifi >= mrt->maxvif)
1917 			return -EINVAL;
1918 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1919 		rcu_read_lock();
1920 		vif = &mrt->vif_table[vr->mifi];
1921 		if (VIF_EXISTS(mrt, vr->mifi)) {
1922 			vr->icount = READ_ONCE(vif->pkt_in);
1923 			vr->ocount = READ_ONCE(vif->pkt_out);
1924 			vr->ibytes = READ_ONCE(vif->bytes_in);
1925 			vr->obytes = READ_ONCE(vif->bytes_out);
1926 			rcu_read_unlock();
1927 			return 0;
1928 		}
1929 		rcu_read_unlock();
1930 		return -EADDRNOTAVAIL;
1931 	case SIOCGETSGCNT_IN6:
1932 		sr = (struct sioc_sg_req6 *)arg;
1933 
1934 		rcu_read_lock();
1935 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1936 				     &sr->grp.sin6_addr);
1937 		if (c) {
1938 			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1939 			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1940 			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1941 			rcu_read_unlock();
1942 			return 0;
1943 		}
1944 		rcu_read_unlock();
1945 		return -EADDRNOTAVAIL;
1946 	default:
1947 		return -ENOIOCTLCMD;
1948 	}
1949 }
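
/* Illustrative userspace read of the (S,G) counters (a sketch only;
 * "mroute_sock", "source" and "group" are hypothetical):
 *
 *	struct sioc_sg_req6 sr = { 0 };
 *	sr.src.sin6_addr = source;
 *	sr.grp.sin6_addr = group;
 *	ioctl(mroute_sock, SIOCGETSGCNT_IN6, &sr);
 */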
1950 
1951 #ifdef CONFIG_COMPAT
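/* 32-bit ABI mirrors of the sioc_*_req6 structures; the unsigned long
 * counter fields shrink to compat_ulong_t.
 */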
1952 struct compat_sioc_sg_req6 {
1953 	struct sockaddr_in6 src;
1954 	struct sockaddr_in6 grp;
1955 	compat_ulong_t pktcnt;
1956 	compat_ulong_t bytecnt;
1957 	compat_ulong_t wrong_if;
1958 };
1959 
1960 struct compat_sioc_mif_req6 {
1961 	mifi_t	mifi;
1962 	compat_ulong_t icount;
1963 	compat_ulong_t ocount;
1964 	compat_ulong_t ibytes;
1965 	compat_ulong_t obytes;
1966 };
1967 
1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1969 {
1970 	struct compat_sioc_sg_req6 sr;
1971 	struct compat_sioc_mif_req6 vr;
1972 	struct vif_device *vif;
1973 	struct mfc6_cache *c;
1974 	struct net *net = sock_net(sk);
1975 	struct mr_table *mrt;
1976 
1977 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1978 	if (!mrt)
1979 		return -ENOENT;
1980 
1981 	switch (cmd) {
1982 	case SIOCGETMIFCNT_IN6:
1983 		if (copy_from_user(&vr, arg, sizeof(vr)))
1984 			return -EFAULT;
1985 		if (vr.mifi >= mrt->maxvif)
1986 			return -EINVAL;
1987 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1988 		rcu_read_lock();
1989 		vif = &mrt->vif_table[vr.mifi];
1990 		if (VIF_EXISTS(mrt, vr.mifi)) {
1991 			vr.icount = READ_ONCE(vif->pkt_in);
1992 			vr.ocount = READ_ONCE(vif->pkt_out);
1993 			vr.ibytes = READ_ONCE(vif->bytes_in);
1994 			vr.obytes = READ_ONCE(vif->bytes_out);
1995 			rcu_read_unlock();
1996 
1997 			if (copy_to_user(arg, &vr, sizeof(vr)))
1998 				return -EFAULT;
1999 			return 0;
2000 		}
2001 		rcu_read_unlock();
2002 		return -EADDRNOTAVAIL;
2003 	case SIOCGETSGCNT_IN6:
2004 		if (copy_from_user(&sr, arg, sizeof(sr)))
2005 			return -EFAULT;
2006 
2007 		rcu_read_lock();
2008 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2009 		if (c) {
2010 			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2011 			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2012 			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2013 			rcu_read_unlock();
2014 
2015 			if (copy_to_user(arg, &sr, sizeof(sr)))
2016 				return -EFAULT;
2017 			return 0;
2018 		}
2019 		rcu_read_unlock();
2020 		return -EADDRNOTAVAIL;
2021 	default:
2022 		return -ENOIOCTLCMD;
2023 	}
2024 }
2025 #endif
2026 
2027 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2028 {
2029 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2030 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2031 	return dst_output(net, sk, skb);
2032 }
2033 
2034 /*
2035  *	Processing handlers for ip6mr_forward
2036  */
2037 
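/* Resolve an output route for @vifi, charge the vif counters and
 * decrement the hop limit. Returns 0 when the skb is ready for
 * transmission; -1 when it cannot be sent and the caller must free it.
 */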
2038 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
2039 			      struct sk_buff *skb, int vifi)
2040 {
2041 	struct vif_device *vif = &mrt->vif_table[vifi];
2042 	struct net_device *vif_dev;
2043 	struct ipv6hdr *ipv6h;
2044 	struct dst_entry *dst;
2045 	struct flowi6 fl6;
2046 
2047 	vif_dev = vif_dev_read(vif);
2048 	if (!vif_dev)
2049 		return -1;
2050 
2051 #ifdef CONFIG_IPV6_PIMSM_V2
2052 	if (vif->flags & MIFF_REGISTER) {
2053 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2054 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2055 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2056 		DEV_STATS_INC(vif_dev, tx_packets);
2057 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2058 		return -1;
2059 	}
2060 #endif
2061 
2062 	ipv6h = ipv6_hdr(skb);
2063 
2064 	fl6 = (struct flowi6) {
2065 		.flowi6_oif = vif->link,
2066 		.daddr = ipv6h->daddr,
2067 	};
2068 
2069 	dst = ip6_route_output(net, NULL, &fl6);
2070 	if (dst->error) {
2071 		dst_release(dst);
2072 		return -1;
2073 	}
2074 
2075 	skb_dst_drop(skb);
2076 	skb_dst_set(skb, dst);
2077 
2078 	/*
2079 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
2080 	 * locally not only before forwarding, but also after forwarding on
2081 	 * all output interfaces. Clearly, if the mrouter runs a multicast
2082 	 * program, that program should receive packets regardless of which
2083 	 * interface it is joined on.
2084 	 * If we did not do this, the program would have to join on all
2085 	 * interfaces. On the other hand, a multihomed host (or a router, but
2086 	 * not an mrouter) cannot join on more than one interface - it would
2087 	 * result in receiving duplicate packets.
2088 	 */
2089 	skb->dev = vif_dev;
2090 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2091 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2092 
2093 	/* We are about to modify the header, so make sure the skb is writable */
2094 	/* XXX: extension headers? */
2095 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2096 		return -1;
2097 
2098 	ipv6h = ipv6_hdr(skb);
2099 	ipv6h->hop_limit--;
2100 	return 0;
2101 }
2102 
2103 static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
2104 			   struct sk_buff *skb, int vifi)
2105 {
2106 	struct net_device *indev = skb->dev;
2107 
2108 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2109 		goto out_free;
2110 
2111 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2112 
2113 	NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2114 		net, NULL, skb, indev, skb->dev,
2115 		ip6mr_forward2_finish);
2116 	return;
2117 
2118 out_free:
2119 	kfree_skb(skb);
2120 }
2121 
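/* Like ip6mr_forward2(), but for locally generated packets: bypass the
 * NF_INET_FORWARD hook and hand the skb straight to ip6_output().
 */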
2122 static void ip6mr_output2(struct net *net, struct mr_table *mrt,
2123 			  struct sk_buff *skb, int vifi)
2124 {
2125 	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2126 		goto out_free;
2127 
2128 	ip6_output(net, NULL, skb);
2129 	return;
2130 
2131 out_free:
2132 	kfree_skb(skb);
2133 }
2134 
2135 /* Called with rcu_read_lock() */
2136 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2137 {
2138 	int ct;
2139 
2140 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2141 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2142 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2143 			break;
2144 	}
2145 	return ct;
2146 }
2147 
2148 /* Called under rcu_read_lock() */
2149 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2150 			   struct net_device *dev, struct sk_buff *skb,
2151 			   struct mfc6_cache *c)
2152 {
2153 	int psend = -1;
2154 	int vif, ct;
2155 	int true_vifi = ip6mr_find_vif(mrt, dev);
2156 
2157 	vif = c->_c.mfc_parent;
2158 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2159 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2160 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2161 
2162 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2163 		struct mfc6_cache *cache_proxy;
2164 
2165 		/* For an (*,G) entry, we only check that the incoming
2166 		 * interface is part of the static tree.
2167 		 */
2168 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2169 		if (cache_proxy &&
2170 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2171 			goto forward;
2172 	}
2173 
2174 	/*
2175 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2176 	 */
2177 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2178 		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2179 
2180 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2181 		    /* PIM-SM uses asserts when switching from the RPT to the
2182 		       SPT, so we cannot check that the packet arrived on an
2183 		       oif. That is bad, but otherwise we would need to move a
2184 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2185 		     */
2186 		    (mrt->mroute_do_pim ||
2187 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2188 		    time_after(jiffies,
2189 			       c->_c.mfc_un.res.last_assert +
2190 			       MFC_ASSERT_THRESH)) {
2191 			c->_c.mfc_un.res.last_assert = jiffies;
2192 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2193 			if (mrt->mroute_do_wrvifwhole)
2194 				ip6mr_cache_report(mrt, skb, true_vifi,
2195 						   MRT6MSG_WRMIFWHOLE);
2196 		}
2197 		goto dont_forward;
2198 	}
2199 
2200 forward:
2201 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2202 		   mrt->vif_table[vif].pkt_in + 1);
2203 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2204 		   mrt->vif_table[vif].bytes_in + skb->len);
2205 
2206 	/*
2207 	 *	Forward the frame
2208 	 */
2209 	if (ipv6_addr_any(&c->mf6c_origin) &&
2210 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2211 		if (true_vifi >= 0 &&
2212 		    true_vifi != c->_c.mfc_parent &&
2213 		    ipv6_hdr(skb)->hop_limit >
2214 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2215 			/* It's an (*,*) entry and the packet is not coming from
2216 			 * the upstream: forward the packet to the upstream
2217 			 * only.
2218 			 */
2219 			psend = c->_c.mfc_parent;
2220 			goto last_forward;
2221 		}
2222 		goto dont_forward;
2223 	}
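	/* Walk the oifs backwards, cloning the skb for every eligible vif
	 * except the last one; the final eligible vif (psend) transmits the
	 * original.
	 */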
2224 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2225 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2226 		/* For (*,G) entry, don't forward to the incoming interface */
2227 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2228 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2229 			if (psend != -1) {
2230 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2231 				if (skb2)
2232 					ip6mr_forward2(net, mrt, skb2, psend);
2233 			}
2234 			psend = ct;
2235 		}
2236 	}
2237 last_forward:
2238 	if (psend != -1) {
2239 		ip6mr_forward2(net, mrt, skb, psend);
2240 		return;
2241 	}
2242 
2243 dont_forward:
2244 	kfree_skb(skb);
2245 }
2246 
2247 /* Called under rcu_read_lock() */
2248 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
2249 				 struct net_device *dev, struct sk_buff *skb,
2250 				 struct mfc6_cache *c)
2251 {
2252 	int psend = -1;
2253 	int ct;
2254 
2255 	WARN_ON_ONCE(!rcu_read_lock_held());
2256 
2257 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2258 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2259 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2260 
2261 	/* Forward the frame */
2262 	if (ipv6_addr_any(&c->mf6c_origin) &&
2263 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2264 		if (ipv6_hdr(skb)->hop_limit >
2265 		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2266 			/* It's an (*,*) entry and the packet is not coming from
2267 			 * the upstream: forward the packet to the upstream
2268 			 * only.
2269 			 */
2270 			psend = c->_c.mfc_parent;
2271 			goto last_forward;
2272 		}
2273 		goto dont_forward;
2274 	}
2275 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2276 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2277 		if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2278 			if (psend != -1) {
2279 				struct sk_buff *skb2;
2280 
2281 				skb2 = skb_clone(skb, GFP_ATOMIC);
2282 				if (skb2)
2283 					ip6mr_output2(net, mrt, skb2, psend);
2284 			}
2285 			psend = ct;
2286 		}
2287 	}
2288 last_forward:
2289 	if (psend != -1) {
2290 		ip6mr_output2(net, mrt, skb, psend);
2291 		return;
2292 	}
2293 
2294 dont_forward:
2295 	kfree_skb(skb);
2296 }
2297 
2298 /*
2299  *	Multicast packets for forwarding arrive here
2300  */
2301 
2302 int ip6_mr_input(struct sk_buff *skb)
2303 {
2304 	struct net_device *dev = skb->dev;
2305 	struct net *net = dev_net_rcu(dev);
2306 	struct mfc6_cache *cache;
2307 	struct mr_table *mrt;
2308 	struct flowi6 fl6 = {
2309 		.flowi6_iif	= dev->ifindex,
2310 		.flowi6_mark	= skb->mark,
2311 	};
2312 	int err;
2313 
2314 	/* For VRFs, the skb->dev passed in is the master device.
2315 	 * Get the underlying interface that actually has a vif associated with it.
2316 	 */
2317 	if (netif_is_l3_master(dev)) {
2318 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2319 		if (!dev) {
2320 			kfree_skb(skb);
2321 			return -ENODEV;
2322 		}
2323 	}
2324 
2325 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2326 	if (err < 0) {
2327 		kfree_skb(skb);
2328 		return err;
2329 	}
2330 
2331 	cache = ip6mr_cache_find(mrt,
2332 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2333 	if (!cache) {
2334 		int vif = ip6mr_find_vif(mrt, dev);
2335 
2336 		if (vif >= 0)
2337 			cache = ip6mr_cache_find_any(mrt,
2338 						     &ipv6_hdr(skb)->daddr,
2339 						     vif);
2340 	}
2341 
2342 	/*
2343 	 *	No usable cache entry
2344 	 */
2345 	if (!cache) {
2346 		int vif;
2347 
2348 		vif = ip6mr_find_vif(mrt, dev);
2349 		if (vif >= 0) {
2350 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2351 
2352 			return err;
2353 		}
2354 		kfree_skb(skb);
2355 		return -ENODEV;
2356 	}
2357 
2358 	ip6_mr_forward(net, mrt, dev, skb, cache);
2359 
2360 	return 0;
2361 }
2362 
2363 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2364 {
2365 	struct net_device *dev = skb_dst(skb)->dev;
2366 	struct flowi6 fl6 = (struct flowi6) {
2367 		.flowi6_iif = LOOPBACK_IFINDEX,
2368 		.flowi6_mark = skb->mark,
2369 	};
2370 	struct mfc6_cache *cache;
2371 	struct mr_table *mrt;
2372 	int err;
2373 	int vif;
2374 
2375 	guard(rcu)();
2376 
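	/* Only packets flagged IP6SKB_MCROUTE, and not already forwarded,
	 * are candidates for multicast routing; everything else goes
	 * straight to ip6_output().
	 */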
2377 	if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
2378 		goto ip6_output;
2379 	if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
2380 		goto ip6_output;
2381 
2382 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2383 	if (err < 0) {
2384 		kfree_skb(skb);
2385 		return err;
2386 	}
2387 
2388 	cache = ip6mr_cache_find(mrt,
2389 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2390 	if (!cache) {
2391 		vif = ip6mr_find_vif(mrt, dev);
2392 		if (vif >= 0)
2393 			cache = ip6mr_cache_find_any(mrt,
2394 						     &ipv6_hdr(skb)->daddr,
2395 						     vif);
2396 	}
2397 
2398 	/* No usable cache entry */
2399 	if (!cache) {
2400 		vif = ip6mr_find_vif(mrt, dev);
2401 		if (vif >= 0)
2402 			return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2403 		goto ip6_output;
2404 	}
2405 
2406 	/* Wrong interface */
2407 	vif = cache->_c.mfc_parent;
2408 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2409 		goto ip6_output;
2410 
2411 	ip6_mr_output_finish(net, mrt, dev, skb, cache);
2412 	return 0;
2413 
2414 ip6_output:
2415 	return ip6_output(net, sk, skb);
2416 }
2417 
2418 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2419 		    u32 portid)
2420 {
2421 	int err;
2422 	struct mr_table *mrt;
2423 	struct mfc6_cache *cache;
2424 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2425 
2426 	rcu_read_lock();
2427 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2428 	if (!mrt) {
2429 		rcu_read_unlock();
2430 		return -ENOENT;
2431 	}
2432 
2433 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2434 	if (!cache && skb->dev) {
2435 		int vif = ip6mr_find_vif(mrt, skb->dev);
2436 
2437 		if (vif >= 0)
2438 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2439 						     vif);
2440 	}
2441 
2442 	if (!cache) {
2443 		struct sk_buff *skb2;
2444 		struct ipv6hdr *iph;
2445 		struct net_device *dev;
2446 		int vif;
2447 
2448 		dev = skb->dev;
2449 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2450 			rcu_read_unlock();
2451 			return -ENODEV;
2452 		}
2453 
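		/* No cache entry: synthesize a minimal IPv6 header carrying
		 * just the addresses, so the lookup can be queued as an
		 * unresolved entry and reported to the mroute daemon.
		 */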
2454 		/* really correct? */
2455 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2456 		if (!skb2) {
2457 			rcu_read_unlock();
2458 			return -ENOMEM;
2459 		}
2460 
2461 		NETLINK_CB(skb2).portid = portid;
2462 		skb_reset_transport_header(skb2);
2463 
2464 		skb_put(skb2, sizeof(struct ipv6hdr));
2465 		skb_reset_network_header(skb2);
2466 
2467 		iph = ipv6_hdr(skb2);
2468 		iph->version = 0;
2469 		iph->priority = 0;
2470 		iph->flow_lbl[0] = 0;
2471 		iph->flow_lbl[1] = 0;
2472 		iph->flow_lbl[2] = 0;
2473 		iph->payload_len = 0;
2474 		iph->nexthdr = IPPROTO_NONE;
2475 		iph->hop_limit = 0;
2476 		iph->saddr = rt->rt6i_src.addr;
2477 		iph->daddr = rt->rt6i_dst.addr;
2478 
2479 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2480 		rcu_read_unlock();
2481 
2482 		return err;
2483 	}
2484 
2485 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2486 	rcu_read_unlock();
2487 	return err;
2488 }
2489 
2490 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2491 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2492 			     int flags)
2493 {
2494 	struct nlmsghdr *nlh;
2495 	struct rtmsg *rtm;
2496 	int err;
2497 
2498 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2499 	if (!nlh)
2500 		return -EMSGSIZE;
2501 
2502 	rtm = nlmsg_data(nlh);
2503 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2504 	rtm->rtm_dst_len  = 128;
2505 	rtm->rtm_src_len  = 128;
2506 	rtm->rtm_tos      = 0;
2507 	rtm->rtm_table    = mrt->id;
2508 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2509 		goto nla_put_failure;
2510 	rtm->rtm_type = RTN_MULTICAST;
2511 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2512 	if (c->_c.mfc_flags & MFC_STATIC)
2513 		rtm->rtm_protocol = RTPROT_STATIC;
2514 	else
2515 		rtm->rtm_protocol = RTPROT_MROUTED;
2516 	rtm->rtm_flags    = 0;
2517 
2518 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2519 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2520 		goto nla_put_failure;
2521 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2522 	/* do not break the dump if cache is unresolved */
2523 	if (err < 0 && err != -ENOENT)
2524 		goto nla_put_failure;
2525 
2526 	nlmsg_end(skb, nlh);
2527 	return 0;
2528 
2529 nla_put_failure:
2530 	nlmsg_cancel(skb, nlh);
2531 	return -EMSGSIZE;
2532 }
2533 
2534 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2535 			      u32 portid, u32 seq, struct mr_mfc *c,
2536 			      int cmd, int flags)
2537 {
2538 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2539 				 cmd, flags);
2540 }
2541 
2542 static int mr6_msgsize(bool unresolved, int maxvif)
2543 {
2544 	size_t len =
2545 		NLMSG_ALIGN(sizeof(struct rtmsg))
2546 		+ nla_total_size(4)	/* RTA_TABLE */
2547 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2548 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2549 		;
2550 
2551 	if (!unresolved)
2552 		len = len
2553 		      + nla_total_size(4)	/* RTA_IIF */
2554 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2555 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2556 						/* RTA_MFC_STATS */
2557 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2558 		;
2559 
2560 	return len;
2561 }
2562 
2563 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2564 			      int cmd)
2565 {
2566 	struct net *net = read_pnet(&mrt->net);
2567 	struct sk_buff *skb;
2568 	int err = -ENOBUFS;
2569 
2570 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2571 			GFP_ATOMIC);
2572 	if (!skb)
2573 		goto errout;
2574 
2575 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2576 	if (err < 0)
2577 		goto errout;
2578 
2579 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2580 	return;
2581 
2582 errout:
2583 	kfree_skb(skb);
2584 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2585 }
2586 
2587 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2588 {
2589 	size_t len =
2590 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2591 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2592 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2593 					/* IP6MRA_CREPORT_SRC_ADDR */
2594 		+ nla_total_size(sizeof(struct in6_addr))
2595 					/* IP6MRA_CREPORT_DST_ADDR */
2596 		+ nla_total_size(sizeof(struct in6_addr))
2597 					/* IP6MRA_CREPORT_PKT */
2598 		+ nla_total_size(payloadlen)
2599 		;
2600 
2601 	return len;
2602 }
2603 
2604 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2605 {
2606 	struct net *net = read_pnet(&mrt->net);
2607 	struct nlmsghdr *nlh;
2608 	struct rtgenmsg *rtgenm;
2609 	struct mrt6msg *msg;
2610 	struct sk_buff *skb;
2611 	struct nlattr *nla;
2612 	int payloadlen;
2613 
2614 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2615 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2616 
2617 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2618 	if (!skb)
2619 		goto errout;
2620 
2621 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2622 			sizeof(struct rtgenmsg), 0);
2623 	if (!nlh)
2624 		goto errout;
2625 	rtgenm = nlmsg_data(nlh);
2626 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2627 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2628 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2629 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2630 			     &msg->im6_src) ||
2631 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2632 			     &msg->im6_dst))
2633 		goto nla_put_failure;
2634 
2635 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2636 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2637 				  nla_data(nla), payloadlen))
2638 		goto nla_put_failure;
2639 
2640 	nlmsg_end(skb, nlh);
2641 
2642 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2643 	return;
2644 
2645 nla_put_failure:
2646 	nlmsg_cancel(skb, nlh);
2647 errout:
2648 	kfree_skb(skb);
2649 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2650 }
2651 
2652 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2653 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2654 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2655 	[RTA_TABLE]		= { .type = NLA_U32 },
2656 };
2657 
2658 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2659 					const struct nlmsghdr *nlh,
2660 					struct nlattr **tb,
2661 					struct netlink_ext_ack *extack)
2662 {
2663 	struct rtmsg *rtm;
2664 	int err;
2665 
2666 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2667 			  extack);
2668 	if (err)
2669 		return err;
2670 
2671 	rtm = nlmsg_data(nlh);
2672 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2673 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2674 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2675 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2676 		NL_SET_ERR_MSG_MOD(extack,
2677 				   "Invalid values in header for multicast route get request");
2678 		return -EINVAL;
2679 	}
2680 
2681 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2682 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2683 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2684 		return -EINVAL;
2685 	}
2686 
2687 	return 0;
2688 }
2689 
2690 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2691 			      struct netlink_ext_ack *extack)
2692 {
2693 	struct net *net = sock_net(in_skb->sk);
2694 	struct in6_addr src = {}, grp = {};
2695 	struct nlattr *tb[RTA_MAX + 1];
2696 	struct mfc6_cache *cache;
2697 	struct mr_table *mrt;
2698 	struct sk_buff *skb;
2699 	u32 tableid;
2700 	int err;
2701 
2702 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2703 	if (err < 0)
2704 		return err;
2705 
2706 	if (tb[RTA_SRC])
2707 		src = nla_get_in6_addr(tb[RTA_SRC]);
2708 	if (tb[RTA_DST])
2709 		grp = nla_get_in6_addr(tb[RTA_DST]);
2710 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2711 
2712 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2713 	if (!mrt) {
2714 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2715 		return -ENOENT;
2716 	}
2717 
2718 	/* entries are added/deleted only under RTNL */
2719 	rcu_read_lock();
2720 	cache = ip6mr_cache_find(mrt, &src, &grp);
2721 	rcu_read_unlock();
2722 	if (!cache) {
2723 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2724 		return -ENOENT;
2725 	}
2726 
2727 	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2728 	if (!skb)
2729 		return -ENOBUFS;
2730 
2731 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2732 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2733 	if (err < 0) {
2734 		kfree_skb(skb);
2735 		return err;
2736 	}
2737 
2738 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2739 }
2740 
2741 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2742 {
2743 	const struct nlmsghdr *nlh = cb->nlh;
2744 	struct fib_dump_filter filter = {
2745 		.rtnl_held = true,
2746 	};
2747 	int err;
2748 
2749 	if (cb->strict_check) {
2750 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2751 					    &filter, cb);
2752 		if (err < 0)
2753 			return err;
2754 	}
2755 
2756 	if (filter.table_id) {
2757 		struct mr_table *mrt;
2758 
2759 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2760 		if (!mrt) {
2761 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2762 				return skb->len;
2763 
2764 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2765 			return -ENOENT;
2766 		}
2767 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2768 				    &mfc_unres_lock, &filter);
2769 		return skb->len ? : err;
2770 	}
2771 
2772 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2773 				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2774 }
2775