xref: /linux/net/ipv6/ip6mr.c (revision be54f8c558027a218423134dd9b8c7c46d92204a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
53 struct ip6mr_rule {
54 	struct fib_rule		common;
55 };
56 
57 struct ip6mr_result {
58 	struct mr_table	*mrt;
59 };
60 
61 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
62    Note that changes are serialized via rtnl_lock.
63  */
64 
65 static DEFINE_SPINLOCK(mrt_lock);
66 
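/* Dereference a vif's device pointer; the caller must hold rcu_read_lock(). */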
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76 
77 /* We return to Alan's original scheme. The hash table of resolved
78    entries is changed only in process context and protected
79    with the weak lock mrt_lock. The queue of unresolved entries is
80    protected with the strong spinlock mfc_unres_lock.
81 
82    This way the data path is free of exclusive locks entirely.
83  */
84 
85 static struct kmem_cache *mrt_cachep __read_mostly;
86 
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89 
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 			   struct net_device *dev, struct sk_buff *skb,
92 			   struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 			      mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 			      int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 			      struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 			       struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104 
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
106 #define ip6mr_for_each_table(mrt, net) \
107 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
108 				lockdep_rtnl_is_held() || \
109 				list_empty(&net->ipv6.mr6_tables))
110 
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 					    struct mr_table *mrt)
113 {
114 	struct mr_table *ret;
115 
116 	if (!mrt)
117 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 				     struct mr_table, list);
119 	else
120 		ret = list_entry_rcu(mrt->list.next,
121 				     struct mr_table, list);
122 
123 	if (&ret->list == &net->ipv6.mr6_tables)
124 		return NULL;
125 	return ret;
126 }
127 
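/* Look up an mr_table by id; the _rcu list walk relies on RCU or RTNL. */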
128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
140 {
141 	struct mr_table *mrt;
142 
143 	rcu_read_lock();
144 	mrt = __ip6mr_get_table(net, id);
145 	rcu_read_unlock();
146 	return mrt;
147 }
148 
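/* Resolve which mr_table handles this flow by running the IP6MR fib rules. */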
149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 			    struct mr_table **mrt)
151 {
152 	int err;
153 	struct ip6mr_result res;
154 	struct fib_lookup_arg arg = {
155 		.result = &res,
156 		.flags = FIB_LOOKUP_NOREF,
157 	};
158 
159 	/* update flow if oif or iif point to device enslaved to l3mdev */
160 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161 
162 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 			       flowi6_to_flowi(flp6), 0, &arg);
164 	if (err < 0)
165 		return err;
166 	*mrt = res.mrt;
167 	return 0;
168 }
169 
170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 			     int flags, struct fib_lookup_arg *arg)
172 {
173 	struct ip6mr_result *res = arg->result;
174 	struct mr_table *mrt;
175 
176 	switch (rule->action) {
177 	case FR_ACT_TO_TBL:
178 		break;
179 	case FR_ACT_UNREACHABLE:
180 		return -ENETUNREACH;
181 	case FR_ACT_PROHIBIT:
182 		return -EACCES;
183 	case FR_ACT_BLACKHOLE:
184 	default:
185 		return -EINVAL;
186 	}
187 
188 	arg->table = fib_rule_get_table(rule, arg);
189 
190 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 	if (!mrt)
192 		return -EAGAIN;
193 	res->mrt = mrt;
194 	return 0;
195 }
196 
197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
198 {
199 	return 1;
200 }
201 
202 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
203 				struct fib_rule_hdr *frh, struct nlattr **tb,
204 				struct netlink_ext_ack *extack)
205 {
206 	return 0;
207 }
208 
209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
210 			      struct nlattr **tb)
211 {
212 	return 1;
213 }
214 
215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 			   struct fib_rule_hdr *frh)
217 {
218 	frh->dst_len = 0;
219 	frh->src_len = 0;
220 	frh->tos     = 0;
221 	return 0;
222 }
223 
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 	.family		= RTNL_FAMILY_IP6MR,
226 	.rule_size	= sizeof(struct ip6mr_rule),
227 	.addr_size	= sizeof(struct in6_addr),
228 	.action		= ip6mr_rule_action,
229 	.match		= ip6mr_rule_match,
230 	.configure	= ip6mr_rule_configure,
231 	.compare	= ip6mr_rule_compare,
232 	.fill		= ip6mr_rule_fill,
233 	.nlgroup	= RTNLGRP_IPV6_RULE,
234 	.owner		= THIS_MODULE,
235 };
236 
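/* Register the IP6MR rule ops, create the default table and add a catch-all rule for it. */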
237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 	struct fib_rules_ops *ops;
240 	struct mr_table *mrt;
241 	int err;
242 
243 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 	if (IS_ERR(ops))
245 		return PTR_ERR(ops);
246 
247 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248 
249 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 	if (IS_ERR(mrt)) {
251 		err = PTR_ERR(mrt);
252 		goto err1;
253 	}
254 
255 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
256 	if (err < 0)
257 		goto err2;
258 
259 	net->ipv6.mr6_rules_ops = ops;
260 	return 0;
261 
262 err2:
263 	rtnl_lock();
264 	ip6mr_free_table(mrt);
265 	rtnl_unlock();
266 err1:
267 	fib_rules_unregister(ops);
268 	return err;
269 }
270 
271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 	struct mr_table *mrt, *next;
274 
275 	ASSERT_RTNL();
276 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 		list_del(&mrt->list);
278 		ip6mr_free_table(mrt);
279 	}
280 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282 
283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 			    struct netlink_ext_ack *extack)
285 {
286 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288 
289 static unsigned int ip6mr_rules_seq_read(const struct net *net)
290 {
291 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293 
294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
301 #define ip6mr_for_each_table(mrt, net) \
302 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303 
304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 					    struct mr_table *mrt)
306 {
307 	if (!mrt)
308 		return net->ipv6.mrt6;
309 	return NULL;
310 }
311 
312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
313 {
314 	return net->ipv6.mrt6;
315 }
316 
317 #define __ip6mr_get_table ip6mr_get_table
318 
319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 			    struct mr_table **mrt)
321 {
322 	*mrt = net->ipv6.mrt6;
323 	return 0;
324 }
325 
326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 	struct mr_table *mrt;
329 
330 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 	if (IS_ERR(mrt))
332 		return PTR_ERR(mrt);
333 	net->ipv6.mrt6 = mrt;
334 	return 0;
335 }
336 
337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 	ASSERT_RTNL();
340 	ip6mr_free_table(net->ipv6.mrt6);
341 	net->ipv6.mrt6 = NULL;
342 }
343 
344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
345 			    struct netlink_ext_ack *extack)
346 {
347 	return 0;
348 }
349 
350 static unsigned int ip6mr_rules_seq_read(const struct net *net)
351 {
352 	return 0;
353 }
354 #endif
355 
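/* rhashtable compare function: returns 0 when both group and origin match the key. */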
356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 			  const void *ptr)
358 {
359 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361 
362 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365 
366 static const struct rhashtable_params ip6mr_rht_params = {
367 	.head_offset = offsetof(struct mr_mfc, mnode),
368 	.key_offset = offsetof(struct mfc6_cache, cmparg),
369 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
370 	.nelem_hint = 3,
371 	.obj_cmpfn = ip6mr_hash_cmp,
372 	.automatic_shrinking = true,
373 };
374 
375 static void ip6mr_new_table_set(struct mr_table *mrt,
376 				struct net *net)
377 {
378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
379 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
380 #endif
381 }
382 
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 	.mf6c_origin = IN6ADDR_ANY_INIT,
385 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387 
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 	.rht_params = &ip6mr_rht_params,
390 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392 
393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
394 {
395 	struct mr_table *mrt;
396 
397 	mrt = __ip6mr_get_table(net, id);
398 	if (mrt)
399 		return mrt;
400 
401 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
402 			      ipmr_expire_process, ip6mr_new_table_set);
403 }
404 
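/* Tear down a table: stop the expire timer, flush all MIFs and MFC entries, free it. */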
405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 	struct net *net = read_pnet(&mrt->net);
408 
409 	WARN_ON_ONCE(!mr_can_free_table(net));
410 
411 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
412 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
413 				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
414 	rhltable_destroy(&mrt->mfc_hash);
415 	kfree(mrt);
416 }
417 
418 #ifdef CONFIG_PROC_FS
419 /* The /proc interfaces to multicast routing
420  * /proc/ip6_mr_cache /proc/ip6_mr_vif
421  */
422 
423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
424 	__acquires(RCU)
425 {
426 	struct mr_vif_iter *iter = seq->private;
427 	struct net *net = seq_file_net(seq);
428 	struct mr_table *mrt;
429 
430 	rcu_read_lock();
431 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
432 	if (!mrt) {
433 		rcu_read_unlock();
434 		return ERR_PTR(-ENOENT);
435 	}
436 
437 	iter->mrt = mrt;
438 
439 	return mr_vif_seq_start(seq, pos);
440 }
441 
442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
443 	__releases(RCU)
444 {
445 	rcu_read_unlock();
446 }
447 
448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
449 {
450 	struct mr_vif_iter *iter = seq->private;
451 	struct mr_table *mrt = iter->mrt;
452 
453 	if (v == SEQ_START_TOKEN) {
454 		seq_puts(seq,
455 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
456 	} else {
457 		const struct vif_device *vif = v;
458 		const struct net_device *vif_dev;
459 		const char *name;
460 
461 		vif_dev = vif_dev_read(vif);
462 		name = vif_dev ? vif_dev->name : "none";
463 
464 		seq_printf(seq,
465 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
466 			   vif - mrt->vif_table,
467 			   name, vif->bytes_in, vif->pkt_in,
468 			   vif->bytes_out, vif->pkt_out,
469 			   vif->flags);
470 	}
471 	return 0;
472 }
473 
474 static const struct seq_operations ip6mr_vif_seq_ops = {
475 	.start = ip6mr_vif_seq_start,
476 	.next  = mr_vif_seq_next,
477 	.stop  = ip6mr_vif_seq_stop,
478 	.show  = ip6mr_vif_seq_show,
479 };
480 
481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
482 {
483 	struct net *net = seq_file_net(seq);
484 	struct mr_table *mrt;
485 
486 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
487 	if (!mrt)
488 		return ERR_PTR(-ENOENT);
489 
490 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
491 }
492 
493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
494 {
495 	int n;
496 
497 	if (v == SEQ_START_TOKEN) {
498 		seq_puts(seq,
499 			 "Group                            "
500 			 "Origin                           "
501 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
502 	} else {
503 		const struct mfc6_cache *mfc = v;
504 		const struct mr_mfc_iter *it = seq->private;
505 		struct mr_table *mrt = it->mrt;
506 
507 		seq_printf(seq, "%pI6 %pI6 %-3hd",
508 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
509 			   mfc->_c.mfc_parent);
510 
511 		if (it->cache != &mrt->mfc_unres_queue) {
512 			seq_printf(seq, " %8lu %8lu %8lu",
513 				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
514 				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
515 				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
516 			for (n = mfc->_c.mfc_un.res.minvif;
517 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
518 				if (VIF_EXISTS(mrt, n) &&
519 				    mfc->_c.mfc_un.res.ttls[n] < 255)
520 					seq_printf(seq,
521 						   " %2d:%-3d", n,
522 						   mfc->_c.mfc_un.res.ttls[n]);
523 			}
524 		} else {
525 			/* unresolved mfc_caches don't contain
526 			 * pkt, bytes and wrong_if values
527 			 */
528 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
529 		}
530 		seq_putc(seq, '\n');
531 	}
532 	return 0;
533 }
534 
535 static const struct seq_operations ipmr_mfc_seq_ops = {
536 	.start = ipmr_mfc_seq_start,
537 	.next  = mr_mfc_seq_next,
538 	.stop  = mr_mfc_seq_stop,
539 	.show  = ipmr_mfc_seq_show,
540 };
541 #endif
542 
543 #ifdef CONFIG_IPV6_PIMSM_V2
544 
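/* PIMv2 REGISTER receive path: validate the header and checksum, then
 * decapsulate the inner multicast packet onto the register device.
 */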
545 static int pim6_rcv(struct sk_buff *skb)
546 {
547 	struct pimreghdr *pim;
548 	struct ipv6hdr   *encap;
549 	struct net_device  *reg_dev = NULL;
550 	struct net *net = dev_net(skb->dev);
551 	struct mr_table *mrt;
552 	struct flowi6 fl6 = {
553 		.flowi6_iif	= skb->dev->ifindex,
554 		.flowi6_mark	= skb->mark,
555 	};
556 	int reg_vif_num;
557 
558 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
559 		goto drop;
560 
561 	pim = (struct pimreghdr *)skb_transport_header(skb);
562 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
563 	    (pim->flags & PIM_NULL_REGISTER) ||
564 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
565 			     sizeof(*pim), IPPROTO_PIM,
566 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
567 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
568 		goto drop;
569 
570 	/* check if the inner packet is destined to mcast group */
571 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
572 				   sizeof(*pim));
573 
574 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
575 	    encap->payload_len == 0 ||
576 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
577 		goto drop;
578 
579 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
580 		goto drop;
581 
582 	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
583 	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
584 	if (reg_vif_num >= 0)
585 		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
586 
587 	if (!reg_dev)
588 		goto drop;
589 
590 	skb->mac_header = skb->network_header;
591 	skb_pull(skb, (u8 *)encap - skb->data);
592 	skb_reset_network_header(skb);
593 	skb->protocol = htons(ETH_P_IPV6);
594 	skb->ip_summed = CHECKSUM_NONE;
595 
596 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
597 
598 	netif_rx(skb);
599 
600 	return 0;
601  drop:
602 	kfree_skb(skb);
603 	return 0;
604 }
605 
606 static const struct inet6_protocol pim6_protocol = {
607 	.handler	=	pim6_rcv,
608 };
609 
610 /* Service routines creating virtual interfaces: PIMREG */
611 
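/* Transmit on a register vif: report the whole packet to the daemon
 * as MRT6MSG_WHOLEPKT, then drop it.
 */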
612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
613 				      struct net_device *dev)
614 {
615 	struct net *net = dev_net(dev);
616 	struct mr_table *mrt;
617 	struct flowi6 fl6 = {
618 		.flowi6_oif	= dev->ifindex,
619 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
620 		.flowi6_mark	= skb->mark,
621 	};
622 
623 	if (!pskb_inet_may_pull(skb))
624 		goto tx_err;
625 
626 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
627 		goto tx_err;
628 
629 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
630 	DEV_STATS_INC(dev, tx_packets);
631 	rcu_read_lock();
632 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
633 			   MRT6MSG_WHOLEPKT);
634 	rcu_read_unlock();
635 	kfree_skb(skb);
636 	return NETDEV_TX_OK;
637 
638 tx_err:
639 	DEV_STATS_INC(dev, tx_errors);
640 	kfree_skb(skb);
641 	return NETDEV_TX_OK;
642 }
643 
644 static int reg_vif_get_iflink(const struct net_device *dev)
645 {
646 	return 0;
647 }
648 
649 static const struct net_device_ops reg_vif_netdev_ops = {
650 	.ndo_start_xmit	= reg_vif_xmit,
651 	.ndo_get_iflink = reg_vif_get_iflink,
652 };
653 
654 static void reg_vif_setup(struct net_device *dev)
655 {
656 	dev->type		= ARPHRD_PIMREG;
657 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
658 	dev->flags		= IFF_NOARP;
659 	dev->netdev_ops		= &reg_vif_netdev_ops;
660 	dev->needs_free_netdev	= true;
661 	dev->netns_immutable	= true;
662 }
663 
664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
665 {
666 	struct net_device *dev;
667 	char name[IFNAMSIZ];
668 
669 	if (mrt->id == RT6_TABLE_DFLT)
670 		sprintf(name, "pim6reg");
671 	else
672 		sprintf(name, "pim6reg%u", mrt->id);
673 
674 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
675 	if (!dev)
676 		return NULL;
677 
678 	dev_net_set(dev, net);
679 
680 	if (register_netdevice(dev)) {
681 		free_netdev(dev);
682 		return NULL;
683 	}
684 
685 	if (dev_open(dev, NULL))
686 		goto failure;
687 
688 	dev_hold(dev);
689 	return dev;
690 
691 failure:
692 	unregister_netdevice(dev);
693 	return NULL;
694 }
695 #endif
696 
697 static int call_ip6mr_vif_entry_notifiers(struct net *net,
698 					  enum fib_event_type event_type,
699 					  struct vif_device *vif,
700 					  struct net_device *vif_dev,
701 					  mifi_t vif_index, u32 tb_id)
702 {
703 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
704 				     vif, vif_dev, vif_index, tb_id,
705 				     &net->ipv6.ipmr_seq);
706 }
707 
708 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
709 					  enum fib_event_type event_type,
710 					  struct mfc6_cache *mfc, u32 tb_id)
711 {
712 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
713 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
714 }
715 
716 /* Delete a VIF entry */
717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
718 		       struct list_head *head)
719 {
720 	struct vif_device *v;
721 	struct net_device *dev;
722 	struct inet6_dev *in6_dev;
723 
724 	if (vifi < 0 || vifi >= mrt->maxvif)
725 		return -EADDRNOTAVAIL;
726 
727 	v = &mrt->vif_table[vifi];
728 
729 	dev = rtnl_dereference(v->dev);
730 	if (!dev)
731 		return -EADDRNOTAVAIL;
732 
733 	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
734 				       FIB_EVENT_VIF_DEL, v, dev,
735 				       vifi, mrt->id);
736 	spin_lock(&mrt_lock);
737 	RCU_INIT_POINTER(v->dev, NULL);
738 
739 #ifdef CONFIG_IPV6_PIMSM_V2
740 	if (vifi == mrt->mroute_reg_vif_num) {
741 		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
742 		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
743 	}
744 #endif
745 
746 	if (vifi + 1 == mrt->maxvif) {
747 		int tmp;
748 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
749 			if (VIF_EXISTS(mrt, tmp))
750 				break;
751 		}
752 		WRITE_ONCE(mrt->maxvif, tmp + 1);
753 	}
754 
755 	spin_unlock(&mrt_lock);
756 
757 	dev_set_allmulti(dev, -1);
758 
759 	in6_dev = __in6_dev_get(dev);
760 	if (in6_dev) {
761 		atomic_dec(&in6_dev->cnf.mc_forwarding);
762 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
763 					     NETCONFA_MC_FORWARDING,
764 					     dev->ifindex, &in6_dev->cnf);
765 	}
766 
767 	if ((v->flags & MIFF_REGISTER) && !notify)
768 		unregister_netdevice_queue(dev, head);
769 
770 	netdev_put(dev, &v->dev_tracker);
771 	return 0;
772 }
773 
774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
775 {
776 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
777 
778 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
779 }
780 
781 static inline void ip6mr_cache_free(struct mfc6_cache *c)
782 {
783 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
784 }
785 
786 /* Destroy an unresolved cache entry, killing queued skbs
787    and reporting error to netlink readers.
788  */
789 
790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
791 {
792 	struct net *net = read_pnet(&mrt->net);
793 	struct sk_buff *skb;
794 
795 	atomic_dec(&mrt->cache_resolve_queue_len);
796 
797 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
798 		if (ipv6_hdr(skb)->version == 0) {
799 			struct nlmsghdr *nlh = skb_pull(skb,
800 							sizeof(struct ipv6hdr));
801 			nlh->nlmsg_type = NLMSG_ERROR;
802 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
803 			skb_trim(skb, nlh->nlmsg_len);
804 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
805 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
806 		} else
807 			kfree_skb(skb);
808 	}
809 
810 	ip6mr_cache_free(c);
811 }
812 
813 
814 /* Timer process for all the unresolved queue. */
815 
816 static void ipmr_do_expire_process(struct mr_table *mrt)
817 {
818 	unsigned long now = jiffies;
819 	unsigned long expires = 10 * HZ;
820 	struct mr_mfc *c, *next;
821 
822 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
823 		if (time_after(c->mfc_un.unres.expires, now)) {
824 			/* not yet... */
825 			unsigned long interval = c->mfc_un.unres.expires - now;
826 			if (interval < expires)
827 				expires = interval;
828 			continue;
829 		}
830 
831 		list_del(&c->list);
832 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
833 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
834 	}
835 
836 	if (!list_empty(&mrt->mfc_unres_queue))
837 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
838 }
839 
840 static void ipmr_expire_process(struct timer_list *t)
841 {
842 	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
843 
844 	if (!spin_trylock(&mfc_unres_lock)) {
845 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
846 		return;
847 	}
848 
849 	if (!list_empty(&mrt->mfc_unres_queue))
850 		ipmr_do_expire_process(mrt);
851 
852 	spin_unlock(&mfc_unres_lock);
853 }
854 
855 /* Fill the oifs list. Called with mrt_lock held. */
856 
857 static void ip6mr_update_thresholds(struct mr_table *mrt,
858 				    struct mr_mfc *cache,
859 				    unsigned char *ttls)
860 {
861 	int vifi;
862 
863 	cache->mfc_un.res.minvif = MAXMIFS;
864 	cache->mfc_un.res.maxvif = 0;
865 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
866 
867 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
868 		if (VIF_EXISTS(mrt, vifi) &&
869 		    ttls[vifi] && ttls[vifi] < 255) {
870 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
871 			if (cache->mfc_un.res.minvif > vifi)
872 				cache->mfc_un.res.minvif = vifi;
873 			if (cache->mfc_un.res.maxvif <= vifi)
874 				cache->mfc_un.res.maxvif = vifi + 1;
875 		}
876 	}
877 	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
878 }
879 
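/* Add a multicast interface to the table; called under RTNL. */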
880 static int mif6_add(struct net *net, struct mr_table *mrt,
881 		    struct mif6ctl *vifc, int mrtsock)
882 {
883 	int vifi = vifc->mif6c_mifi;
884 	struct vif_device *v = &mrt->vif_table[vifi];
885 	struct net_device *dev;
886 	struct inet6_dev *in6_dev;
887 	int err;
888 
889 	/* Is the vif busy? */
890 	if (VIF_EXISTS(mrt, vifi))
891 		return -EADDRINUSE;
892 
893 	switch (vifc->mif6c_flags) {
894 #ifdef CONFIG_IPV6_PIMSM_V2
895 	case MIFF_REGISTER:
896 		/*
897 		 * Special Purpose VIF in PIM
898 		 * All the packets will be sent to the daemon
899 		 */
900 		if (mrt->mroute_reg_vif_num >= 0)
901 			return -EADDRINUSE;
902 		dev = ip6mr_reg_vif(net, mrt);
903 		if (!dev)
904 			return -ENOBUFS;
905 		err = dev_set_allmulti(dev, 1);
906 		if (err) {
907 			unregister_netdevice(dev);
908 			dev_put(dev);
909 			return err;
910 		}
911 		break;
912 #endif
913 	case 0:
914 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
915 		if (!dev)
916 			return -EADDRNOTAVAIL;
917 		err = dev_set_allmulti(dev, 1);
918 		if (err) {
919 			dev_put(dev);
920 			return err;
921 		}
922 		break;
923 	default:
924 		return -EINVAL;
925 	}
926 
927 	in6_dev = __in6_dev_get(dev);
928 	if (in6_dev) {
929 		atomic_inc(&in6_dev->cnf.mc_forwarding);
930 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
931 					     NETCONFA_MC_FORWARDING,
932 					     dev->ifindex, &in6_dev->cnf);
933 	}
934 
935 	/* Fill in the VIF structures */
936 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
937 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
938 			MIFF_REGISTER);
939 
940 	/* And finish update writing critical data */
941 	spin_lock(&mrt_lock);
942 	rcu_assign_pointer(v->dev, dev);
943 	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
944 #ifdef CONFIG_IPV6_PIMSM_V2
945 	if (v->flags & MIFF_REGISTER)
946 		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
947 #endif
948 	if (vifi + 1 > mrt->maxvif)
949 		WRITE_ONCE(mrt->maxvif, vifi + 1);
950 	spin_unlock(&mrt_lock);
951 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
952 				       v, dev, vifi, mrt->id);
953 	return 0;
954 }
955 
956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
957 					   const struct in6_addr *origin,
958 					   const struct in6_addr *mcastgrp)
959 {
960 	struct mfc6_cache_cmp_arg arg = {
961 		.mf6c_origin = *origin,
962 		.mf6c_mcastgrp = *mcastgrp,
963 	};
964 
965 	return mr_mfc_find(mrt, &arg);
966 }
967 
968 /* Look for a (*,G) entry */
969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
970 					       struct in6_addr *mcastgrp,
971 					       mifi_t mifi)
972 {
973 	struct mfc6_cache_cmp_arg arg = {
974 		.mf6c_origin = in6addr_any,
975 		.mf6c_mcastgrp = *mcastgrp,
976 	};
977 
978 	if (ipv6_addr_any(mcastgrp))
979 		return mr_mfc_find_any_parent(mrt, mifi);
980 	return mr_mfc_find_any(mrt, mifi, &arg);
981 }
982 
983 /* Look for a (S,G,iif) entry if parent != -1 */
984 static struct mfc6_cache *
985 ip6mr_cache_find_parent(struct mr_table *mrt,
986 			const struct in6_addr *origin,
987 			const struct in6_addr *mcastgrp,
988 			int parent)
989 {
990 	struct mfc6_cache_cmp_arg arg = {
991 		.mf6c_origin = *origin,
992 		.mf6c_mcastgrp = *mcastgrp,
993 	};
994 
995 	return mr_mfc_find_parent(mrt, &arg, parent);
996 }
997 
998 /* Allocate a multicast cache entry */
999 static struct mfc6_cache *ip6mr_cache_alloc(void)
1000 {
1001 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1002 	if (!c)
1003 		return NULL;
1004 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1005 	c->_c.mfc_un.res.minvif = MAXMIFS;
1006 	c->_c.free = ip6mr_cache_free_rcu;
1007 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1008 	return c;
1009 }
1010 
1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1012 {
1013 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1014 	if (!c)
1015 		return NULL;
1016 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1017 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1018 	return c;
1019 }
1020 
1021 /*
1022  *	A cache entry has moved from the unresolved queue to the resolved state
1023  */
1024 
1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1026 				struct mfc6_cache *uc, struct mfc6_cache *c)
1027 {
1028 	struct sk_buff *skb;
1029 
1030 	/*
1031 	 *	Play the pending entries through our router
1032 	 */
1033 
1034 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1035 		if (ipv6_hdr(skb)->version == 0) {
1036 			struct nlmsghdr *nlh = skb_pull(skb,
1037 							sizeof(struct ipv6hdr));
1038 
1039 			if (mr_fill_mroute(mrt, skb, &c->_c,
1040 					   nlmsg_data(nlh)) > 0) {
1041 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1042 			} else {
1043 				nlh->nlmsg_type = NLMSG_ERROR;
1044 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1045 				skb_trim(skb, nlh->nlmsg_len);
1046 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1047 			}
1048 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1049 		} else {
1050 			rcu_read_lock();
1051 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1052 			rcu_read_unlock();
1053 		}
1054 	}
1055 }
1056 
1057 /*
1058  *	Bounce a cache query up to pim6sd and netlink.
1059  *
1060  *	Called under rcu_read_lock()
1061  */
1062 
1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1064 			      mifi_t mifi, int assert)
1065 {
1066 	struct sock *mroute6_sk;
1067 	struct sk_buff *skb;
1068 	struct mrt6msg *msg;
1069 	int ret;
1070 
1071 #ifdef CONFIG_IPV6_PIMSM_V2
1072 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1073 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 						+sizeof(*msg));
1075 	else
1076 #endif
1077 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078 
1079 	if (!skb)
1080 		return -ENOBUFS;
1081 
1082 	/* I suppose that internal messages
1083 	 * do not require checksums */
1084 
1085 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1086 
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1089 		/* Ugly, but we have no choice with this interface.
1090 		   Duplicate old header, fix length etc.
1091 		   And all this only to mangle msg->im6_msgtype and
1092 		   to set msg->im6_mbz to "mbz" :-)
1093 		 */
1094 		__skb_pull(skb, skb_network_offset(pkt));
1095 
1096 		skb_push(skb, sizeof(*msg));
1097 		skb_reset_transport_header(skb);
1098 		msg = (struct mrt6msg *)skb_transport_header(skb);
1099 		msg->im6_mbz = 0;
1100 		msg->im6_msgtype = assert;
1101 		if (assert == MRT6MSG_WRMIFWHOLE)
1102 			msg->im6_mif = mifi;
1103 		else
1104 			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1105 		msg->im6_pad = 0;
1106 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1107 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1108 
1109 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1110 	} else
1111 #endif
1112 	{
1113 	/*
1114 	 *	Copy the IP header
1115 	 */
1116 
1117 	skb_put(skb, sizeof(struct ipv6hdr));
1118 	skb_reset_network_header(skb);
1119 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1120 
1121 	/*
1122 	 *	Add our header
1123 	 */
1124 	skb_put(skb, sizeof(*msg));
1125 	skb_reset_transport_header(skb);
1126 	msg = (struct mrt6msg *)skb_transport_header(skb);
1127 
1128 	msg->im6_mbz = 0;
1129 	msg->im6_msgtype = assert;
1130 	msg->im6_mif = mifi;
1131 	msg->im6_pad = 0;
1132 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1133 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1134 
1135 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1136 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1137 	}
1138 
1139 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1140 	if (!mroute6_sk) {
1141 		kfree_skb(skb);
1142 		return -EINVAL;
1143 	}
1144 
1145 	mrt6msg_netlink_event(mrt, skb);
1146 
1147 	/* Deliver to user space multicast routing algorithms */
1148 	ret = sock_queue_rcv_skb(mroute6_sk, skb);
1149 
1150 	if (ret < 0) {
1151 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1152 		kfree_skb(skb);
1153 	}
1154 
1155 	return ret;
1156 }
1157 
1158 /* Queue a packet for resolution on an unresolved cache entry, under mfc_unres_lock. */
1159 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1160 				  struct sk_buff *skb, struct net_device *dev)
1161 {
1162 	struct mfc6_cache *c;
1163 	bool found = false;
1164 	int err;
1165 
1166 	spin_lock_bh(&mfc_unres_lock);
1167 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1168 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1169 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1170 			found = true;
1171 			break;
1172 		}
1173 	}
1174 
1175 	if (!found) {
1176 		/*
1177 		 *	Create a new entry if allowable
1178 		 */
1179 
1180 		c = ip6mr_cache_alloc_unres();
1181 		if (!c) {
1182 			spin_unlock_bh(&mfc_unres_lock);
1183 
1184 			kfree_skb(skb);
1185 			return -ENOBUFS;
1186 		}
1187 
1188 		/* Fill in the new cache entry */
1189 		c->_c.mfc_parent = -1;
1190 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1191 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1192 
1193 		/*
1194 		 *	Reflect first query at pim6sd
1195 		 */
1196 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1197 		if (err < 0) {
1198 			/* If the report failed throw the cache entry
1199 			   out - Brad Parker
1200 			 */
1201 			spin_unlock_bh(&mfc_unres_lock);
1202 
1203 			ip6mr_cache_free(c);
1204 			kfree_skb(skb);
1205 			return err;
1206 		}
1207 
1208 		atomic_inc(&mrt->cache_resolve_queue_len);
1209 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1210 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1211 
1212 		ipmr_do_expire_process(mrt);
1213 	}
1214 
1215 	/* See if we can append the packet */
1216 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1217 		kfree_skb(skb);
1218 		err = -ENOBUFS;
1219 	} else {
1220 		if (dev) {
1221 			skb->dev = dev;
1222 			skb->skb_iif = dev->ifindex;
1223 		}
1224 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1225 		err = 0;
1226 	}
1227 
1228 	spin_unlock_bh(&mfc_unres_lock);
1229 	return err;
1230 }
1231 
1232 /*
1233  *	MFC6 cache manipulation by user space
1234  */
1235 
1236 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1237 			    int parent)
1238 {
1239 	struct mfc6_cache *c;
1240 
1241 	/* The entries are added/deleted only under RTNL */
1242 	rcu_read_lock();
1243 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1244 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1245 	rcu_read_unlock();
1246 	if (!c)
1247 		return -ENOENT;
1248 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1249 	list_del_rcu(&c->_c.list);
1250 
1251 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1252 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1253 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1254 	mr_cache_put(&c->_c);
1255 	return 0;
1256 }
1257 
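/* netdevice notifier: on NETDEV_UNREGISTER, delete every vif bound to the device. */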
1258 static int ip6mr_device_event(struct notifier_block *this,
1259 			      unsigned long event, void *ptr)
1260 {
1261 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1262 	struct net *net = dev_net(dev);
1263 	struct mr_table *mrt;
1264 	struct vif_device *v;
1265 	int ct;
1266 
1267 	if (event != NETDEV_UNREGISTER)
1268 		return NOTIFY_DONE;
1269 
1270 	ip6mr_for_each_table(mrt, net) {
1271 		v = &mrt->vif_table[0];
1272 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 			if (rcu_access_pointer(v->dev) == dev)
1274 				mif6_delete(mrt, ct, 1, NULL);
1275 		}
1276 	}
1277 
1278 	return NOTIFY_DONE;
1279 }
1280 
1281 static unsigned int ip6mr_seq_read(const struct net *net)
1282 {
1283 	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1284 }
1285 
1286 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1287 		      struct netlink_ext_ack *extack)
1288 {
1289 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1290 		       ip6mr_mr_table_iter, extack);
1291 }
1292 
1293 static struct notifier_block ip6_mr_notifier = {
1294 	.notifier_call = ip6mr_device_event
1295 };
1296 
1297 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1298 	.family		= RTNL_FAMILY_IP6MR,
1299 	.fib_seq_read	= ip6mr_seq_read,
1300 	.fib_dump	= ip6mr_dump,
1301 	.owner		= THIS_MODULE,
1302 };
1303 
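/* Register this netns's FIB notifier ops for the IP6MR family. */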
1304 static int __net_init ip6mr_notifier_init(struct net *net)
1305 {
1306 	struct fib_notifier_ops *ops;
1307 
1308 	net->ipv6.ipmr_seq = 0;
1309 
1310 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1311 	if (IS_ERR(ops))
1312 		return PTR_ERR(ops);
1313 
1314 	net->ipv6.ip6mr_notifier_ops = ops;
1315 
1316 	return 0;
1317 }
1318 
1319 static void __net_exit ip6mr_notifier_exit(struct net *net)
1320 {
1321 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1322 	net->ipv6.ip6mr_notifier_ops = NULL;
1323 }
1324 
1325 /* Setup for IP multicast routing */
1326 static int __net_init ip6mr_net_init(struct net *net)
1327 {
1328 	int err;
1329 
1330 	err = ip6mr_notifier_init(net);
1331 	if (err)
1332 		return err;
1333 
1334 	err = ip6mr_rules_init(net);
1335 	if (err < 0)
1336 		goto ip6mr_rules_fail;
1337 
1338 #ifdef CONFIG_PROC_FS
1339 	err = -ENOMEM;
1340 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1341 			sizeof(struct mr_vif_iter)))
1342 		goto proc_vif_fail;
1343 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1344 			sizeof(struct mr_mfc_iter)))
1345 		goto proc_cache_fail;
1346 #endif
1347 
1348 	return 0;
1349 
1350 #ifdef CONFIG_PROC_FS
1351 proc_cache_fail:
1352 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1353 proc_vif_fail:
1354 	rtnl_lock();
1355 	ip6mr_rules_exit(net);
1356 	rtnl_unlock();
1357 #endif
1358 ip6mr_rules_fail:
1359 	ip6mr_notifier_exit(net);
1360 	return err;
1361 }
1362 
1363 static void __net_exit ip6mr_net_exit(struct net *net)
1364 {
1365 #ifdef CONFIG_PROC_FS
1366 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1367 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1368 #endif
1369 	ip6mr_notifier_exit(net);
1370 }
1371 
1372 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1373 {
1374 	struct net *net;
1375 
1376 	rtnl_lock();
1377 	list_for_each_entry(net, net_list, exit_list)
1378 		ip6mr_rules_exit(net);
1379 	rtnl_unlock();
1380 }
1381 
1382 static struct pernet_operations ip6mr_net_ops = {
1383 	.init = ip6mr_net_init,
1384 	.exit = ip6mr_net_exit,
1385 	.exit_batch = ip6mr_net_exit_batch,
1386 };
1387 
1388 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1389 	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1390 	 .msgtype = RTM_GETROUTE,
1391 	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
1392 };
1393 
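/* Module init: set up the MFC slab cache, pernet ops, netdevice notifier,
 * the PIM protocol handler and the rtnetlink handlers.
 */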
1394 int __init ip6_mr_init(void)
1395 {
1396 	int err;
1397 
1398 	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1399 	if (!mrt_cachep)
1400 		return -ENOMEM;
1401 
1402 	err = register_pernet_subsys(&ip6mr_net_ops);
1403 	if (err)
1404 		goto reg_pernet_fail;
1405 
1406 	err = register_netdevice_notifier(&ip6_mr_notifier);
1407 	if (err)
1408 		goto reg_notif_fail;
1409 #ifdef CONFIG_IPV6_PIMSM_V2
1410 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1411 		pr_err("%s: can't add PIM protocol\n", __func__);
1412 		err = -EAGAIN;
1413 		goto add_proto_fail;
1414 	}
1415 #endif
1416 	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1417 	if (!err)
1418 		return 0;
1419 
1420 #ifdef CONFIG_IPV6_PIMSM_V2
1421 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1422 add_proto_fail:
1423 	unregister_netdevice_notifier(&ip6_mr_notifier);
1424 #endif
1425 reg_notif_fail:
1426 	unregister_pernet_subsys(&ip6mr_net_ops);
1427 reg_pernet_fail:
1428 	kmem_cache_destroy(mrt_cachep);
1429 	return err;
1430 }
1431 
1432 void __init ip6_mr_cleanup(void)
1433 {
1434 	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
1435 #ifdef CONFIG_IPV6_PIMSM_V2
1436 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1437 #endif
1438 	unregister_netdevice_notifier(&ip6_mr_notifier);
1439 	unregister_pernet_subsys(&ip6mr_net_ops);
1440 	kmem_cache_destroy(mrt_cachep);
1441 }
1442 
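/* Add or replace an MFC entry (under RTNL); if it resolves a queued entry,
 * the pending skbs are replayed through ip6mr_cache_resolve().
 */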
1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1444 			 struct mf6cctl *mfc, int mrtsock, int parent)
1445 {
1446 	unsigned char ttls[MAXMIFS];
1447 	struct mfc6_cache *uc, *c;
1448 	struct mr_mfc *_uc;
1449 	bool found;
1450 	int i, err;
1451 
1452 	if (mfc->mf6cc_parent >= MAXMIFS)
1453 		return -ENFILE;
1454 
1455 	memset(ttls, 255, MAXMIFS);
1456 	for (i = 0; i < MAXMIFS; i++) {
1457 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1458 			ttls[i] = 1;
1459 	}
1460 
1461 	/* The entries are added/deleted only under RTNL */
1462 	rcu_read_lock();
1463 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1464 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1465 	rcu_read_unlock();
1466 	if (c) {
1467 		spin_lock(&mrt_lock);
1468 		c->_c.mfc_parent = mfc->mf6cc_parent;
1469 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1470 		if (!mrtsock)
1471 			c->_c.mfc_flags |= MFC_STATIC;
1472 		spin_unlock(&mrt_lock);
1473 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1474 					       c, mrt->id);
1475 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1476 		return 0;
1477 	}
1478 
1479 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1480 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1481 		return -EINVAL;
1482 
1483 	c = ip6mr_cache_alloc();
1484 	if (!c)
1485 		return -ENOMEM;
1486 
1487 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1488 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1489 	c->_c.mfc_parent = mfc->mf6cc_parent;
1490 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1491 	if (!mrtsock)
1492 		c->_c.mfc_flags |= MFC_STATIC;
1493 
1494 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1495 				  ip6mr_rht_params);
1496 	if (err) {
1497 		pr_err("ip6mr: rhtable insert error %d\n", err);
1498 		ip6mr_cache_free(c);
1499 		return err;
1500 	}
1501 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1502 
1503 	/* Check to see if we resolved a queued entry. If so we
1504 	 * need to send on the pending frames and tidy up.
1505 	 */
1506 	found = false;
1507 	spin_lock_bh(&mfc_unres_lock);
1508 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1509 		uc = (struct mfc6_cache *)_uc;
1510 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1511 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1512 			list_del(&_uc->list);
1513 			atomic_dec(&mrt->cache_resolve_queue_len);
1514 			found = true;
1515 			break;
1516 		}
1517 	}
1518 	if (list_empty(&mrt->mfc_unres_queue))
1519 		timer_delete(&mrt->ipmr_expire_timer);
1520 	spin_unlock_bh(&mfc_unres_lock);
1521 
1522 	if (found) {
1523 		ip6mr_cache_resolve(net, mrt, uc, c);
1524 		ip6mr_cache_free(uc);
1525 	}
1526 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1527 				       c, mrt->id);
1528 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1529 	return 0;
1530 }
1531 
1532 /*
1533  *	Close the multicast socket, and clear the vif tables etc
1534  */
1535 
1536 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1537 {
1538 	struct mr_mfc *c, *tmp;
1539 	LIST_HEAD(list);
1540 	int i;
1541 
1542 	/* Shut down all active vif entries */
1543 	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1544 		for (i = 0; i < mrt->maxvif; i++) {
1545 			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1546 			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1547 			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1548 				continue;
1549 			mif6_delete(mrt, i, 0, &list);
1550 		}
1551 		unregister_netdevice_many(&list);
1552 	}
1553 
1554 	/* Wipe the cache */
1555 	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1556 		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1557 			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1558 			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1559 				continue;
1560 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1561 			list_del_rcu(&c->list);
1562 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1563 						       FIB_EVENT_ENTRY_DEL,
1564 						       (struct mfc6_cache *)c, mrt->id);
1565 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1566 			mr_cache_put(c);
1567 		}
1568 	}
1569 
1570 	if (flags & MRT6_FLUSH_MFC) {
1571 		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1572 			spin_lock_bh(&mfc_unres_lock);
1573 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1574 				list_del(&c->list);
1575 				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1576 						  RTM_DELROUTE);
1577 				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1578 			}
1579 			spin_unlock_bh(&mfc_unres_lock);
1580 		}
1581 	}
1582 }
1583 
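/* Install sk as the table's mroute control socket and bump mc_forwarding. */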
1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1585 {
1586 	int err = 0;
1587 	struct net *net = sock_net(sk);
1588 
1589 	rtnl_lock();
1590 	spin_lock(&mrt_lock);
1591 	if (rtnl_dereference(mrt->mroute_sk)) {
1592 		err = -EADDRINUSE;
1593 	} else {
1594 		rcu_assign_pointer(mrt->mroute_sk, sk);
1595 		sock_set_flag(sk, SOCK_RCU_FREE);
1596 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1597 	}
1598 	spin_unlock(&mrt_lock);
1599 
1600 	if (!err)
1601 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1602 					     NETCONFA_MC_FORWARDING,
1603 					     NETCONFA_IFINDEX_ALL,
1604 					     net->ipv6.devconf_all);
1605 	rtnl_unlock();
1606 
1607 	return err;
1608 }
1609 
1610 int ip6mr_sk_done(struct sock *sk)
1611 {
1612 	struct net *net = sock_net(sk);
1613 	struct ipv6_devconf *devconf;
1614 	struct mr_table *mrt;
1615 	int err = -EACCES;
1616 
1617 	if (sk->sk_type != SOCK_RAW ||
1618 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 		return err;
1620 
1621 	devconf = net->ipv6.devconf_all;
1622 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1623 		return err;
1624 
1625 	rtnl_lock();
1626 	ip6mr_for_each_table(mrt, net) {
1627 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1628 			spin_lock(&mrt_lock);
1629 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1630 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1631 			 * so the RCU grace period before sk freeing
1632 			 * is guaranteed by sk_destruct()
1633 			 */
1634 			atomic_dec(&devconf->mc_forwarding);
1635 			spin_unlock(&mrt_lock);
1636 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1637 						     NETCONFA_MC_FORWARDING,
1638 						     NETCONFA_IFINDEX_ALL,
1639 						     net->ipv6.devconf_all);
1640 
1641 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1642 			err = 0;
1643 			break;
1644 		}
1645 	}
1646 	rtnl_unlock();
1647 
1648 	return err;
1649 }
1650 
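/* True if an mroute6 control socket owns the table this skb's flow maps to. */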
1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1652 {
1653 	struct mr_table *mrt;
1654 	struct flowi6 fl6 = {
1655 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1656 		.flowi6_oif	= skb->dev->ifindex,
1657 		.flowi6_mark	= skb->mark,
1658 	};
1659 
1660 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1661 		return NULL;
1662 
1663 	return rcu_access_pointer(mrt->mroute_sk);
1664 }
1665 EXPORT_SYMBOL(mroute6_is_socket);
1666 
1667 /*
1668  *	Socket options and virtual interface manipulation. The whole
1669  *	virtual interface system is a complete heap, but unfortunately
1670  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1671  *	MOSPF/PIM router set up we can clean this up.
1672  */
1673 
1674 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1675 			  unsigned int optlen)
1676 {
1677 	int ret, parent = 0;
1678 	struct mif6ctl vif;
1679 	struct mf6cctl mfc;
1680 	mifi_t mifi;
1681 	struct net *net = sock_net(sk);
1682 	struct mr_table *mrt;
1683 
1684 	if (sk->sk_type != SOCK_RAW ||
1685 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1686 		return -EOPNOTSUPP;
1687 
1688 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1689 	if (!mrt)
1690 		return -ENOENT;
1691 
1692 	if (optname != MRT6_INIT) {
1693 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1694 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1695 			return -EACCES;
1696 	}
1697 
1698 	switch (optname) {
1699 	case MRT6_INIT:
1700 		if (optlen < sizeof(int))
1701 			return -EINVAL;
1702 
1703 		return ip6mr_sk_init(mrt, sk);
1704 
1705 	case MRT6_DONE:
1706 		return ip6mr_sk_done(sk);
1707 
1708 	case MRT6_ADD_MIF:
1709 		if (optlen < sizeof(vif))
1710 			return -EINVAL;
1711 		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1712 			return -EFAULT;
1713 		if (vif.mif6c_mifi >= MAXMIFS)
1714 			return -ENFILE;
1715 		rtnl_lock();
1716 		ret = mif6_add(net, mrt, &vif,
1717 			       sk == rtnl_dereference(mrt->mroute_sk));
1718 		rtnl_unlock();
1719 		return ret;
1720 
1721 	case MRT6_DEL_MIF:
1722 		if (optlen < sizeof(mifi_t))
1723 			return -EINVAL;
1724 		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1725 			return -EFAULT;
1726 		rtnl_lock();
1727 		ret = mif6_delete(mrt, mifi, 0, NULL);
1728 		rtnl_unlock();
1729 		return ret;
1730 
1731 	/*
1732 	 *	Manipulate the forwarding caches. These live
1733 	 *	in a sort of kernel/user symbiosis.
1734 	 */
1735 	case MRT6_ADD_MFC:
1736 	case MRT6_DEL_MFC:
1737 		parent = -1;
1738 		fallthrough;
1739 	case MRT6_ADD_MFC_PROXY:
1740 	case MRT6_DEL_MFC_PROXY:
1741 		if (optlen < sizeof(mfc))
1742 			return -EINVAL;
1743 		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1744 			return -EFAULT;
1745 		if (parent == 0)
1746 			parent = mfc.mf6cc_parent;
1747 		rtnl_lock();
1748 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1749 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1750 		else
1751 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1752 					    sk ==
1753 					    rtnl_dereference(mrt->mroute_sk),
1754 					    parent);
1755 		rtnl_unlock();
1756 		return ret;
1757 
1758 	case MRT6_FLUSH:
1759 	{
1760 		int flags;
1761 
1762 		if (optlen != sizeof(flags))
1763 			return -EINVAL;
1764 		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1765 			return -EFAULT;
1766 		rtnl_lock();
1767 		mroute_clean_tables(mrt, flags);
1768 		rtnl_unlock();
1769 		return 0;
1770 	}
1771 
1772 	/*
1773 	 *	Control PIM assert (to activate pim will activate assert)
1774 	 */
1775 	case MRT6_ASSERT:
1776 	{
1777 		int v;
1778 
1779 		if (optlen != sizeof(v))
1780 			return -EINVAL;
1781 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1782 			return -EFAULT;
1783 		mrt->mroute_do_assert = v;
1784 		return 0;
1785 	}
1786 
1787 #ifdef CONFIG_IPV6_PIMSM_V2
1788 	case MRT6_PIM:
1789 	{
1790 		bool do_wrmifwhole;
1791 		int v;
1792 
1793 		if (optlen != sizeof(v))
1794 			return -EINVAL;
1795 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1796 			return -EFAULT;
1797 
1798 		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1799 		v = !!v;
1800 		rtnl_lock();
1801 		ret = 0;
1802 		if (v != mrt->mroute_do_pim) {
1803 			mrt->mroute_do_pim = v;
1804 			mrt->mroute_do_assert = v;
1805 			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1806 		}
1807 		rtnl_unlock();
1808 		return ret;
1809 	}
1810 
1811 #endif
1812 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1813 	case MRT6_TABLE:
1814 	{
1815 		u32 v;
1816 
1817 		if (optlen != sizeof(u32))
1818 			return -EINVAL;
1819 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1820 			return -EFAULT;
1821 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1822 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1823 			return -EINVAL;
1824 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1825 			return -EBUSY;
1826 
1827 		rtnl_lock();
1828 		ret = 0;
1829 		mrt = ip6mr_new_table(net, v);
1830 		if (IS_ERR(mrt))
1831 			ret = PTR_ERR(mrt);
1832 		else
1833 			raw6_sk(sk)->ip6mr_table = v;
1834 		rtnl_unlock();
1835 		return ret;
1836 	}
1837 #endif
1838 	/*
1839 	 *	Spurious command, or MRT6_VERSION which you cannot
1840 	 *	set.
1841 	 */
1842 	default:
1843 		return -ENOPROTOOPT;
1844 	}
1845 }
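
/*
 * Example (userspace, hypothetical; a minimal sketch, error handling
 * omitted): a routing daemon drives the options above via setsockopt()
 * on a raw ICMPv6 socket. The interface name "eth0" is illustrative
 * only; MFC entries are installed the same way with MRT6_ADD_MFC and a
 * struct mf6cctl.
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 */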
1846 
1847 /*
1848  *	Getsockopt support for the multicast routing system.
1849  */
1850 
1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1852 			  sockptr_t optlen)
1853 {
1854 	int olr;
1855 	int val;
1856 	struct net *net = sock_net(sk);
1857 	struct mr_table *mrt;
1858 
1859 	if (sk->sk_type != SOCK_RAW ||
1860 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1861 		return -EOPNOTSUPP;
1862 
1863 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 	if (!mrt)
1865 		return -ENOENT;
1866 
1867 	switch (optname) {
1868 	case MRT6_VERSION:
1869 		val = 0x0305;
1870 		break;
1871 #ifdef CONFIG_IPV6_PIMSM_V2
1872 	case MRT6_PIM:
1873 		val = mrt->mroute_do_pim;
1874 		break;
1875 #endif
1876 	case MRT6_ASSERT:
1877 		val = mrt->mroute_do_assert;
1878 		break;
1879 	default:
1880 		return -ENOPROTOOPT;
1881 	}
1882 
1883 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1884 		return -EFAULT;
1885 
1886 	olr = min_t(int, olr, sizeof(int));
1887 	if (olr < 0)
1888 		return -EINVAL;
1889 
1890 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1891 		return -EFAULT;
1892 	if (copy_to_sockptr(optval, &val, olr))
1893 		return -EFAULT;
1894 	return 0;
1895 }
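
/*
 * Example (userspace; a minimal sketch, where fd is the raw ICMPv6
 * control socket from the previous example): querying the API version
 * handled above. Per the MRT6_VERSION case, the kernel reports 0x0305.
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *
 *	if (getsockopt(fd, IPPROTO_IPV6, MRT6_VERSION, &ver, &len) == 0)
 *		printf("mroute6 version %#x\n", ver);
 */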
1896 
1897 /*
1898  *	The IPv6 multicast ioctl support routines.
1899  */
1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1901 {
1902 	struct sioc_sg_req6 *sr;
1903 	struct sioc_mif_req6 *vr;
1904 	struct vif_device *vif;
1905 	struct mfc6_cache *c;
1906 	struct net *net = sock_net(sk);
1907 	struct mr_table *mrt;
1908 
1909 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1910 	if (!mrt)
1911 		return -ENOENT;
1912 
1913 	switch (cmd) {
1914 	case SIOCGETMIFCNT_IN6:
1915 		vr = (struct sioc_mif_req6 *)arg;
1916 		if (vr->mifi >= mrt->maxvif)
1917 			return -EINVAL;
1918 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1919 		rcu_read_lock();
1920 		vif = &mrt->vif_table[vr->mifi];
1921 		if (VIF_EXISTS(mrt, vr->mifi)) {
1922 			vr->icount = READ_ONCE(vif->pkt_in);
1923 			vr->ocount = READ_ONCE(vif->pkt_out);
1924 			vr->ibytes = READ_ONCE(vif->bytes_in);
1925 			vr->obytes = READ_ONCE(vif->bytes_out);
1926 			rcu_read_unlock();
1927 			return 0;
1928 		}
1929 		rcu_read_unlock();
1930 		return -EADDRNOTAVAIL;
1931 	case SIOCGETSGCNT_IN6:
1932 		sr = (struct sioc_sg_req6 *)arg;
1933 
1934 		rcu_read_lock();
1935 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1936 				     &sr->grp.sin6_addr);
1937 		if (c) {
1938 			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1939 			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1940 			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1941 			rcu_read_unlock();
1942 			return 0;
1943 		}
1944 		rcu_read_unlock();
1945 		return -EADDRNOTAVAIL;
1946 	default:
1947 		return -ENOIOCTLCMD;
1948 	}
1949 }
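
/*
 * Example (userspace; a minimal sketch): reading per-MIF counters with
 * SIOCGETMIFCNT_IN6. The mifi field selects the interface; on success
 * the counters above are copied out of the vif_table entry.
 *
 *	struct sioc_mif_req6 req = { .mifi = 0 };
 *
 *	if (ioctl(fd, SIOCGETMIFCNT_IN6, &req) == 0)
 *		printf("mif %u: %lu/%lu packets in/out\n",
 *		       req.mifi, req.icount, req.ocount);
 */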
1950 
1951 #ifdef CONFIG_COMPAT
1952 struct compat_sioc_sg_req6 {
1953 	struct sockaddr_in6 src;
1954 	struct sockaddr_in6 grp;
1955 	compat_ulong_t pktcnt;
1956 	compat_ulong_t bytecnt;
1957 	compat_ulong_t wrong_if;
1958 };
1959 
1960 struct compat_sioc_mif_req6 {
1961 	mifi_t	mifi;
1962 	compat_ulong_t icount;
1963 	compat_ulong_t ocount;
1964 	compat_ulong_t ibytes;
1965 	compat_ulong_t obytes;
1966 };
1967 
1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1969 {
1970 	struct compat_sioc_sg_req6 sr;
1971 	struct compat_sioc_mif_req6 vr;
1972 	struct vif_device *vif;
1973 	struct mfc6_cache *c;
1974 	struct net *net = sock_net(sk);
1975 	struct mr_table *mrt;
1976 
1977 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1978 	if (!mrt)
1979 		return -ENOENT;
1980 
1981 	switch (cmd) {
1982 	case SIOCGETMIFCNT_IN6:
1983 		if (copy_from_user(&vr, arg, sizeof(vr)))
1984 			return -EFAULT;
1985 		if (vr.mifi >= mrt->maxvif)
1986 			return -EINVAL;
1987 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1988 		rcu_read_lock();
1989 		vif = &mrt->vif_table[vr.mifi];
1990 		if (VIF_EXISTS(mrt, vr.mifi)) {
1991 			vr.icount = READ_ONCE(vif->pkt_in);
1992 			vr.ocount = READ_ONCE(vif->pkt_out);
1993 			vr.ibytes = READ_ONCE(vif->bytes_in);
1994 			vr.obytes = READ_ONCE(vif->bytes_out);
1995 			rcu_read_unlock();
1996 
1997 			if (copy_to_user(arg, &vr, sizeof(vr)))
1998 				return -EFAULT;
1999 			return 0;
2000 		}
2001 		rcu_read_unlock();
2002 		return -EADDRNOTAVAIL;
2003 	case SIOCGETSGCNT_IN6:
2004 		if (copy_from_user(&sr, arg, sizeof(sr)))
2005 			return -EFAULT;
2006 
2007 		rcu_read_lock();
2008 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2009 		if (c) {
2010 			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2011 			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2012 			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2013 			rcu_read_unlock();
2014 
2015 			if (copy_to_user(arg, &sr, sizeof(sr)))
2016 				return -EFAULT;
2017 			return 0;
2018 		}
2019 		rcu_read_unlock();
2020 		return -EADDRNOTAVAIL;
2021 	default:
2022 		return -ENOIOCTLCMD;
2023 	}
2024 }
2025 #endif
2026 
2027 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2028 {
2029 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2030 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2031 	return dst_output(net, sk, skb);
2032 }
2033 
2034 /*
2035  *	Processing handlers for ip6mr_forward
2036  */
2037 
2038 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2039 			  struct sk_buff *skb, int vifi)
2040 {
2041 	struct vif_device *vif = &mrt->vif_table[vifi];
2042 	struct net_device *vif_dev;
2043 	struct ipv6hdr *ipv6h;
2044 	struct dst_entry *dst;
2045 	struct flowi6 fl6;
2046 
2047 	vif_dev = vif_dev_read(vif);
2048 	if (!vif_dev)
2049 		goto out_free;
2050 
2051 #ifdef CONFIG_IPV6_PIMSM_V2
2052 	if (vif->flags & MIFF_REGISTER) {
2053 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2054 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2055 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2056 		DEV_STATS_INC(vif_dev, tx_packets);
2057 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2058 		goto out_free;
2059 	}
2060 #endif
2061 
2062 	ipv6h = ipv6_hdr(skb);
2063 
2064 	fl6 = (struct flowi6) {
2065 		.flowi6_oif = vif->link,
2066 		.daddr = ipv6h->daddr,
2067 	};
2068 
2069 	dst = ip6_route_output(net, NULL, &fl6);
2070 	if (dst->error) {
2071 		dst_release(dst);
2072 		goto out_free;
2073 	}
2074 
2075 	skb_dst_drop(skb);
2076 	skb_dst_set(skb, dst);
2077 
2078 	/*
2079 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
2080 	 * locally not only before forwarding, but also after forwarding on
2081 	 * all output interfaces. Clearly, if the mrouter runs a multicast
2082 	 * program, that program should receive packets regardless of which
2083 	 * interface it joined on.
2084 	 * If we did not do this, the program would have to join on all
2085 	 * interfaces. On the other hand, a multihomed host (or router, but
2086 	 * not an mrouter) cannot join on more than one interface - that
2087 	 * would result in receiving duplicate packets.
2088 	 */
2089 	skb->dev = vif_dev;
2090 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2091 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2092 
2093 	/* We are about to write */
2094 	/* XXX: extension headers? */
2095 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2096 		goto out_free;
2097 
2098 	ipv6h = ipv6_hdr(skb);
2099 	ipv6h->hop_limit--;
2100 
2101 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2102 
2103 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2104 		       net, NULL, skb, skb->dev, vif_dev,
2105 		       ip6mr_forward2_finish);
2106 
2107 out_free:
2108 	kfree_skb(skb);
2109 	return 0;
2110 }
2111 
2112 /* Called with rcu_read_lock() */
2113 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2114 {
2115 	int ct;
2116 
2117 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2118 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2119 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2120 			break;
2121 	}
2122 	return ct;
2123 }
2124 
2125 /* Called under rcu_read_lock() */
2126 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2127 			   struct net_device *dev, struct sk_buff *skb,
2128 			   struct mfc6_cache *c)
2129 {
2130 	int psend = -1;
2131 	int vif, ct;
2132 	int true_vifi = ip6mr_find_vif(mrt, dev);
2133 
2134 	vif = c->_c.mfc_parent;
2135 	atomic_long_inc(&c->_c.mfc_un.res.pkt);
2136 	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2137 	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2138 
2139 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2140 		struct mfc6_cache *cache_proxy;
2141 
2142 		/* For an (*,G) entry, we only check that the incoming
2143 		 * interface is part of the static tree.
2144 		 */
2145 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2146 		if (cache_proxy &&
2147 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2148 			goto forward;
2149 	}
2150 
2151 	/*
2152 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2153 	 */
2154 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2155 		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2156 
2157 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2158 		    /* PIM-SM uses asserts when switching from the RPT to the
2159 		       SPT, so we cannot check that the packet arrived on an
2160 		       oif. That is bad, but otherwise we would need to move a
2161 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2162 		     */
2163 		    (mrt->mroute_do_pim ||
2164 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2165 		    time_after(jiffies,
2166 			       c->_c.mfc_un.res.last_assert +
2167 			       MFC_ASSERT_THRESH)) {
2168 			c->_c.mfc_un.res.last_assert = jiffies;
2169 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2170 			if (mrt->mroute_do_wrvifwhole)
2171 				ip6mr_cache_report(mrt, skb, true_vifi,
2172 						   MRT6MSG_WRMIFWHOLE);
2173 		}
2174 		goto dont_forward;
2175 	}
2176 
2177 forward:
2178 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2179 		   mrt->vif_table[vif].pkt_in + 1);
2180 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2181 		   mrt->vif_table[vif].bytes_in + skb->len);
2182 
2183 	/*
2184 	 *	Forward the frame
2185 	 */
2186 	if (ipv6_addr_any(&c->mf6c_origin) &&
2187 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2188 		if (true_vifi >= 0 &&
2189 		    true_vifi != c->_c.mfc_parent &&
2190 		    ipv6_hdr(skb)->hop_limit >
2191 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2192 			/* It's an (*,*) entry and the packet is not coming from
2193 			 * the upstream: forward the packet to the upstream
2194 			 * only.
2195 			 */
2196 			psend = c->_c.mfc_parent;
2197 			goto last_forward;
2198 		}
2199 		goto dont_forward;
2200 	}
2201 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2202 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2203 		/* For (*,G) entry, don't forward to the incoming interface */
2204 		/* For a (*,G) entry, don't forward to the incoming interface */
2205 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2206 			if (psend != -1) {
2207 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2208 				if (skb2)
2209 					ip6mr_forward2(net, mrt, skb2, psend);
2210 			}
2211 			psend = ct;
2212 		}
2213 	}
2214 last_forward:
2215 	if (psend != -1) {
2216 		ip6mr_forward2(net, mrt, skb, psend);
2217 		return;
2218 	}
2219 
2220 dont_forward:
2221 	kfree_skb(skb);
2222 }
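
/*
 * Note on the forwarding loop above: transmission is deferred by one
 * step. When the next eligible oif is found, a clone is sent to the
 * previously recorded oif (psend), and the original skb is finally
 * consumed by the ip6mr_forward2() call at last_forward. The common
 * single-oif case therefore needs no cloning at all.
 */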
2223 
2224 
2225 /*
2226  *	Multicast packets for forwarding arrive here
2227  */
2228 
2229 int ip6_mr_input(struct sk_buff *skb)
2230 {
2231 	struct mfc6_cache *cache;
2232 	struct net *net = dev_net(skb->dev);
2233 	struct mr_table *mrt;
2234 	struct flowi6 fl6 = {
2235 		.flowi6_iif	= skb->dev->ifindex,
2236 		.flowi6_mark	= skb->mark,
2237 	};
2238 	int err;
2239 	struct net_device *dev;
2240 
2241 	/* skb->dev passed in is the master dev for vrfs.
2242 	 * Get the proper interface that does have a vif associated with it.
2243 	 */
2244 	dev = skb->dev;
2245 	if (netif_is_l3_master(skb->dev)) {
2246 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2247 		if (!dev) {
2248 			kfree_skb(skb);
2249 			return -ENODEV;
2250 		}
2251 	}
2252 
2253 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2254 	if (err < 0) {
2255 		kfree_skb(skb);
2256 		return err;
2257 	}
2258 
2259 	cache = ip6mr_cache_find(mrt,
2260 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2261 	if (!cache) {
2262 		int vif = ip6mr_find_vif(mrt, dev);
2263 
2264 		if (vif >= 0)
2265 			cache = ip6mr_cache_find_any(mrt,
2266 						     &ipv6_hdr(skb)->daddr,
2267 						     vif);
2268 	}
2269 
2270 	/*
2271 	 *	No usable cache entry
2272 	 */
2273 	if (!cache) {
2274 		int vif;
2275 
2276 		vif = ip6mr_find_vif(mrt, dev);
2277 		if (vif >= 0) {
2278 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2279 
2280 			return err;
2281 		}
2282 		kfree_skb(skb);
2283 		return -ENODEV;
2284 	}
2285 
2286 	ip6_mr_forward(net, mrt, dev, skb, cache);
2287 
2288 	return 0;
2289 }
2290 
2291 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2292 		    u32 portid)
2293 {
2294 	int err;
2295 	struct mr_table *mrt;
2296 	struct mfc6_cache *cache;
2297 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2298 
2299 	rcu_read_lock();
2300 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2301 	if (!mrt) {
2302 		rcu_read_unlock();
2303 		return -ENOENT;
2304 	}
2305 
2306 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2307 	if (!cache && skb->dev) {
2308 		int vif = ip6mr_find_vif(mrt, skb->dev);
2309 
2310 		if (vif >= 0)
2311 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2312 						     vif);
2313 	}
2314 
2315 	if (!cache) {
2316 		struct sk_buff *skb2;
2317 		struct ipv6hdr *iph;
2318 		struct net_device *dev;
2319 		int vif;
2320 
2321 		dev = skb->dev;
2322 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2323 			rcu_read_unlock();
2324 			return -ENODEV;
2325 		}
2326 
2327 		/* really correct? */
2328 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2329 		if (!skb2) {
2330 			rcu_read_unlock();
2331 			return -ENOMEM;
2332 		}
2333 
2334 		NETLINK_CB(skb2).portid = portid;
2335 		skb_reset_transport_header(skb2);
2336 
2337 		skb_put(skb2, sizeof(struct ipv6hdr));
2338 		skb_reset_network_header(skb2);
2339 
2340 		iph = ipv6_hdr(skb2);
2341 		iph->version = 0;
2342 		iph->priority = 0;
2343 		iph->flow_lbl[0] = 0;
2344 		iph->flow_lbl[1] = 0;
2345 		iph->flow_lbl[2] = 0;
2346 		iph->payload_len = 0;
2347 		iph->nexthdr = IPPROTO_NONE;
2348 		iph->hop_limit = 0;
2349 		iph->saddr = rt->rt6i_src.addr;
2350 		iph->daddr = rt->rt6i_dst.addr;
2351 
2352 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2353 		rcu_read_unlock();
2354 
2355 		return err;
2356 	}
2357 
2358 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2359 	rcu_read_unlock();
2360 	return err;
2361 }
2362 
2363 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2364 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2365 			     int flags)
2366 {
2367 	struct nlmsghdr *nlh;
2368 	struct rtmsg *rtm;
2369 	int err;
2370 
2371 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2372 	if (!nlh)
2373 		return -EMSGSIZE;
2374 
2375 	rtm = nlmsg_data(nlh);
2376 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2377 	rtm->rtm_dst_len  = 128;
2378 	rtm->rtm_src_len  = 128;
2379 	rtm->rtm_tos      = 0;
2380 	rtm->rtm_table    = mrt->id;
2381 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2382 		goto nla_put_failure;
2383 	rtm->rtm_type = RTN_MULTICAST;
2384 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2385 	if (c->_c.mfc_flags & MFC_STATIC)
2386 		rtm->rtm_protocol = RTPROT_STATIC;
2387 	else
2388 		rtm->rtm_protocol = RTPROT_MROUTED;
2389 	rtm->rtm_flags    = 0;
2390 
2391 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2392 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2393 		goto nla_put_failure;
2394 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2395 	/* do not break the dump if cache is unresolved */
2396 	if (err < 0 && err != -ENOENT)
2397 		goto nla_put_failure;
2398 
2399 	nlmsg_end(skb, nlh);
2400 	return 0;
2401 
2402 nla_put_failure:
2403 	nlmsg_cancel(skb, nlh);
2404 	return -EMSGSIZE;
2405 }
2406 
2407 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2408 			      u32 portid, u32 seq, struct mr_mfc *c,
2409 			      int cmd, int flags)
2410 {
2411 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2412 				 cmd, flags);
2413 }
2414 
2415 static int mr6_msgsize(bool unresolved, int maxvif)
2416 {
2417 	size_t len =
2418 		NLMSG_ALIGN(sizeof(struct rtmsg))
2419 		+ nla_total_size(4)	/* RTA_TABLE */
2420 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2421 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2422 		;
2423 
2424 	if (!unresolved)
2425 		len = len
2426 		      + nla_total_size(4)	/* RTA_IIF */
2427 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2428 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2429 						/* RTA_MFC_STATS */
2430 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2431 		;
2432 
2433 	return len;
2434 }
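
/*
 * Note: mr6_msgsize() is an upper bound used to size the notification
 * skb; nla_total_size() includes the attribute header and alignment
 * padding, and the RTA_MULTIPATH estimate assumes one rtnexthop per
 * possible mif.
 */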
2435 
2436 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2437 			      int cmd)
2438 {
2439 	struct net *net = read_pnet(&mrt->net);
2440 	struct sk_buff *skb;
2441 	int err = -ENOBUFS;
2442 
2443 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2444 			GFP_ATOMIC);
2445 	if (!skb)
2446 		goto errout;
2447 
2448 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2449 	if (err < 0)
2450 		goto errout;
2451 
2452 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2453 	return;
2454 
2455 errout:
2456 	kfree_skb(skb);
2457 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2458 }
2459 
2460 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2461 {
2462 	size_t len =
2463 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2464 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2465 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2466 					/* IP6MRA_CREPORT_SRC_ADDR */
2467 		+ nla_total_size(sizeof(struct in6_addr))
2468 					/* IP6MRA_CREPORT_DST_ADDR */
2469 		+ nla_total_size(sizeof(struct in6_addr))
2470 					/* IP6MRA_CREPORT_PKT */
2471 		+ nla_total_size(payloadlen)
2472 		;
2473 
2474 	return len;
2475 }
2476 
2477 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2478 {
2479 	struct net *net = read_pnet(&mrt->net);
2480 	struct nlmsghdr *nlh;
2481 	struct rtgenmsg *rtgenm;
2482 	struct mrt6msg *msg;
2483 	struct sk_buff *skb;
2484 	struct nlattr *nla;
2485 	int payloadlen;
2486 
2487 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2488 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2489 
2490 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2491 	if (!skb)
2492 		goto errout;
2493 
2494 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2495 			sizeof(struct rtgenmsg), 0);
2496 	if (!nlh)
2497 		goto errout;
2498 	rtgenm = nlmsg_data(nlh);
2499 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2500 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2501 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2502 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2503 			     &msg->im6_src) ||
2504 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2505 			     &msg->im6_dst))
2506 		goto nla_put_failure;
2507 
2508 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2509 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2510 				  nla_data(nla), payloadlen))
2511 		goto nla_put_failure;
2512 
2513 	nlmsg_end(skb, nlh);
2514 
2515 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2516 	return;
2517 
2518 nla_put_failure:
2519 	nlmsg_cancel(skb, nlh);
2520 errout:
2521 	kfree_skb(skb);
2522 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2523 }
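
/*
 * Example (userspace; a minimal sketch): a monitoring process receives
 * the RTM_NEWCACHEREPORT notifications generated above by joining the
 * RTNLGRP_IPV6_MROUTE_R group on a rtnetlink socket.
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	unsigned int grp = RTNLGRP_IPV6_MROUTE_R;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 */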
2524 
2525 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2526 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2527 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2528 	[RTA_TABLE]		= { .type = NLA_U32 },
2529 };
2530 
2531 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2532 					const struct nlmsghdr *nlh,
2533 					struct nlattr **tb,
2534 					struct netlink_ext_ack *extack)
2535 {
2536 	struct rtmsg *rtm;
2537 	int err;
2538 
2539 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2540 			  extack);
2541 	if (err)
2542 		return err;
2543 
2544 	rtm = nlmsg_data(nlh);
2545 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2546 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2547 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2548 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2549 		NL_SET_ERR_MSG_MOD(extack,
2550 				   "Invalid values in header for multicast route get request");
2551 		return -EINVAL;
2552 	}
2553 
2554 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2555 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2556 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2557 		return -EINVAL;
2558 	}
2559 
2560 	return 0;
2561 }
2562 
2563 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2564 			      struct netlink_ext_ack *extack)
2565 {
2566 	struct net *net = sock_net(in_skb->sk);
2567 	struct in6_addr src = {}, grp = {};
2568 	struct nlattr *tb[RTA_MAX + 1];
2569 	struct mfc6_cache *cache;
2570 	struct mr_table *mrt;
2571 	struct sk_buff *skb;
2572 	u32 tableid;
2573 	int err;
2574 
2575 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2576 	if (err < 0)
2577 		return err;
2578 
2579 	if (tb[RTA_SRC])
2580 		src = nla_get_in6_addr(tb[RTA_SRC]);
2581 	if (tb[RTA_DST])
2582 		grp = nla_get_in6_addr(tb[RTA_DST]);
2583 	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2584 
2585 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2586 	if (!mrt) {
2587 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2588 		return -ENOENT;
2589 	}
2590 
2591 	/* entries are added/deleted only under RTNL */
2592 	rcu_read_lock();
2593 	cache = ip6mr_cache_find(mrt, &src, &grp);
2594 	rcu_read_unlock();
2595 	if (!cache) {
2596 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2597 		return -ENOENT;
2598 	}
2599 
2600 	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2601 	if (!skb)
2602 		return -ENOBUFS;
2603 
2604 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2605 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2606 	if (err < 0) {
2607 		kfree_skb(skb);
2608 		return err;
2609 	}
2610 
2611 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2612 }
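
/*
 * Example (userspace; a minimal sketch, attribute handling elided): a
 * RTM_GETROUTE request that passes ip6mr_rtm_valid_getroute_req() above
 * must use rtm_family RTNL_FAMILY_IP6MR, prefix lengths of 128 when
 * RTA_SRC/RTA_DST are present, and leave every other header field zero.
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg rtm;
 *		char attrs[64];
 *	} req = {
 *		.nlh.nlmsg_type  = RTM_GETROUTE,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST,
 *		.rtm.rtm_family  = RTNL_FAMILY_IP6MR,
 *		.rtm.rtm_src_len = 128,
 *		.rtm.rtm_dst_len = 128,
 *	};
 *
 * RTA_SRC/RTA_DST carrying the (S,G) pair are then appended, nlmsg_len
 * is set, and the request is sent on a NETLINK_ROUTE socket.
 */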
2613 
2614 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2615 {
2616 	const struct nlmsghdr *nlh = cb->nlh;
2617 	struct fib_dump_filter filter = {
2618 		.rtnl_held = true,
2619 	};
2620 	int err;
2621 
2622 	if (cb->strict_check) {
2623 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2624 					    &filter, cb);
2625 		if (err < 0)
2626 			return err;
2627 	}
2628 
2629 	if (filter.table_id) {
2630 		struct mr_table *mrt;
2631 
2632 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2633 		if (!mrt) {
2634 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2635 				return skb->len;
2636 
2637 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2638 			return -ENOENT;
2639 		}
2640 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2641 				    &mfc_unres_lock, &filter);
2642 		return skb->len ? : err;
2643 	}
2644 
2645 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2646 				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2647 }
2648