1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Linux IPv6 multicast routing support for BSD pim6sd
4 * Based on net/ipv4/ipmr.c.
5 *
6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7 * LSIIT Laboratory, Strasbourg, France
8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * 6WIND, Paris, France
10 * Copyright (C)2007,2008 USAGI/WIDE Project
11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 */
13
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50
51 #include <linux/nospec.h>
52
53 struct ip6mr_rule {
54 struct fib_rule common;
55 };
56
57 struct ip6mr_result {
58 struct mr_table *mrt;
59 };
60
61 /* Big lock, protecting vif table, mrt cache and mroute socket state.
62 Note that the changes are semaphored via rtnl_lock.
63 */
64
65 static DEFINE_SPINLOCK(mrt_lock);
66
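/* Fetch a MIF's net_device pointer; the caller must hold rcu_read_lock(). */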
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 return rcu_dereference(vif->dev);
70 }
71
72 /* Multicast router control variables */
73
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
76
77 /* We return to original Alan's scheme. Hash table of resolved
78 entries is changed only in process context and protected
79 with weak lock mrt_lock. Queue of unresolved entries is protected
80 with strong spinlock mfc_unres_lock.
81
82 In this case data path is free of exclusive locks at all.
83 */
84
85 static struct kmem_cache *mrt_cachep __read_mostly;
86
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 struct net_device *dev, struct sk_buff *skb,
92 struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
106 #define ip6mr_for_each_table(mrt, net) \
107 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
108 lockdep_rtnl_is_held() || \
109 list_empty(&net->ipv6.mr6_tables))
110
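/* Walk the per-netns list of multicast routing tables under RCU:
 * a NULL @mrt starts from the head, NULL is returned past the end.
 */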
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 struct mr_table *mrt)
113 {
114 struct mr_table *ret;
115
116 if (!mrt)
117 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 struct mr_table, list);
119 else
120 ret = list_entry_rcu(mrt->list.next,
121 struct mr_table, list);
122
123 if (&ret->list == &net->ipv6.mr6_tables)
124 return NULL;
125 return ret;
126 }
127
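/* Look up a table by id; the caller must hold RCU or RTNL. */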
128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
129 {
130 struct mr_table *mrt;
131
132 ip6mr_for_each_table(mrt, net) {
133 if (mrt->id == id)
134 return mrt;
135 }
136 return NULL;
137 }
138
139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
140 {
141 struct mr_table *mrt;
142
143 rcu_read_lock();
144 mrt = __ip6mr_get_table(net, id);
145 rcu_read_unlock();
146 return mrt;
147 }
148
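/* Resolve the mr_table handling this flow via the IP6MR policy rules. */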
149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 struct mr_table **mrt)
151 {
152 int err;
153 struct ip6mr_result res;
154 struct fib_lookup_arg arg = {
155 .result = &res,
156 .flags = FIB_LOOKUP_NOREF,
157 };
158
159 /* update flow if oif or iif point to device enslaved to l3mdev */
160 l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161
162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 flowi6_to_flowi(flp6), 0, &arg);
164 if (err < 0)
165 return err;
166 *mrt = res.mrt;
167 return 0;
168 }
169
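/* fib_rules action handler: map a matched rule to its table, turning
 * the terminal actions into errors.
 */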
170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 int flags, struct fib_lookup_arg *arg)
172 {
173 struct ip6mr_result *res = arg->result;
174 struct mr_table *mrt;
175
176 switch (rule->action) {
177 case FR_ACT_TO_TBL:
178 break;
179 case FR_ACT_UNREACHABLE:
180 return -ENETUNREACH;
181 case FR_ACT_PROHIBIT:
182 return -EACCES;
183 case FR_ACT_BLACKHOLE:
184 default:
185 return -EINVAL;
186 }
187
188 arg->table = fib_rule_get_table(rule, arg);
189
190 mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 if (!mrt)
192 return -EAGAIN;
193 res->mrt = mrt;
194 return 0;
195 }
196
197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
198 {
199 return 1;
200 }
201
202 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
203 struct fib_rule_hdr *frh, struct nlattr **tb,
204 struct netlink_ext_ack *extack)
205 {
206 return 0;
207 }
208
209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
210 struct nlattr **tb)
211 {
212 return 1;
213 }
214
215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 struct fib_rule_hdr *frh)
217 {
218 frh->dst_len = 0;
219 frh->src_len = 0;
220 frh->tos = 0;
221 return 0;
222 }
223
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 .family = RTNL_FAMILY_IP6MR,
226 .rule_size = sizeof(struct ip6mr_rule),
227 .addr_size = sizeof(struct in6_addr),
228 .action = ip6mr_rule_action,
229 .match = ip6mr_rule_match,
230 .configure = ip6mr_rule_configure,
231 .compare = ip6mr_rule_compare,
232 .fill = ip6mr_rule_fill,
233 .nlgroup = RTNLGRP_IPV6_RULE,
234 .owner = THIS_MODULE,
235 };
236
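/* Register the IP6MR rule ops for this netns, create the default table
 * and install a catch-all rule pointing at it.
 */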
237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 struct fib_rules_ops *ops;
240 struct mr_table *mrt;
241 int err;
242
243 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 if (IS_ERR(ops))
245 return PTR_ERR(ops);
246
247 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248
249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 if (IS_ERR(mrt)) {
251 err = PTR_ERR(mrt);
252 goto err1;
253 }
254
255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
256 if (err < 0)
257 goto err2;
258
259 net->ipv6.mr6_rules_ops = ops;
260 return 0;
261
262 err2:
263 rtnl_lock();
264 ip6mr_free_table(mrt);
265 rtnl_unlock();
266 err1:
267 fib_rules_unregister(ops);
268 return err;
269 }
270
271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 struct mr_table *mrt, *next;
274
275 ASSERT_RTNL();
276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 list_del(&mrt->list);
278 ip6mr_free_table(mrt);
279 }
280 fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282
283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 struct netlink_ext_ack *extack)
285 {
286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288
289 static unsigned int ip6mr_rules_seq_read(const struct net *net)
290 {
291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293
294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
301 #define ip6mr_for_each_table(mrt, net) \
302 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303
304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 struct mr_table *mrt)
306 {
307 if (!mrt)
308 return net->ipv6.mrt6;
309 return NULL;
310 }
311
312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
313 {
314 return net->ipv6.mrt6;
315 }
316
317 #define __ip6mr_get_table ip6mr_get_table
318
319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 struct mr_table **mrt)
321 {
322 *mrt = net->ipv6.mrt6;
323 return 0;
324 }
325
326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 struct mr_table *mrt;
329
330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 if (IS_ERR(mrt))
332 return PTR_ERR(mrt);
333 net->ipv6.mrt6 = mrt;
334 return 0;
335 }
336
337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 ASSERT_RTNL();
340 ip6mr_free_table(net->ipv6.mrt6);
341 net->ipv6.mrt6 = NULL;
342 }
343
344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
345 struct netlink_ext_ack *extack)
346 {
347 return 0;
348 }
349
350 static unsigned int ip6mr_rules_seq_read(const struct net *net)
351 {
352 return 0;
353 }
354 #endif
355
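/* rhashtable compare function: returns 0 when the (group, origin) pair
 * of @ptr matches the lookup key.
 */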
356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 const void *ptr)
358 {
359 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361
362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365
366 static const struct rhashtable_params ip6mr_rht_params = {
367 .head_offset = offsetof(struct mr_mfc, mnode),
368 .key_offset = offsetof(struct mfc6_cache, cmparg),
369 .key_len = sizeof(struct mfc6_cache_cmp_arg),
370 .nelem_hint = 3,
371 .obj_cmpfn = ip6mr_hash_cmp,
372 .automatic_shrinking = true,
373 };
374
375 static void ip6mr_new_table_set(struct mr_table *mrt,
376 struct net *net)
377 {
378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
379 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
380 #endif
381 }
382
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 .mf6c_origin = IN6ADDR_ANY_INIT,
385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 .rht_params = &ip6mr_rht_params,
390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392
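/* Return the table with the given id, creating it if it does not exist. */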
393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
394 {
395 struct mr_table *mrt;
396
397 mrt = __ip6mr_get_table(net, id);
398 if (mrt)
399 return mrt;
400
401 return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
402 ipmr_expire_process, ip6mr_new_table_set);
403 }
404
405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 struct net *net = read_pnet(&mrt->net);
408
409 WARN_ON_ONCE(!mr_can_free_table(net));
410
411 timer_shutdown_sync(&mrt->ipmr_expire_timer);
412 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
413 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
414 rhltable_destroy(&mrt->mfc_hash);
415 kfree(mrt);
416 }
417
418 #ifdef CONFIG_PROC_FS
419 /* The /proc interfaces to multicast routing
420 * /proc/ip6_mr_cache /proc/ip6_mr_vif
421 */
422
423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
424 __acquires(RCU)
425 {
426 struct mr_vif_iter *iter = seq->private;
427 struct net *net = seq_file_net(seq);
428 struct mr_table *mrt;
429
430 rcu_read_lock();
431 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
432 if (!mrt) {
433 rcu_read_unlock();
434 return ERR_PTR(-ENOENT);
435 }
436
437 iter->mrt = mrt;
438
439 return mr_vif_seq_start(seq, pos);
440 }
441
442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
443 __releases(RCU)
444 {
445 rcu_read_unlock();
446 }
447
448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
449 {
450 struct mr_vif_iter *iter = seq->private;
451 struct mr_table *mrt = iter->mrt;
452
453 if (v == SEQ_START_TOKEN) {
454 seq_puts(seq,
455 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
456 } else {
457 const struct vif_device *vif = v;
458 const struct net_device *vif_dev;
459 const char *name;
460
461 vif_dev = vif_dev_read(vif);
462 name = vif_dev ? vif_dev->name : "none";
463
464 seq_printf(seq,
465 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
466 vif - mrt->vif_table,
467 name, vif->bytes_in, vif->pkt_in,
468 vif->bytes_out, vif->pkt_out,
469 vif->flags);
470 }
471 return 0;
472 }
473
474 static const struct seq_operations ip6mr_vif_seq_ops = {
475 .start = ip6mr_vif_seq_start,
476 .next = mr_vif_seq_next,
477 .stop = ip6mr_vif_seq_stop,
478 .show = ip6mr_vif_seq_show,
479 };
480
481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
482 {
483 struct net *net = seq_file_net(seq);
484 struct mr_table *mrt;
485
486 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
487 if (!mrt)
488 return ERR_PTR(-ENOENT);
489
490 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
491 }
492
493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
494 {
495 int n;
496
497 if (v == SEQ_START_TOKEN) {
498 seq_puts(seq,
499 "Group "
500 "Origin "
501 "Iif Pkts Bytes Wrong Oifs\n");
502 } else {
503 const struct mfc6_cache *mfc = v;
504 const struct mr_mfc_iter *it = seq->private;
505 struct mr_table *mrt = it->mrt;
506
507 seq_printf(seq, "%pI6 %pI6 %-3hd",
508 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
509 mfc->_c.mfc_parent);
510
511 if (it->cache != &mrt->mfc_unres_queue) {
512 seq_printf(seq, " %8lu %8lu %8lu",
513 atomic_long_read(&mfc->_c.mfc_un.res.pkt),
514 atomic_long_read(&mfc->_c.mfc_un.res.bytes),
515 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
516 for (n = mfc->_c.mfc_un.res.minvif;
517 n < mfc->_c.mfc_un.res.maxvif; n++) {
518 if (VIF_EXISTS(mrt, n) &&
519 mfc->_c.mfc_un.res.ttls[n] < 255)
520 seq_printf(seq,
521 " %2d:%-3d", n,
522 mfc->_c.mfc_un.res.ttls[n]);
523 }
524 } else {
525 /* unresolved mfc_caches don't contain
526 * pkt, bytes and wrong_if values
527 */
528 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
529 }
530 seq_putc(seq, '\n');
531 }
532 return 0;
533 }
534
535 static const struct seq_operations ipmr_mfc_seq_ops = {
536 .start = ipmr_mfc_seq_start,
537 .next = mr_mfc_seq_next,
538 .stop = mr_mfc_seq_stop,
539 .show = ipmr_mfc_seq_show,
540 };
541 #endif
542
543 #ifdef CONFIG_IPV6_PIMSM_V2
544
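/* Handle a received PIM REGISTER packet: validate type, flags and
 * checksum, then decapsulate the inner multicast packet onto the
 * pim6reg device.
 */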
545 static int pim6_rcv(struct sk_buff *skb)
546 {
547 struct pimreghdr *pim;
548 struct ipv6hdr *encap;
549 struct net_device *reg_dev = NULL;
550 struct net *net = dev_net(skb->dev);
551 struct mr_table *mrt;
552 struct flowi6 fl6 = {
553 .flowi6_iif = skb->dev->ifindex,
554 .flowi6_mark = skb->mark,
555 };
556 int reg_vif_num;
557
558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
559 goto drop;
560
561 pim = (struct pimreghdr *)skb_transport_header(skb);
562 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
563 (pim->flags & PIM_NULL_REGISTER) ||
564 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
565 sizeof(*pim), IPPROTO_PIM,
566 csum_partial((void *)pim, sizeof(*pim), 0)) &&
567 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
568 goto drop;
569
570 /* check if the inner packet is destined to mcast group */
571 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
572 sizeof(*pim));
573
574 if (!ipv6_addr_is_multicast(&encap->daddr) ||
575 encap->payload_len == 0 ||
576 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
577 goto drop;
578
579 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
580 goto drop;
581
582 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
583 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
584 if (reg_vif_num >= 0)
585 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
586
587 if (!reg_dev)
588 goto drop;
589
590 skb->mac_header = skb->network_header;
591 skb_pull(skb, (u8 *)encap - skb->data);
592 skb_reset_network_header(skb);
593 skb->protocol = htons(ETH_P_IPV6);
594 skb->ip_summed = CHECKSUM_NONE;
595
596 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
597
598 netif_rx(skb);
599
600 return 0;
601 drop:
602 kfree_skb(skb);
603 return 0;
604 }
605
606 static const struct inet6_protocol pim6_protocol = {
607 .handler = pim6_rcv,
608 };
609
610 /* Service routines creating virtual interfaces: PIMREG */
611
612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
613 struct net_device *dev)
614 {
615 struct net *net = dev_net(dev);
616 struct mr_table *mrt;
617 struct flowi6 fl6 = {
618 .flowi6_oif = dev->ifindex,
619 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
620 .flowi6_mark = skb->mark,
621 };
622
623 if (!pskb_inet_may_pull(skb))
624 goto tx_err;
625
626 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
627 goto tx_err;
628
629 DEV_STATS_ADD(dev, tx_bytes, skb->len);
630 DEV_STATS_INC(dev, tx_packets);
631 rcu_read_lock();
632 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
633 MRT6MSG_WHOLEPKT);
634 rcu_read_unlock();
635 kfree_skb(skb);
636 return NETDEV_TX_OK;
637
638 tx_err:
639 DEV_STATS_INC(dev, tx_errors);
640 kfree_skb(skb);
641 return NETDEV_TX_OK;
642 }
643
644 static int reg_vif_get_iflink(const struct net_device *dev)
645 {
646 return 0;
647 }
648
649 static const struct net_device_ops reg_vif_netdev_ops = {
650 .ndo_start_xmit = reg_vif_xmit,
651 .ndo_get_iflink = reg_vif_get_iflink,
652 };
653
654 static void reg_vif_setup(struct net_device *dev)
655 {
656 dev->type = ARPHRD_PIMREG;
657 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
658 dev->flags = IFF_NOARP;
659 dev->netdev_ops = &reg_vif_netdev_ops;
660 dev->needs_free_netdev = true;
661 dev->netns_immutable = true;
662 }
663
664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
665 {
666 struct net_device *dev;
667 char name[IFNAMSIZ];
668
669 if (mrt->id == RT6_TABLE_DFLT)
670 sprintf(name, "pim6reg");
671 else
672 sprintf(name, "pim6reg%u", mrt->id);
673
674 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
675 if (!dev)
676 return NULL;
677
678 dev_net_set(dev, net);
679
680 if (register_netdevice(dev)) {
681 free_netdev(dev);
682 return NULL;
683 }
684
685 if (dev_open(dev, NULL))
686 goto failure;
687
688 dev_hold(dev);
689 return dev;
690
691 failure:
692 unregister_netdevice(dev);
693 return NULL;
694 }
695 #endif
696
697 static int call_ip6mr_vif_entry_notifiers(struct net *net,
698 enum fib_event_type event_type,
699 struct vif_device *vif,
700 struct net_device *vif_dev,
701 mifi_t vif_index, u32 tb_id)
702 {
703 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
704 vif, vif_dev, vif_index, tb_id,
705 &net->ipv6.ipmr_seq);
706 }
707
708 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
709 enum fib_event_type event_type,
710 struct mfc6_cache *mfc, u32 tb_id)
711 {
712 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
713 &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
714 }
715
716 /* Delete a VIF entry */
717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
718 struct list_head *head)
719 {
720 struct vif_device *v;
721 struct net_device *dev;
722 struct inet6_dev *in6_dev;
723
724 if (vifi < 0 || vifi >= mrt->maxvif)
725 return -EADDRNOTAVAIL;
726
727 v = &mrt->vif_table[vifi];
728
729 dev = rtnl_dereference(v->dev);
730 if (!dev)
731 return -EADDRNOTAVAIL;
732
733 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
734 FIB_EVENT_VIF_DEL, v, dev,
735 vifi, mrt->id);
736 spin_lock(&mrt_lock);
737 RCU_INIT_POINTER(v->dev, NULL);
738
739 #ifdef CONFIG_IPV6_PIMSM_V2
740 if (vifi == mrt->mroute_reg_vif_num) {
741 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
742 WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
743 }
744 #endif
745
746 if (vifi + 1 == mrt->maxvif) {
747 int tmp;
748 for (tmp = vifi - 1; tmp >= 0; tmp--) {
749 if (VIF_EXISTS(mrt, tmp))
750 break;
751 }
752 WRITE_ONCE(mrt->maxvif, tmp + 1);
753 }
754
755 spin_unlock(&mrt_lock);
756
757 dev_set_allmulti(dev, -1);
758
759 in6_dev = __in6_dev_get(dev);
760 if (in6_dev) {
761 atomic_dec(&in6_dev->cnf.mc_forwarding);
762 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
763 NETCONFA_MC_FORWARDING,
764 dev->ifindex, &in6_dev->cnf);
765 }
766
767 if ((v->flags & MIFF_REGISTER) && !notify)
768 unregister_netdevice_queue(dev, head);
769
770 netdev_put(dev, &v->dev_tracker);
771 return 0;
772 }
773
774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
775 {
776 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
777
778 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
779 }
780
781 static inline void ip6mr_cache_free(struct mfc6_cache *c)
782 {
783 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
784 }
785
786 /* Destroy an unresolved cache entry, killing queued skbs
787 and reporting error to netlink readers.
788 */
789
790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
791 {
792 struct net *net = read_pnet(&mrt->net);
793 struct sk_buff *skb;
794
795 atomic_dec(&mrt->cache_resolve_queue_len);
796
797 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
798 if (ipv6_hdr(skb)->version == 0) {
799 struct nlmsghdr *nlh = skb_pull(skb,
800 sizeof(struct ipv6hdr));
801 nlh->nlmsg_type = NLMSG_ERROR;
802 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
803 skb_trim(skb, nlh->nlmsg_len);
804 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
805 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
806 } else
807 kfree_skb(skb);
808 }
809
810 ip6mr_cache_free(c);
811 }
812
813
814 /* Timer process for all the unresolved queue. */
815
816 static void ipmr_do_expire_process(struct mr_table *mrt)
817 {
818 unsigned long now = jiffies;
819 unsigned long expires = 10 * HZ;
820 struct mr_mfc *c, *next;
821
822 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
823 if (time_after(c->mfc_un.unres.expires, now)) {
824 /* not yet... */
825 unsigned long interval = c->mfc_un.unres.expires - now;
826 if (interval < expires)
827 expires = interval;
828 continue;
829 }
830
831 list_del(&c->list);
832 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
833 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
834 }
835
836 if (!list_empty(&mrt->mfc_unres_queue))
837 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
838 }
839
840 static void ipmr_expire_process(struct timer_list *t)
841 {
842 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
843
844 if (!spin_trylock(&mfc_unres_lock)) {
845 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
846 return;
847 }
848
849 if (!list_empty(&mrt->mfc_unres_queue))
850 ipmr_do_expire_process(mrt);
851
852 spin_unlock(&mfc_unres_lock);
853 }
854
855 /* Fill the oifs list. Called with mrt_lock held. */
856
857 static void ip6mr_update_thresholds(struct mr_table *mrt,
858 struct mr_mfc *cache,
859 unsigned char *ttls)
860 {
861 int vifi;
862
863 cache->mfc_un.res.minvif = MAXMIFS;
864 cache->mfc_un.res.maxvif = 0;
865 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
866
867 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
868 if (VIF_EXISTS(mrt, vifi) &&
869 ttls[vifi] && ttls[vifi] < 255) {
870 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
871 if (cache->mfc_un.res.minvif > vifi)
872 cache->mfc_un.res.minvif = vifi;
873 if (cache->mfc_un.res.maxvif <= vifi)
874 cache->mfc_un.res.maxvif = vifi + 1;
875 }
876 }
877 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
878 }
879
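/* Add a multicast virtual interface (MIF). Called under RTNL. */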
880 static int mif6_add(struct net *net, struct mr_table *mrt,
881 struct mif6ctl *vifc, int mrtsock)
882 {
883 int vifi = vifc->mif6c_mifi;
884 struct vif_device *v = &mrt->vif_table[vifi];
885 struct net_device *dev;
886 struct inet6_dev *in6_dev;
887 int err;
888
889 /* Is vif busy ? */
890 if (VIF_EXISTS(mrt, vifi))
891 return -EADDRINUSE;
892
893 switch (vifc->mif6c_flags) {
894 #ifdef CONFIG_IPV6_PIMSM_V2
895 case MIFF_REGISTER:
896 /*
897 * Special Purpose VIF in PIM
898 * All the packets will be sent to the daemon
899 */
900 if (mrt->mroute_reg_vif_num >= 0)
901 return -EADDRINUSE;
902 dev = ip6mr_reg_vif(net, mrt);
903 if (!dev)
904 return -ENOBUFS;
905 err = dev_set_allmulti(dev, 1);
906 if (err) {
907 unregister_netdevice(dev);
908 dev_put(dev);
909 return err;
910 }
911 break;
912 #endif
913 case 0:
914 dev = dev_get_by_index(net, vifc->mif6c_pifi);
915 if (!dev)
916 return -EADDRNOTAVAIL;
917 err = dev_set_allmulti(dev, 1);
918 if (err) {
919 dev_put(dev);
920 return err;
921 }
922 break;
923 default:
924 return -EINVAL;
925 }
926
927 in6_dev = __in6_dev_get(dev);
928 if (in6_dev) {
929 atomic_inc(&in6_dev->cnf.mc_forwarding);
930 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
931 NETCONFA_MC_FORWARDING,
932 dev->ifindex, &in6_dev->cnf);
933 }
934
935 /* Fill in the VIF structures */
936 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
937 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
938 MIFF_REGISTER);
939
940 /* And finish update writing critical data */
941 spin_lock(&mrt_lock);
942 rcu_assign_pointer(v->dev, dev);
943 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
944 #ifdef CONFIG_IPV6_PIMSM_V2
945 if (v->flags & MIFF_REGISTER)
946 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
947 #endif
948 if (vifi + 1 > mrt->maxvif)
949 WRITE_ONCE(mrt->maxvif, vifi + 1);
950 spin_unlock(&mrt_lock);
951 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
952 v, dev, vifi, mrt->id);
953 return 0;
954 }
955
956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
957 const struct in6_addr *origin,
958 const struct in6_addr *mcastgrp)
959 {
960 struct mfc6_cache_cmp_arg arg = {
961 .mf6c_origin = *origin,
962 .mf6c_mcastgrp = *mcastgrp,
963 };
964
965 return mr_mfc_find(mrt, &arg);
966 }
967
968 /* Look for a (*,G) entry */
969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
970 struct in6_addr *mcastgrp,
971 mifi_t mifi)
972 {
973 struct mfc6_cache_cmp_arg arg = {
974 .mf6c_origin = in6addr_any,
975 .mf6c_mcastgrp = *mcastgrp,
976 };
977
978 if (ipv6_addr_any(mcastgrp))
979 return mr_mfc_find_any_parent(mrt, mifi);
980 return mr_mfc_find_any(mrt, mifi, &arg);
981 }
982
983 /* Look for a (S,G,iif) entry if parent != -1 */
984 static struct mfc6_cache *
985 ip6mr_cache_find_parent(struct mr_table *mrt,
986 const struct in6_addr *origin,
987 const struct in6_addr *mcastgrp,
988 int parent)
989 {
990 struct mfc6_cache_cmp_arg arg = {
991 .mf6c_origin = *origin,
992 .mf6c_mcastgrp = *mcastgrp,
993 };
994
995 return mr_mfc_find_parent(mrt, &arg, parent);
996 }
997
998 /* Allocate a multicast cache entry */
999 static struct mfc6_cache *ip6mr_cache_alloc(void)
1000 {
1001 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1002 if (!c)
1003 return NULL;
1004 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1005 c->_c.mfc_un.res.minvif = MAXMIFS;
1006 c->_c.free = ip6mr_cache_free_rcu;
1007 refcount_set(&c->_c.mfc_un.res.refcount, 1);
1008 return c;
1009 }
1010
1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1012 {
1013 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1014 if (!c)
1015 return NULL;
1016 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1017 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1018 return c;
1019 }
1020
1021 /*
1022 * A cache entry has gone into a resolved state from queued
1023 */
1024
1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1026 struct mfc6_cache *uc, struct mfc6_cache *c)
1027 {
1028 struct sk_buff *skb;
1029
1030 /*
1031 * Play the pending entries through our router
1032 */
1033
1034 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1035 if (ipv6_hdr(skb)->version == 0) {
1036 struct nlmsghdr *nlh = skb_pull(skb,
1037 sizeof(struct ipv6hdr));
1038
1039 if (mr_fill_mroute(mrt, skb, &c->_c,
1040 nlmsg_data(nlh)) > 0) {
1041 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1042 } else {
1043 nlh->nlmsg_type = NLMSG_ERROR;
1044 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1045 skb_trim(skb, nlh->nlmsg_len);
1046 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1047 }
1048 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1049 } else {
1050 rcu_read_lock();
1051 ip6_mr_forward(net, mrt, skb->dev, skb, c);
1052 rcu_read_unlock();
1053 }
1054 }
1055 }
1056
1057 /*
1058 * Bounce a cache query up to pim6sd and netlink.
1059 *
1060 * Called under rcu_read_lock()
1061 */
1062
1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1064 mifi_t mifi, int assert)
1065 {
1066 struct sock *mroute6_sk;
1067 struct sk_buff *skb;
1068 struct mrt6msg *msg;
1069 int ret;
1070
1071 #ifdef CONFIG_IPV6_PIMSM_V2
1072 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1073 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 +sizeof(*msg));
1075 else
1076 #endif
1077 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078
1079 if (!skb)
1080 return -ENOBUFS;
1081
1082 /* I suppose that internal messages
1083 * do not require checksums */
1084
1085 skb->ip_summed = CHECKSUM_UNNECESSARY;
1086
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1089 /* Ugly, but we have no choice with this interface.
1090 Duplicate old header, fix length etc.
1091 And all this only to mangle msg->im6_msgtype and
1092 to set msg->im6_mbz to "mbz" :-)
1093 */
1094 __skb_pull(skb, skb_network_offset(pkt));
1095
1096 skb_push(skb, sizeof(*msg));
1097 skb_reset_transport_header(skb);
1098 msg = (struct mrt6msg *)skb_transport_header(skb);
1099 msg->im6_mbz = 0;
1100 msg->im6_msgtype = assert;
1101 if (assert == MRT6MSG_WRMIFWHOLE)
1102 msg->im6_mif = mifi;
1103 else
1104 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1105 msg->im6_pad = 0;
1106 msg->im6_src = ipv6_hdr(pkt)->saddr;
1107 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1108
1109 skb->ip_summed = CHECKSUM_UNNECESSARY;
1110 } else
1111 #endif
1112 {
1113 /*
1114 * Copy the IP header
1115 */
1116
1117 skb_put(skb, sizeof(struct ipv6hdr));
1118 skb_reset_network_header(skb);
1119 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1120
1121 /*
1122 * Add our header
1123 */
1124 skb_put(skb, sizeof(*msg));
1125 skb_reset_transport_header(skb);
1126 msg = (struct mrt6msg *)skb_transport_header(skb);
1127
1128 msg->im6_mbz = 0;
1129 msg->im6_msgtype = assert;
1130 msg->im6_mif = mifi;
1131 msg->im6_pad = 0;
1132 msg->im6_src = ipv6_hdr(pkt)->saddr;
1133 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1134
1135 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1136 skb->ip_summed = CHECKSUM_UNNECESSARY;
1137 }
1138
1139 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1140 if (!mroute6_sk) {
1141 kfree_skb(skb);
1142 return -EINVAL;
1143 }
1144
1145 mrt6msg_netlink_event(mrt, skb);
1146
1147 /* Deliver to user space multicast routing algorithms */
1148 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1149
1150 if (ret < 0) {
1151 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1152 kfree_skb(skb);
1153 }
1154
1155 return ret;
1156 }
1157
1158 /* Queue a packet for resolution. It gets locked cache entry! */
1159 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1160 struct sk_buff *skb, struct net_device *dev)
1161 {
1162 struct mfc6_cache *c;
1163 bool found = false;
1164 int err;
1165
1166 spin_lock_bh(&mfc_unres_lock);
1167 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1168 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1169 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1170 found = true;
1171 break;
1172 }
1173 }
1174
1175 if (!found) {
1176 /*
1177 * Create a new entry if allowable
1178 */
1179
1180 c = ip6mr_cache_alloc_unres();
1181 if (!c) {
1182 spin_unlock_bh(&mfc_unres_lock);
1183
1184 kfree_skb(skb);
1185 return -ENOBUFS;
1186 }
1187
1188 /* Fill in the new cache entry */
1189 c->_c.mfc_parent = -1;
1190 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1191 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1192
1193 /*
1194 * Reflect first query at pim6sd
1195 */
1196 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1197 if (err < 0) {
1198 /* If the report failed throw the cache entry
1199 out - Brad Parker
1200 */
1201 spin_unlock_bh(&mfc_unres_lock);
1202
1203 ip6mr_cache_free(c);
1204 kfree_skb(skb);
1205 return err;
1206 }
1207
1208 atomic_inc(&mrt->cache_resolve_queue_len);
1209 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1210 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1211
1212 ipmr_do_expire_process(mrt);
1213 }
1214
1215 /* See if we can append the packet */
1216 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1217 kfree_skb(skb);
1218 err = -ENOBUFS;
1219 } else {
1220 if (dev) {
1221 skb->dev = dev;
1222 skb->skb_iif = dev->ifindex;
1223 }
1224 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1225 err = 0;
1226 }
1227
1228 spin_unlock_bh(&mfc_unres_lock);
1229 return err;
1230 }
1231
1232 /*
1233 * MFC6 cache manipulation by user space
1234 */
1235
1236 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1237 int parent)
1238 {
1239 struct mfc6_cache *c;
1240
1241 /* The entries are added/deleted only under RTNL */
1242 rcu_read_lock();
1243 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1244 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1245 rcu_read_unlock();
1246 if (!c)
1247 return -ENOENT;
1248 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1249 list_del_rcu(&c->_c.list);
1250
1251 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1252 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1253 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1254 mr_cache_put(&c->_c);
1255 return 0;
1256 }
1257
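/* netdevice notifier: on NETDEV_UNREGISTER, delete every MIF that
 * still references the disappearing device.
 */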
1258 static int ip6mr_device_event(struct notifier_block *this,
1259 unsigned long event, void *ptr)
1260 {
1261 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1262 struct net *net = dev_net(dev);
1263 struct mr_table *mrt;
1264 struct vif_device *v;
1265 int ct;
1266
1267 if (event != NETDEV_UNREGISTER)
1268 return NOTIFY_DONE;
1269
1270 ip6mr_for_each_table(mrt, net) {
1271 v = &mrt->vif_table[0];
1272 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 if (rcu_access_pointer(v->dev) == dev)
1274 mif6_delete(mrt, ct, 1, NULL);
1275 }
1276 }
1277
1278 return NOTIFY_DONE;
1279 }
1280
1281 static unsigned int ip6mr_seq_read(const struct net *net)
1282 {
1283 return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1284 }
1285
1286 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1287 struct netlink_ext_ack *extack)
1288 {
1289 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1290 ip6mr_mr_table_iter, extack);
1291 }
1292
1293 static struct notifier_block ip6_mr_notifier = {
1294 .notifier_call = ip6mr_device_event
1295 };
1296
1297 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1298 .family = RTNL_FAMILY_IP6MR,
1299 .fib_seq_read = ip6mr_seq_read,
1300 .fib_dump = ip6mr_dump,
1301 .owner = THIS_MODULE,
1302 };
1303
1304 static int __net_init ip6mr_notifier_init(struct net *net)
1305 {
1306 struct fib_notifier_ops *ops;
1307
1308 net->ipv6.ipmr_seq = 0;
1309
1310 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1311 if (IS_ERR(ops))
1312 return PTR_ERR(ops);
1313
1314 net->ipv6.ip6mr_notifier_ops = ops;
1315
1316 return 0;
1317 }
1318
1319 static void __net_exit ip6mr_notifier_exit(struct net *net)
1320 {
1321 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1322 net->ipv6.ip6mr_notifier_ops = NULL;
1323 }
1324
1325 /* Setup for IP multicast routing */
1326 static int __net_init ip6mr_net_init(struct net *net)
1327 {
1328 int err;
1329
1330 err = ip6mr_notifier_init(net);
1331 if (err)
1332 return err;
1333
1334 err = ip6mr_rules_init(net);
1335 if (err < 0)
1336 goto ip6mr_rules_fail;
1337
1338 #ifdef CONFIG_PROC_FS
1339 err = -ENOMEM;
1340 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1341 sizeof(struct mr_vif_iter)))
1342 goto proc_vif_fail;
1343 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1344 sizeof(struct mr_mfc_iter)))
1345 goto proc_cache_fail;
1346 #endif
1347
1348 return 0;
1349
1350 #ifdef CONFIG_PROC_FS
1351 proc_cache_fail:
1352 remove_proc_entry("ip6_mr_vif", net->proc_net);
1353 proc_vif_fail:
1354 rtnl_lock();
1355 ip6mr_rules_exit(net);
1356 rtnl_unlock();
1357 #endif
1358 ip6mr_rules_fail:
1359 ip6mr_notifier_exit(net);
1360 return err;
1361 }
1362
1363 static void __net_exit ip6mr_net_exit(struct net *net)
1364 {
1365 #ifdef CONFIG_PROC_FS
1366 remove_proc_entry("ip6_mr_cache", net->proc_net);
1367 remove_proc_entry("ip6_mr_vif", net->proc_net);
1368 #endif
1369 ip6mr_notifier_exit(net);
1370 }
1371
1372 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1373 {
1374 struct net *net;
1375
1376 rtnl_lock();
1377 list_for_each_entry(net, net_list, exit_list)
1378 ip6mr_rules_exit(net);
1379 rtnl_unlock();
1380 }
1381
1382 static struct pernet_operations ip6mr_net_ops = {
1383 .init = ip6mr_net_init,
1384 .exit = ip6mr_net_exit,
1385 .exit_batch = ip6mr_net_exit_batch,
1386 };
1387
1388 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
1389 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
1390 .msgtype = RTM_GETROUTE,
1391 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
1392 };
1393
1394 int __init ip6_mr_init(void)
1395 {
1396 int err;
1397
1398 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1399 if (!mrt_cachep)
1400 return -ENOMEM;
1401
1402 err = register_pernet_subsys(&ip6mr_net_ops);
1403 if (err)
1404 goto reg_pernet_fail;
1405
1406 err = register_netdevice_notifier(&ip6_mr_notifier);
1407 if (err)
1408 goto reg_notif_fail;
1409 #ifdef CONFIG_IPV6_PIMSM_V2
1410 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1411 pr_err("%s: can't add PIM protocol\n", __func__);
1412 err = -EAGAIN;
1413 goto add_proto_fail;
1414 }
1415 #endif
1416 err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1417 if (!err)
1418 return 0;
1419
1420 #ifdef CONFIG_IPV6_PIMSM_V2
1421 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1422 add_proto_fail:
1423 unregister_netdevice_notifier(&ip6_mr_notifier);
1424 #endif
1425 reg_notif_fail:
1426 unregister_pernet_subsys(&ip6mr_net_ops);
1427 reg_pernet_fail:
1428 kmem_cache_destroy(mrt_cachep);
1429 return err;
1430 }
1431
1432 void __init ip6_mr_cleanup(void)
1433 {
1434 rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
1435 #ifdef CONFIG_IPV6_PIMSM_V2
1436 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1437 #endif
1438 unregister_netdevice_notifier(&ip6_mr_notifier);
1439 unregister_pernet_subsys(&ip6mr_net_ops);
1440 kmem_cache_destroy(mrt_cachep);
1441 }
1442
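/* Add or replace an MFC entry and replay any queued packets that the
 * new entry resolves. Called under RTNL.
 */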
1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1444 struct mf6cctl *mfc, int mrtsock, int parent)
1445 {
1446 unsigned char ttls[MAXMIFS];
1447 struct mfc6_cache *uc, *c;
1448 struct mr_mfc *_uc;
1449 bool found;
1450 int i, err;
1451
1452 if (mfc->mf6cc_parent >= MAXMIFS)
1453 return -ENFILE;
1454
1455 memset(ttls, 255, MAXMIFS);
1456 for (i = 0; i < MAXMIFS; i++) {
1457 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1458 ttls[i] = 1;
1459 }
1460
1461 /* The entries are added/deleted only under RTNL */
1462 rcu_read_lock();
1463 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1464 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1465 rcu_read_unlock();
1466 if (c) {
1467 spin_lock(&mrt_lock);
1468 c->_c.mfc_parent = mfc->mf6cc_parent;
1469 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1470 if (!mrtsock)
1471 c->_c.mfc_flags |= MFC_STATIC;
1472 spin_unlock(&mrt_lock);
1473 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1474 c, mrt->id);
1475 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1476 return 0;
1477 }
1478
1479 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1480 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1481 return -EINVAL;
1482
1483 c = ip6mr_cache_alloc();
1484 if (!c)
1485 return -ENOMEM;
1486
1487 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1488 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1489 c->_c.mfc_parent = mfc->mf6cc_parent;
1490 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1491 if (!mrtsock)
1492 c->_c.mfc_flags |= MFC_STATIC;
1493
1494 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1495 ip6mr_rht_params);
1496 if (err) {
1497 pr_err("ip6mr: rhtable insert error %d\n", err);
1498 ip6mr_cache_free(c);
1499 return err;
1500 }
1501 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1502
1503 /* Check to see if we resolved a queued list. If so we
1504 * need to send on the frames and tidy up.
1505 */
1506 found = false;
1507 spin_lock_bh(&mfc_unres_lock);
1508 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1509 uc = (struct mfc6_cache *)_uc;
1510 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1511 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1512 list_del(&_uc->list);
1513 atomic_dec(&mrt->cache_resolve_queue_len);
1514 found = true;
1515 break;
1516 }
1517 }
1518 if (list_empty(&mrt->mfc_unres_queue))
1519 timer_delete(&mrt->ipmr_expire_timer);
1520 spin_unlock_bh(&mfc_unres_lock);
1521
1522 if (found) {
1523 ip6mr_cache_resolve(net, mrt, uc, c);
1524 ip6mr_cache_free(uc);
1525 }
1526 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1527 c, mrt->id);
1528 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1529 return 0;
1530 }
1531
1532 /*
1533 * Close the multicast socket, and clear the vif tables etc
1534 */
1535
1536 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1537 {
1538 struct mr_mfc *c, *tmp;
1539 LIST_HEAD(list);
1540 int i;
1541
1542 /* Shut down all active vif entries */
1543 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1544 for (i = 0; i < mrt->maxvif; i++) {
1545 if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1546 !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1547 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1548 continue;
1549 mif6_delete(mrt, i, 0, &list);
1550 }
1551 unregister_netdevice_many(&list);
1552 }
1553
1554 /* Wipe the cache */
1555 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1556 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1557 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1558 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1559 continue;
1560 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1561 list_del_rcu(&c->list);
1562 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1563 FIB_EVENT_ENTRY_DEL,
1564 (struct mfc6_cache *)c, mrt->id);
1565 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1566 mr_cache_put(c);
1567 }
1568 }
1569
1570 if (flags & MRT6_FLUSH_MFC) {
1571 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1572 spin_lock_bh(&mfc_unres_lock);
1573 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1574 list_del(&c->list);
1575 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1576 RTM_DELROUTE);
1577 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1578 }
1579 spin_unlock_bh(&mfc_unres_lock);
1580 }
1581 }
1582 }
1583
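/* Attach @sk as the multicast routing control socket for @mrt
 * (MRT6_INIT handler).
 */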
1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1585 {
1586 int err = 0;
1587 struct net *net = sock_net(sk);
1588
1589 rtnl_lock();
1590 spin_lock(&mrt_lock);
1591 if (rtnl_dereference(mrt->mroute_sk)) {
1592 err = -EADDRINUSE;
1593 } else {
1594 rcu_assign_pointer(mrt->mroute_sk, sk);
1595 sock_set_flag(sk, SOCK_RCU_FREE);
1596 atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1597 }
1598 spin_unlock(&mrt_lock);
1599
1600 if (!err)
1601 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1602 NETCONFA_MC_FORWARDING,
1603 NETCONFA_IFINDEX_ALL,
1604 net->ipv6.devconf_all);
1605 rtnl_unlock();
1606
1607 return err;
1608 }
1609
1610 int ip6mr_sk_done(struct sock *sk)
1611 {
1612 struct net *net = sock_net(sk);
1613 struct ipv6_devconf *devconf;
1614 struct mr_table *mrt;
1615 int err = -EACCES;
1616
1617 if (sk->sk_type != SOCK_RAW ||
1618 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 return err;
1620
1621 devconf = net->ipv6.devconf_all;
1622 if (!devconf || !atomic_read(&devconf->mc_forwarding))
1623 return err;
1624
1625 rtnl_lock();
1626 ip6mr_for_each_table(mrt, net) {
1627 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1628 spin_lock(&mrt_lock);
1629 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1630 /* Note that mroute_sk had SOCK_RCU_FREE set,
1631 * so the RCU grace period before sk freeing
1632 * is guaranteed by sk_destruct()
1633 */
1634 atomic_dec(&devconf->mc_forwarding);
1635 spin_unlock(&mrt_lock);
1636 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1637 NETCONFA_MC_FORWARDING,
1638 NETCONFA_IFINDEX_ALL,
1639 net->ipv6.devconf_all);
1640
1641 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1642 err = 0;
1643 break;
1644 }
1645 }
1646 rtnl_unlock();
1647
1648 return err;
1649 }
1650
1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1652 {
1653 struct mr_table *mrt;
1654 struct flowi6 fl6 = {
1655 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1656 .flowi6_oif = skb->dev->ifindex,
1657 .flowi6_mark = skb->mark,
1658 };
1659
1660 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1661 return false;
1662
1663 return rcu_access_pointer(mrt->mroute_sk);
1664 }
1665 EXPORT_SYMBOL(mroute6_is_socket);
1666
1667 /*
1668 * Socket options and virtual interface manipulation. The whole
1669 * virtual interface system is a complete heap, but unfortunately
1670 * that's how BSD mrouted happens to think. Maybe one day with a proper
1671 * MOSPF/PIM router set up we can clean this up.
1672 */
1673
1674 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1675 unsigned int optlen)
1676 {
1677 int ret, parent = 0;
1678 struct mif6ctl vif;
1679 struct mf6cctl mfc;
1680 mifi_t mifi;
1681 struct net *net = sock_net(sk);
1682 struct mr_table *mrt;
1683
1684 if (sk->sk_type != SOCK_RAW ||
1685 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1686 return -EOPNOTSUPP;
1687
1688 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1689 if (!mrt)
1690 return -ENOENT;
1691
1692 if (optname != MRT6_INIT) {
1693 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1694 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1695 return -EACCES;
1696 }
1697
1698 switch (optname) {
1699 case MRT6_INIT:
1700 if (optlen < sizeof(int))
1701 return -EINVAL;
1702
1703 return ip6mr_sk_init(mrt, sk);
1704
1705 case MRT6_DONE:
1706 return ip6mr_sk_done(sk);
1707
1708 case MRT6_ADD_MIF:
1709 if (optlen < sizeof(vif))
1710 return -EINVAL;
1711 if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1712 return -EFAULT;
1713 if (vif.mif6c_mifi >= MAXMIFS)
1714 return -ENFILE;
1715 rtnl_lock();
1716 ret = mif6_add(net, mrt, &vif,
1717 sk == rtnl_dereference(mrt->mroute_sk));
1718 rtnl_unlock();
1719 return ret;
1720
1721 case MRT6_DEL_MIF:
1722 if (optlen < sizeof(mifi_t))
1723 return -EINVAL;
1724 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1725 return -EFAULT;
1726 rtnl_lock();
1727 ret = mif6_delete(mrt, mifi, 0, NULL);
1728 rtnl_unlock();
1729 return ret;
1730
1731 /*
1732 * Manipulate the forwarding caches. These live
1733 * in a sort of kernel/user symbiosis.
1734 */
1735 case MRT6_ADD_MFC:
1736 case MRT6_DEL_MFC:
1737 parent = -1;
1738 fallthrough;
1739 case MRT6_ADD_MFC_PROXY:
1740 case MRT6_DEL_MFC_PROXY:
1741 if (optlen < sizeof(mfc))
1742 return -EINVAL;
1743 if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1744 return -EFAULT;
1745 if (parent == 0)
1746 parent = mfc.mf6cc_parent;
1747 rtnl_lock();
1748 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1749 ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1750 else
1751 ret = ip6mr_mfc_add(net, mrt, &mfc,
1752 sk ==
1753 rtnl_dereference(mrt->mroute_sk),
1754 parent);
1755 rtnl_unlock();
1756 return ret;
1757
1758 case MRT6_FLUSH:
1759 {
1760 int flags;
1761
1762 if (optlen != sizeof(flags))
1763 return -EINVAL;
1764 if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1765 return -EFAULT;
1766 rtnl_lock();
1767 mroute_clean_tables(mrt, flags);
1768 rtnl_unlock();
1769 return 0;
1770 }
1771
1772 /*
1773 * Control PIM assert (to activate pim will activate assert)
1774 */
1775 case MRT6_ASSERT:
1776 {
1777 int v;
1778
1779 if (optlen != sizeof(v))
1780 return -EINVAL;
1781 if (copy_from_sockptr(&v, optval, sizeof(v)))
1782 return -EFAULT;
1783 mrt->mroute_do_assert = v;
1784 return 0;
1785 }
1786
1787 #ifdef CONFIG_IPV6_PIMSM_V2
1788 case MRT6_PIM:
1789 {
1790 bool do_wrmifwhole;
1791 int v;
1792
1793 if (optlen != sizeof(v))
1794 return -EINVAL;
1795 if (copy_from_sockptr(&v, optval, sizeof(v)))
1796 return -EFAULT;
1797
1798 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1799 v = !!v;
1800 rtnl_lock();
1801 ret = 0;
1802 if (v != mrt->mroute_do_pim) {
1803 mrt->mroute_do_pim = v;
1804 mrt->mroute_do_assert = v;
1805 mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1806 }
1807 rtnl_unlock();
1808 return ret;
1809 }
1810
1811 #endif
1812 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1813 case MRT6_TABLE:
1814 {
1815 u32 v;
1816
1817 if (optlen != sizeof(u32))
1818 return -EINVAL;
1819 if (copy_from_sockptr(&v, optval, sizeof(v)))
1820 return -EFAULT;
1821 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1822 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1823 return -EINVAL;
1824 if (sk == rcu_access_pointer(mrt->mroute_sk))
1825 return -EBUSY;
1826
1827 rtnl_lock();
1828 ret = 0;
1829 mrt = ip6mr_new_table(net, v);
1830 if (IS_ERR(mrt))
1831 ret = PTR_ERR(mrt);
1832 else
1833 raw6_sk(sk)->ip6mr_table = v;
1834 rtnl_unlock();
1835 return ret;
1836 }
1837 #endif
1838 /*
1839 * Spurious command, or MRT6_VERSION which you cannot
1840 * set.
1841 */
1842 default:
1843 return -ENOPROTOOPT;
1844 }
1845 }
1846
1847 /*
1848 * Getsock opt support for the multicast routing system.
1849 */
1850
1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1852 sockptr_t optlen)
1853 {
1854 int olr;
1855 int val;
1856 struct net *net = sock_net(sk);
1857 struct mr_table *mrt;
1858
1859 if (sk->sk_type != SOCK_RAW ||
1860 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1861 return -EOPNOTSUPP;
1862
1863 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 if (!mrt)
1865 return -ENOENT;
1866
1867 switch (optname) {
1868 case MRT6_VERSION:
1869 val = 0x0305;
1870 break;
1871 #ifdef CONFIG_IPV6_PIMSM_V2
1872 case MRT6_PIM:
1873 val = mrt->mroute_do_pim;
1874 break;
1875 #endif
1876 case MRT6_ASSERT:
1877 val = mrt->mroute_do_assert;
1878 break;
1879 default:
1880 return -ENOPROTOOPT;
1881 }
1882
1883 if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1884 return -EFAULT;
1885
1886 olr = min_t(int, olr, sizeof(int));
1887 if (olr < 0)
1888 return -EINVAL;
1889
1890 if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1891 return -EFAULT;
1892 if (copy_to_sockptr(optval, &val, olr))
1893 return -EFAULT;
1894 return 0;
1895 }
1896
1897 /*
1898 * The IP multicast ioctl support routines.
1899 */
1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1901 {
1902 struct sioc_sg_req6 *sr;
1903 struct sioc_mif_req6 *vr;
1904 struct vif_device *vif;
1905 struct mfc6_cache *c;
1906 struct net *net = sock_net(sk);
1907 struct mr_table *mrt;
1908
1909 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1910 if (!mrt)
1911 return -ENOENT;
1912
1913 switch (cmd) {
1914 case SIOCGETMIFCNT_IN6:
1915 vr = (struct sioc_mif_req6 *)arg;
1916 if (vr->mifi >= mrt->maxvif)
1917 return -EINVAL;
1918 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1919 rcu_read_lock();
1920 vif = &mrt->vif_table[vr->mifi];
1921 if (VIF_EXISTS(mrt, vr->mifi)) {
1922 vr->icount = READ_ONCE(vif->pkt_in);
1923 vr->ocount = READ_ONCE(vif->pkt_out);
1924 vr->ibytes = READ_ONCE(vif->bytes_in);
1925 vr->obytes = READ_ONCE(vif->bytes_out);
1926 rcu_read_unlock();
1927 return 0;
1928 }
1929 rcu_read_unlock();
1930 return -EADDRNOTAVAIL;
1931 case SIOCGETSGCNT_IN6:
1932 sr = (struct sioc_sg_req6 *)arg;
1933
1934 rcu_read_lock();
1935 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1936 &sr->grp.sin6_addr);
1937 if (c) {
1938 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1939 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1940 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1941 rcu_read_unlock();
1942 return 0;
1943 }
1944 rcu_read_unlock();
1945 return -EADDRNOTAVAIL;
1946 default:
1947 return -ENOIOCTLCMD;
1948 }
1949 }
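/*
 * Userspace sketch (illustrative only): per-MIF counters are read through
 * the mroute socket with SIOCGETMIFCNT_IN6; SIOCGETSGCNT_IN6 works the same
 * way with a struct sioc_sg_req6 keyed by (source, group).
 *
 *	struct sioc_mif_req6 req = { .mifi = 0 };
 *
 *	if (ioctl(fd, SIOCGETMIFCNT_IN6, &req) == 0)
 *		printf("mif %u: %lu/%lu packets in/out\n",
 *		       req.mifi, req.icount, req.ocount);
 */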
1950
1951 #ifdef CONFIG_COMPAT
1952 struct compat_sioc_sg_req6 {
1953 struct sockaddr_in6 src;
1954 struct sockaddr_in6 grp;
1955 compat_ulong_t pktcnt;
1956 compat_ulong_t bytecnt;
1957 compat_ulong_t wrong_if;
1958 };
1959
1960 struct compat_sioc_mif_req6 {
1961 mifi_t mifi;
1962 compat_ulong_t icount;
1963 compat_ulong_t ocount;
1964 compat_ulong_t ibytes;
1965 compat_ulong_t obytes;
1966 };
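/*
 * A 32-bit task on a 64-bit kernel sees the "unsigned long" counters of
 * sioc_sg_req6/sioc_mif_req6 as 32 bits wide, so the native layouts do not
 * match its ABI; the compat_ulong_t mirrors above reproduce the 32-bit
 * layout for ip6mr_compat_ioctl() below.
 */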
1967
1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1969 {
1970 struct compat_sioc_sg_req6 sr;
1971 struct compat_sioc_mif_req6 vr;
1972 struct vif_device *vif;
1973 struct mfc6_cache *c;
1974 struct net *net = sock_net(sk);
1975 struct mr_table *mrt;
1976
1977 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1978 if (!mrt)
1979 return -ENOENT;
1980
1981 switch (cmd) {
1982 case SIOCGETMIFCNT_IN6:
1983 if (copy_from_user(&vr, arg, sizeof(vr)))
1984 return -EFAULT;
1985 if (vr.mifi >= mrt->maxvif)
1986 return -EINVAL;
1987 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1988 rcu_read_lock();
1989 vif = &mrt->vif_table[vr.mifi];
1990 if (VIF_EXISTS(mrt, vr.mifi)) {
1991 vr.icount = READ_ONCE(vif->pkt_in);
1992 vr.ocount = READ_ONCE(vif->pkt_out);
1993 vr.ibytes = READ_ONCE(vif->bytes_in);
1994 vr.obytes = READ_ONCE(vif->bytes_out);
1995 rcu_read_unlock();
1996
1997 if (copy_to_user(arg, &vr, sizeof(vr)))
1998 return -EFAULT;
1999 return 0;
2000 }
2001 rcu_read_unlock();
2002 return -EADDRNOTAVAIL;
2003 case SIOCGETSGCNT_IN6:
2004 if (copy_from_user(&sr, arg, sizeof(sr)))
2005 return -EFAULT;
2006
2007 rcu_read_lock();
2008 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2009 if (c) {
2010 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2011 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2012 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2013 rcu_read_unlock();
2014
2015 if (copy_to_user(arg, &sr, sizeof(sr)))
2016 return -EFAULT;
2017 return 0;
2018 }
2019 rcu_read_unlock();
2020 return -EADDRNOTAVAIL;
2021 default:
2022 return -ENOIOCTLCMD;
2023 }
2024 }
2025 #endif
2026
2027 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2028 {
2029 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2030 IPSTATS_MIB_OUTFORWDATAGRAMS);
2031 return dst_output(net, sk, skb);
2032 }
2033
2034 /*
2035 * Processing handlers for ip6mr_forward
2036 */
2037
2038 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
2039 struct sk_buff *skb, int vifi)
2040 {
2041 struct vif_device *vif = &mrt->vif_table[vifi];
2042 struct net_device *vif_dev;
2043 struct ipv6hdr *ipv6h;
2044 struct dst_entry *dst;
2045 struct flowi6 fl6;
2046
2047 vif_dev = vif_dev_read(vif);
2048 if (!vif_dev)
2049 return -1;
2050
2051 #ifdef CONFIG_IPV6_PIMSM_V2
2052 if (vif->flags & MIFF_REGISTER) {
2053 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2054 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2055 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2056 DEV_STATS_INC(vif_dev, tx_packets);
2057 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2058 return -1;
2059 }
2060 #endif
2061
2062 ipv6h = ipv6_hdr(skb);
2063
2064 fl6 = (struct flowi6) {
2065 .flowi6_oif = vif->link,
2066 .daddr = ipv6h->daddr,
2067 };
2068
2069 dst = ip6_route_output(net, NULL, &fl6);
2070 if (dst->error) {
2071 dst_release(dst);
2072 return -1;
2073 }
2074
2075 skb_dst_drop(skb);
2076 skb_dst_set(skb, dst);
2077
2078 /*
2079 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
2080 * not only before forwarding, but also after forwarding on all output
2081 * interfaces. Clearly, if the mrouter runs a multicast routing
2082 * program, that program should receive packets regardless of the
2083 * interface it joined on.
2084 * Otherwise the program would have to join on all
2085 * interfaces. On the other hand, a multihomed host (or router, but
2086 * not mrouter) cannot join on more than one interface - that would
2087 * result in receiving duplicate packets.
2088 */
2089 skb->dev = vif_dev;
2090 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2091 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2092
2093 /* We are about to write */
2094 /* XXX: extension headers? */
2095 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2096 return -1;
2097
2098 ipv6h = ipv6_hdr(skb);
2099 ipv6h->hop_limit--;
2100 return 0;
2101 }
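/*
 * Note that, unlike unicast forwarding, no ICMPv6 "time exceeded" is sent
 * here: the callers only hand a packet to ip6mr_prepare_xmit() when its hop
 * limit already exceeds the per-vif TTL threshold, so the decrement above
 * cannot bring it to zero.
 */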
2102
2103 static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
2104 struct sk_buff *skb, int vifi)
2105 {
2106 struct net_device *indev = skb->dev;
2107
2108 if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2109 goto out_free;
2110
2111 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2112
2113 NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2114 net, NULL, skb, indev, skb->dev,
2115 ip6mr_forward2_finish);
2116 return;
2117
2118 out_free:
2119 kfree_skb(skb);
2120 }
2121
2122 static void ip6mr_output2(struct net *net, struct mr_table *mrt,
2123 struct sk_buff *skb, int vifi)
2124 {
2125 if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
2126 goto out_free;
2127
2128 ip6_output(net, NULL, skb);
2129 return;
2130
2131 out_free:
2132 kfree_skb(skb);
2133 }
2134
2135 /* Called with rcu_read_lock(); returns the matching vif index for @dev, or -1 if none */
2136 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2137 {
2138 int ct;
2139
2140 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2141 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2142 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2143 break;
2144 }
2145 return ct;
2146 }
2147
2148 /* Called under rcu_read_lock() */
2149 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2150 struct net_device *dev, struct sk_buff *skb,
2151 struct mfc6_cache *c)
2152 {
2153 int psend = -1;
2154 int vif, ct;
2155 int true_vifi = ip6mr_find_vif(mrt, dev);
2156
2157 vif = c->_c.mfc_parent;
2158 atomic_long_inc(&c->_c.mfc_un.res.pkt);
2159 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2160 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2161
2162 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2163 struct mfc6_cache *cache_proxy;
2164
2165 /* For an (*,G) entry, we only check that the incoming
2166 * interface is part of the static tree.
2167 */
2168 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2169 if (cache_proxy &&
2170 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2171 goto forward;
2172 }
2173
2174 /*
2175 * Wrong interface: drop packet and (maybe) send PIM assert.
2176 */
2177 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2178 atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2179
2180 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2181 /* PIM-SM uses asserts when switching from RPT to SPT,
2182 so we cannot check that the packet arrived on an oif.
2183 That is bad, but otherwise we would need to move a
2184 pretty large chunk of pimd into the kernel. Ough... --ANK
2185 */
2186 (mrt->mroute_do_pim ||
2187 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2188 time_after(jiffies,
2189 c->_c.mfc_un.res.last_assert +
2190 MFC_ASSERT_THRESH)) {
2191 c->_c.mfc_un.res.last_assert = jiffies;
2192 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2193 if (mrt->mroute_do_wrvifwhole)
2194 ip6mr_cache_report(mrt, skb, true_vifi,
2195 MRT6MSG_WRMIFWHOLE);
2196 }
2197 goto dont_forward;
2198 }
2199
2200 forward:
2201 WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2202 mrt->vif_table[vif].pkt_in + 1);
2203 WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2204 mrt->vif_table[vif].bytes_in + skb->len);
2205
2206 /*
2207 * Forward the frame
2208 */
2209 if (ipv6_addr_any(&c->mf6c_origin) &&
2210 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2211 if (true_vifi >= 0 &&
2212 true_vifi != c->_c.mfc_parent &&
2213 ipv6_hdr(skb)->hop_limit >
2214 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2215 /* It's an (*,*) entry and the packet is not coming from
2216 * the upstream: forward the packet to the upstream
2217 * only.
2218 */
2219 psend = c->_c.mfc_parent;
2220 goto last_forward;
2221 }
2222 goto dont_forward;
2223 }
2224 for (ct = c->_c.mfc_un.res.maxvif - 1;
2225 ct >= c->_c.mfc_un.res.minvif; ct--) {
2226 /* For (*,G) entry, don't forward to the incoming interface */
2227 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2228 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2229 if (psend != -1) {
2230 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2231 if (skb2)
2232 ip6mr_forward2(net, mrt, skb2, psend);
2233 }
2234 psend = ct;
2235 }
2236 }
2237 last_forward:
2238 if (psend != -1) {
2239 ip6mr_forward2(net, mrt, skb, psend);
2240 return;
2241 }
2242
2243 dont_forward:
2244 kfree_skb(skb);
2245 }
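/*
 * Worked example (illustrative): ttls[vif] is the minimum hop limit needed
 * to forward out of vif, with 255 meaning "not an output interface". With
 * ttls[] = { 1, 255, 64 } and an incoming hop_limit of 65, the loop above
 * sends a clone out vif 2 and the original skb out vif 0, skipping vif 1.
 */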
2246
2247 /* Called under rcu_read_lock() */
2248 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
2249 struct net_device *dev, struct sk_buff *skb,
2250 struct mfc6_cache *c)
2251 {
2252 int psend = -1;
2253 int ct;
2254
2255 WARN_ON_ONCE(!rcu_read_lock_held());
2256
2257 atomic_long_inc(&c->_c.mfc_un.res.pkt);
2258 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2259 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2260
2261 /* Forward the frame */
2262 if (ipv6_addr_any(&c->mf6c_origin) &&
2263 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2264 if (ipv6_hdr(skb)->hop_limit >
2265 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2266 /* It's an (*,*) entry; on the output path the packet
2267 * is locally generated, so it can only be forwarded
2268 * to the upstream.
2269 */
2270 psend = c->_c.mfc_parent;
2271 goto last_forward;
2272 }
2273 goto dont_forward;
2274 }
2275 for (ct = c->_c.mfc_un.res.maxvif - 1;
2276 ct >= c->_c.mfc_un.res.minvif; ct--) {
2277 if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2278 if (psend != -1) {
2279 struct sk_buff *skb2;
2280
2281 skb2 = skb_clone(skb, GFP_ATOMIC);
2282 if (skb2)
2283 ip6mr_output2(net, mrt, skb2, psend);
2284 }
2285 psend = ct;
2286 }
2287 }
2288 last_forward:
2289 if (psend != -1) {
2290 ip6mr_output2(net, mrt, skb, psend);
2291 return;
2292 }
2293
2294 dont_forward:
2295 kfree_skb(skb);
2296 }
2297
2298 /*
2299 * Multicast packets for forwarding arrive here
2300 */
2301
2302 int ip6_mr_input(struct sk_buff *skb)
2303 {
2304 struct net_device *dev = skb->dev;
2305 struct net *net = dev_net_rcu(dev);
2306 struct mfc6_cache *cache;
2307 struct mr_table *mrt;
2308 struct flowi6 fl6 = {
2309 .flowi6_iif = dev->ifindex,
2310 .flowi6_mark = skb->mark,
2311 };
2312 int err;
2313
2314 /* The skb->dev passed in is the master device for VRFs.
2315 * Get the proper interface, i.e. the one that has a vif associated with it.
2316 */
2317 if (netif_is_l3_master(dev)) {
2318 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2319 if (!dev) {
2320 kfree_skb(skb);
2321 return -ENODEV;
2322 }
2323 }
2324
2325 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2326 if (err < 0) {
2327 kfree_skb(skb);
2328 return err;
2329 }
2330
2331 cache = ip6mr_cache_find(mrt,
2332 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2333 if (!cache) {
2334 int vif = ip6mr_find_vif(mrt, dev);
2335
2336 if (vif >= 0)
2337 cache = ip6mr_cache_find_any(mrt,
2338 &ipv6_hdr(skb)->daddr,
2339 vif);
2340 }
2341
2342 /*
2343 * No usable cache entry
2344 */
2345 if (!cache) {
2346 int vif = ip6mr_find_vif(mrt, dev);
2347
2348 if (vif >= 0)
2349 return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2354 kfree_skb(skb);
2355 return -ENODEV;
2356 }
2357
2358 ip6_mr_forward(net, mrt, dev, skb, cache);
2359
2360 return 0;
2361 }
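/*
 * When no (S,G) or (*,G) entry matches, ip6mr_cache_unresolved() queues the
 * packet and reports it to the user-space daemon (MRT6MSG_NOCACHE), which is
 * expected to resolve it and install an entry with MRT6_ADD_MFC.
 */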
2362
2363 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2364 {
2365 struct net_device *dev = skb_dst(skb)->dev;
2366 struct flowi6 fl6 = (struct flowi6) {
2367 .flowi6_iif = LOOPBACK_IFINDEX,
2368 .flowi6_mark = skb->mark,
2369 };
2370 struct mfc6_cache *cache;
2371 struct mr_table *mrt;
2372 int err;
2373 int vif;
2374
2375 guard(rcu)();
2376
2377 if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
2378 goto ip6_output;
2379 if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
2380 goto ip6_output;
2381
2382 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2383 if (err < 0) {
2384 kfree_skb(skb);
2385 return err;
2386 }
2387
2388 cache = ip6mr_cache_find(mrt,
2389 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2390 if (!cache) {
2391 vif = ip6mr_find_vif(mrt, dev);
2392 if (vif >= 0)
2393 cache = ip6mr_cache_find_any(mrt,
2394 &ipv6_hdr(skb)->daddr,
2395 vif);
2396 }
2397
2398 /* No usable cache entry */
2399 if (!cache) {
2400 vif = ip6mr_find_vif(mrt, dev);
2401 if (vif >= 0)
2402 return ip6mr_cache_unresolved(mrt, vif, skb, dev);
2403 goto ip6_output;
2404 }
2405
2406 /* Wrong interface */
2407 vif = cache->_c.mfc_parent;
2408 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2409 goto ip6_output;
2410
2411 ip6_mr_output_finish(net, mrt, dev, skb, cache);
2412 return 0;
2413
2414 ip6_output:
2415 return ip6_output(net, sk, skb);
2416 }
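/*
 * Only locally generated packets that carry IP6SKB_MCROUTE and have not
 * already been forwarded are steered through the MFC lookup above; anything
 * else, including wrong-interface traffic, falls through to ip6_output().
 */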
2417
2418 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2419 u32 portid)
2420 {
2421 int err;
2422 struct mr_table *mrt;
2423 struct mfc6_cache *cache;
2424 struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2425
2426 rcu_read_lock();
2427 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2428 if (!mrt) {
2429 rcu_read_unlock();
2430 return -ENOENT;
2431 }
2432
2433 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2434 if (!cache && skb->dev) {
2435 int vif = ip6mr_find_vif(mrt, skb->dev);
2436
2437 if (vif >= 0)
2438 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2439 vif);
2440 }
2441
2442 if (!cache) {
2443 struct sk_buff *skb2;
2444 struct ipv6hdr *iph;
2445 struct net_device *dev;
2446 int vif;
2447
2448 dev = skb->dev;
2449 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2450 rcu_read_unlock();
2451 return -ENODEV;
2452 }
2453
2454 /* really correct? */
2455 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2456 if (!skb2) {
2457 rcu_read_unlock();
2458 return -ENOMEM;
2459 }
2460
2461 NETLINK_CB(skb2).portid = portid;
2462 skb_reset_transport_header(skb2);
2463
2464 skb_put(skb2, sizeof(struct ipv6hdr));
2465 skb_reset_network_header(skb2);
2466
2467 iph = ipv6_hdr(skb2);
2468 iph->version = 0;
2469 iph->priority = 0;
2470 iph->flow_lbl[0] = 0;
2471 iph->flow_lbl[1] = 0;
2472 iph->flow_lbl[2] = 0;
2473 iph->payload_len = 0;
2474 iph->nexthdr = IPPROTO_NONE;
2475 iph->hop_limit = 0;
2476 iph->saddr = rt->rt6i_src.addr;
2477 iph->daddr = rt->rt6i_dst.addr;
2478
2479 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2480 rcu_read_unlock();
2481
2482 return err;
2483 }
2484
2485 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2486 rcu_read_unlock();
2487 return err;
2488 }
2489
2490 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2491 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2492 int flags)
2493 {
2494 struct nlmsghdr *nlh;
2495 struct rtmsg *rtm;
2496 int err;
2497
2498 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2499 if (!nlh)
2500 return -EMSGSIZE;
2501
2502 rtm = nlmsg_data(nlh);
2503 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2504 rtm->rtm_dst_len = 128;
2505 rtm->rtm_src_len = 128;
2506 rtm->rtm_tos = 0;
2507 rtm->rtm_table = mrt->id;
2508 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2509 goto nla_put_failure;
2510 rtm->rtm_type = RTN_MULTICAST;
2511 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2512 if (c->_c.mfc_flags & MFC_STATIC)
2513 rtm->rtm_protocol = RTPROT_STATIC;
2514 else
2515 rtm->rtm_protocol = RTPROT_MROUTED;
2516 rtm->rtm_flags = 0;
2517
2518 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2519 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2520 goto nla_put_failure;
2521 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2522 /* do not break the dump if cache is unresolved */
2523 if (err < 0 && err != -ENOENT)
2524 goto nla_put_failure;
2525
2526 nlmsg_end(skb, nlh);
2527 return 0;
2528
2529 nla_put_failure:
2530 nlmsg_cancel(skb, nlh);
2531 return -EMSGSIZE;
2532 }
2533
2534 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2535 u32 portid, u32 seq, struct mr_mfc *c,
2536 int cmd, int flags)
2537 {
2538 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2539 cmd, flags);
2540 }
2541
2542 static int mr6_msgsize(bool unresolved, int maxvif)
2543 {
2544 size_t len =
2545 NLMSG_ALIGN(sizeof(struct rtmsg))
2546 + nla_total_size(4) /* RTA_TABLE */
2547 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2548 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2549 ;
2550
2551 if (!unresolved)
2552 len = len
2553 + nla_total_size(4) /* RTA_IIF */
2554 + nla_total_size(0) /* RTA_MULTIPATH */
2555 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2556 /* RTA_MFC_STATS */
2557 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2558 ;
2559
2560 return len;
2561 }
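/*
 * Example (illustrative): for a resolved entry with maxvif == 2 this
 * reserves the rtmsg header, RTA_TABLE, two in6_addr attributes, RTA_IIF,
 * an RTA_MULTIPATH nest with room for two rtnexthop slots and the
 * 64-bit-aligned rta_mfc_stats attribute.
 */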
2562
2563 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2564 int cmd)
2565 {
2566 struct net *net = read_pnet(&mrt->net);
2567 struct sk_buff *skb;
2568 int err = -ENOBUFS;
2569
2570 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2571 GFP_ATOMIC);
2572 if (!skb)
2573 goto errout;
2574
2575 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2576 if (err < 0)
2577 goto errout;
2578
2579 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2580 return;
2581
2582 errout:
2583 kfree_skb(skb);
2584 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2585 }
2586
2587 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2588 {
2589 size_t len =
2590 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2591 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
2592 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
2593 /* IP6MRA_CREPORT_SRC_ADDR */
2594 + nla_total_size(sizeof(struct in6_addr))
2595 /* IP6MRA_CREPORT_DST_ADDR */
2596 + nla_total_size(sizeof(struct in6_addr))
2597 /* IP6MRA_CREPORT_PKT */
2598 + nla_total_size(payloadlen)
2599 ;
2600
2601 return len;
2602 }
2603
2604 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2605 {
2606 struct net *net = read_pnet(&mrt->net);
2607 struct nlmsghdr *nlh;
2608 struct rtgenmsg *rtgenm;
2609 struct mrt6msg *msg;
2610 struct sk_buff *skb;
2611 struct nlattr *nla;
2612 int payloadlen;
2613
2614 payloadlen = pkt->len - sizeof(struct mrt6msg);
2615 msg = (struct mrt6msg *)skb_transport_header(pkt);
2616
2617 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2618 if (!skb)
2619 goto errout;
2620
2621 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2622 sizeof(struct rtgenmsg), 0);
2623 if (!nlh)
2624 goto errout;
2625 rtgenm = nlmsg_data(nlh);
2626 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2627 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2628 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2629 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2630 &msg->im6_src) ||
2631 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2632 &msg->im6_dst))
2633 goto nla_put_failure;
2634
2635 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2636 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2637 nla_data(nla), payloadlen))
2638 goto nla_put_failure;
2639
2640 nlmsg_end(skb, nlh);
2641
2642 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2643 return;
2644
2645 nla_put_failure:
2646 nlmsg_cancel(skb, nlh);
2647 errout:
2648 kfree_skb(skb);
2649 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2650 }
2651
2652 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2653 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2654 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2655 [RTA_TABLE] = { .type = NLA_U32 },
2656 };
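/*
 * Request sketch (illustrative): a strict RTM_GETROUTE request matching the
 * policy above carries an rtmsg with rtm_family RTNL_FAMILY_IP6MR,
 * rtm_src_len == rtm_dst_len == 128 and all other header fields zero,
 * followed by RTA_SRC, RTA_DST and, optionally, RTA_TABLE.
 */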
2657
2658 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2659 const struct nlmsghdr *nlh,
2660 struct nlattr **tb,
2661 struct netlink_ext_ack *extack)
2662 {
2663 struct rtmsg *rtm;
2664 int err;
2665
2666 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2667 extack);
2668 if (err)
2669 return err;
2670
2671 rtm = nlmsg_data(nlh);
2672 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2673 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2674 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2675 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2676 NL_SET_ERR_MSG_MOD(extack,
2677 "Invalid values in header for multicast route get request");
2678 return -EINVAL;
2679 }
2680
2681 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2682 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2683 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2684 return -EINVAL;
2685 }
2686
2687 return 0;
2688 }
2689
2690 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2691 struct netlink_ext_ack *extack)
2692 {
2693 struct net *net = sock_net(in_skb->sk);
2694 struct in6_addr src = {}, grp = {};
2695 struct nlattr *tb[RTA_MAX + 1];
2696 struct mfc6_cache *cache;
2697 struct mr_table *mrt;
2698 struct sk_buff *skb;
2699 u32 tableid;
2700 int err;
2701
2702 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2703 if (err < 0)
2704 return err;
2705
2706 if (tb[RTA_SRC])
2707 src = nla_get_in6_addr(tb[RTA_SRC]);
2708 if (tb[RTA_DST])
2709 grp = nla_get_in6_addr(tb[RTA_DST]);
2710 tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2711
2712 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2713 if (!mrt) {
2714 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2715 return -ENOENT;
2716 }
2717
2718 /* entries are added/deleted only under RTNL */
2719 rcu_read_lock();
2720 cache = ip6mr_cache_find(mrt, &src, &grp);
2721 rcu_read_unlock();
2722 if (!cache) {
2723 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2724 return -ENOENT;
2725 }
2726
2727 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2728 if (!skb)
2729 return -ENOBUFS;
2730
2731 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2732 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2733 if (err < 0) {
2734 kfree_skb(skb);
2735 return err;
2736 }
2737
2738 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2739 }
2740
2741 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2742 {
2743 const struct nlmsghdr *nlh = cb->nlh;
2744 struct fib_dump_filter filter = {
2745 .rtnl_held = true,
2746 };
2747 int err;
2748
2749 if (cb->strict_check) {
2750 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2751 &filter, cb);
2752 if (err < 0)
2753 return err;
2754 }
2755
2756 if (filter.table_id) {
2757 struct mr_table *mrt;
2758
2759 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2760 if (!mrt) {
2761 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2762 return skb->len;
2763
2764 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2765 return -ENOENT;
2766 }
2767 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2768 &mfc_unres_lock, &filter);
2769 return skb->len ? : err;
2770 }
2771
2772 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2773 _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2774 }
2775