1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Linux IPv6 multicast routing support for BSD pim6sd
4 * Based on net/ipv4/ipmr.c.
5 *
6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7 * LSIIT Laboratory, Strasbourg, France
8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * 6WIND, Paris, France
10 * Copyright (C)2007,2008 USAGI/WIDE Project
11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 */
13
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50
51 #include <linux/nospec.h>
52
53 struct ip6mr_rule {
54 struct fib_rule common;
55 };
56
/* Result slot of a rule lookup: the multicast routing table that matched. */
struct ip6mr_result {
	struct mr_table		*mrt;
};
60
/*
 * Big lock protecting the vif table, the mrt cache and the mroute socket
 * state. Note that changes are additionally serialized via rtnl_lock.
 */
static DEFINE_SPINLOCK(mrt_lock);
66
vif_dev_read(const struct vif_device * vif)67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 return rcu_dereference(vif->dev);
70 }
71
72 /* Multicast router control variables */
73
/* Dedicated spinlock for the queue of unresolved cache entries. */
static DEFINE_SPINLOCK(mfc_unres_lock);
76
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   As a result, the data path is entirely free of exclusive locks.
*/
84
85 static struct kmem_cache *mrt_cachep __read_mostly;
86
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 struct net_device *dev, struct sk_buff *skb,
92 struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Iterate over every mr_table linked on net->ipv6.mr6_tables. The extra
 * condition passed to list_for_each_entry_rcu() is "RTNL is held, or the
 * table list is empty".
 */
#define ip6mr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv6.mr6_tables))
110
ip6mr_mr_table_iter(struct net * net,struct mr_table * mrt)111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 struct mr_table *mrt)
113 {
114 struct mr_table *ret;
115
116 if (!mrt)
117 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 struct mr_table, list);
119 else
120 ret = list_entry_rcu(mrt->list.next,
121 struct mr_table, list);
122
123 if (&ret->list == &net->ipv6.mr6_tables)
124 return NULL;
125 return ret;
126 }
127
/* Find the table of @net whose id equals @id; NULL when there is none. */
static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *found = NULL;
	struct mr_table *cur;

	ip6mr_for_each_table(cur, net) {
		if (cur->id != id)
			continue;
		found = cur;
		break;
	}

	return found;
}
138
/*
 * Same lookup as __ip6mr_get_table(), but takes rcu_read_lock() around
 * the list walk itself.
 */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *table;

	rcu_read_lock();
	table = __ip6mr_get_table(net, id);
	rcu_read_unlock();

	return table;
}
148
ip6mr_fib_lookup(struct net * net,struct flowi6 * flp6,struct mr_table ** mrt)149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 struct mr_table **mrt)
151 {
152 int err;
153 struct ip6mr_result res;
154 struct fib_lookup_arg arg = {
155 .result = &res,
156 .flags = FIB_LOOKUP_NOREF,
157 };
158
159 /* update flow if oif or iif point to device enslaved to l3mdev */
160 l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161
162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 flowi6_to_flowi(flp6), 0, &arg);
164 if (err < 0)
165 return err;
166 *mrt = res.mrt;
167 return 0;
168 }
169
ip6mr_rule_action(struct fib_rule * rule,struct flowi * flp,int flags,struct fib_lookup_arg * arg)170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 int flags, struct fib_lookup_arg *arg)
172 {
173 struct ip6mr_result *res = arg->result;
174 struct mr_table *mrt;
175
176 switch (rule->action) {
177 case FR_ACT_TO_TBL:
178 break;
179 case FR_ACT_UNREACHABLE:
180 return -ENETUNREACH;
181 case FR_ACT_PROHIBIT:
182 return -EACCES;
183 case FR_ACT_BLACKHOLE:
184 default:
185 return -EINVAL;
186 }
187
188 arg->table = fib_rule_get_table(rule, arg);
189
190 mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 if (!mrt)
192 return -EAGAIN;
193 res->mrt = mrt;
194 return 0;
195 }
196
/* fib_rules "match" callback: unconditionally reports a match (1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	return 1;
}
201
/* fib_rules "configure" callback: does nothing and returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh,
				struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
208
/* fib_rules "compare" callback: always returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
214
ip6mr_rule_fill(struct fib_rule * rule,struct sk_buff * skb,struct fib_rule_hdr * frh)215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 struct fib_rule_hdr *frh)
217 {
218 frh->dst_len = 0;
219 frh->src_len = 0;
220 frh->tos = 0;
221 return 0;
222 }
223
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 .family = RTNL_FAMILY_IP6MR,
226 .rule_size = sizeof(struct ip6mr_rule),
227 .addr_size = sizeof(struct in6_addr),
228 .action = ip6mr_rule_action,
229 .match = ip6mr_rule_match,
230 .configure = ip6mr_rule_configure,
231 .compare = ip6mr_rule_compare,
232 .fill = ip6mr_rule_fill,
233 .nlgroup = RTNLGRP_IPV6_RULE,
234 .owner = THIS_MODULE,
235 };
236
ip6mr_rules_init(struct net * net)237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 struct fib_rules_ops *ops;
240 struct mr_table *mrt;
241 int err;
242
243 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 if (IS_ERR(ops))
245 return PTR_ERR(ops);
246
247 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248
249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 if (IS_ERR(mrt)) {
251 err = PTR_ERR(mrt);
252 goto err1;
253 }
254
255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
256 if (err < 0)
257 goto err2;
258
259 net->ipv6.mr6_rules_ops = ops;
260 return 0;
261
262 err2:
263 rtnl_lock();
264 ip6mr_free_table(mrt);
265 rtnl_unlock();
266 err1:
267 fib_rules_unregister(ops);
268 return err;
269 }
270
ip6mr_rules_exit(struct net * net)271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 struct mr_table *mrt, *next;
274
275 ASSERT_RTNL();
276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 list_del(&mrt->list);
278 ip6mr_free_table(mrt);
279 }
280 fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282
ip6mr_rules_dump(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 struct netlink_ext_ack *extack)
285 {
286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288
ip6mr_rules_seq_read(const struct net * net)289 static unsigned int ip6mr_rules_seq_read(const struct net *net)
290 {
291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293
ip6mr_rule_default(const struct fib_rule * rule)294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
/*
 * Single-table build: the loop body runs at most once, for net->ipv6.mrt6
 * when that pointer is non-NULL.
 */
#define ip6mr_for_each_table(mrt, net)			\
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303
ip6mr_mr_table_iter(struct net * net,struct mr_table * mrt)304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 struct mr_table *mrt)
306 {
307 if (!mrt)
308 return net->ipv6.mrt6;
309 return NULL;
310 }
311
/* Single-table build: @id is ignored, the only table is always returned. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *only = net->ipv6.mrt6;

	return only;
}

/* In this configuration the "__" variant is the very same function. */
#define __ip6mr_get_table ip6mr_get_table
318
ip6mr_fib_lookup(struct net * net,struct flowi6 * flp6,struct mr_table ** mrt)319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 struct mr_table **mrt)
321 {
322 *mrt = net->ipv6.mrt6;
323 return 0;
324 }
325
ip6mr_rules_init(struct net * net)326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 struct mr_table *mrt;
329
330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 if (IS_ERR(mrt))
332 return PTR_ERR(mrt);
333 net->ipv6.mrt6 = mrt;
334 return 0;
335 }
336
ip6mr_rules_exit(struct net * net)337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 ASSERT_RTNL();
340 ip6mr_free_table(net->ipv6.mrt6);
341 net->ipv6.mrt6 = NULL;
342 }
343
/* Single-table build: there are no rules to dump, so just return 0. */
static int ip6mr_rules_dump(struct net *net,
			    struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}
349
/* Single-table build: the rules sequence counter is reported as 0. */
static unsigned int
ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
354 #endif
355
ip6mr_hash_cmp(struct rhashtable_compare_arg * arg,const void * ptr)356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 const void *ptr)
358 {
359 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361
362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365
366 static const struct rhashtable_params ip6mr_rht_params = {
367 .head_offset = offsetof(struct mr_mfc, mnode),
368 .key_offset = offsetof(struct mfc6_cache, cmparg),
369 .key_len = sizeof(struct mfc6_cache_cmp_arg),
370 .nelem_hint = 3,
371 .obj_cmpfn = ip6mr_hash_cmp,
372 .automatic_shrinking = true,
373 };
374
/*
 * Callback passed to mr_table_alloc(). With multiple tables enabled it
 * appends @mrt to the per-netns table list; otherwise it is a no-op.
 */
static void ip6mr_new_table_set(struct mr_table *mrt, struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
382
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 .mf6c_origin = IN6ADDR_ANY_INIT,
385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 .rht_params = &ip6mr_rht_params,
390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392
/*
 * Return the table with id @id, allocating it through mr_table_alloc()
 * when it does not exist yet.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *existing = __ip6mr_get_table(net, id);

	if (existing)
		return existing;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}
404
ip6mr_free_table(struct mr_table * mrt)405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 struct net *net = read_pnet(&mrt->net);
408
409 WARN_ON_ONCE(!mr_can_free_table(net));
410
411 timer_shutdown_sync(&mrt->ipmr_expire_timer);
412 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
413 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
414 rhltable_destroy(&mrt->mfc_hash);
415 kfree(mrt);
416 }
417
418 #ifdef CONFIG_PROC_FS
419 /* The /proc interfaces to multicast routing
420 * /proc/ip6_mr_cache /proc/ip6_mr_vif
421 */
422
/*
 * seq_file ->start for the vif listing. Takes rcu_read_lock(), binds the
 * default table to the iterator and defers to mr_vif_seq_start().
 *
 * When the default table does not exist the RCU read lock is dropped
 * again and ERR_PTR(-ENOENT) is returned.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *table;

	rcu_read_lock();

	table = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (table) {
		iter->mrt = table;
		return mr_vif_seq_start(seq, pos);
	}

	rcu_read_unlock();
	return ERR_PTR(-ENOENT);
}
441
ip6mr_vif_seq_stop(struct seq_file * seq,void * v)442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
443 __releases(RCU)
444 {
445 rcu_read_unlock();
446 }
447
ip6mr_vif_seq_show(struct seq_file * seq,void * v)448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
449 {
450 struct mr_vif_iter *iter = seq->private;
451 struct mr_table *mrt = iter->mrt;
452
453 if (v == SEQ_START_TOKEN) {
454 seq_puts(seq,
455 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
456 } else {
457 const struct vif_device *vif = v;
458 const struct net_device *vif_dev;
459 const char *name;
460
461 vif_dev = vif_dev_read(vif);
462 name = vif_dev ? vif_dev->name : "none";
463
464 seq_printf(seq,
465 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
466 vif - mrt->vif_table,
467 name, vif->bytes_in, vif->pkt_in,
468 vif->bytes_out, vif->pkt_out,
469 vif->flags);
470 }
471 return 0;
472 }
473
474 static const struct seq_operations ip6mr_vif_seq_ops = {
475 .start = ip6mr_vif_seq_start,
476 .next = mr_vif_seq_next,
477 .stop = ip6mr_vif_seq_stop,
478 .show = ip6mr_vif_seq_show,
479 };
480
/*
 * seq_file ->start for the MFC cache listing: looks up the default table
 * and defers to mr_mfc_seq_start(), passing mfc_unres_lock. Returns
 * ERR_PTR(-ENOENT) when the default table does not exist.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct mr_table *table;

	table = ip6mr_get_table(seq_file_net(seq), RT6_TABLE_DFLT);
	if (table)
		return mr_mfc_seq_start(seq, pos, table, &mfc_unres_lock);

	return ERR_PTR(-ENOENT);
}
492
ipmr_mfc_seq_show(struct seq_file * seq,void * v)493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
494 {
495 int n;
496
497 if (v == SEQ_START_TOKEN) {
498 seq_puts(seq,
499 "Group "
500 "Origin "
501 "Iif Pkts Bytes Wrong Oifs\n");
502 } else {
503 const struct mfc6_cache *mfc = v;
504 const struct mr_mfc_iter *it = seq->private;
505 struct mr_table *mrt = it->mrt;
506
507 seq_printf(seq, "%pI6 %pI6 %-3hd",
508 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
509 mfc->_c.mfc_parent);
510
511 if (it->cache != &mrt->mfc_unres_queue) {
512 seq_printf(seq, " %8lu %8lu %8lu",
513 atomic_long_read(&mfc->_c.mfc_un.res.pkt),
514 atomic_long_read(&mfc->_c.mfc_un.res.bytes),
515 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
516 for (n = mfc->_c.mfc_un.res.minvif;
517 n < mfc->_c.mfc_un.res.maxvif; n++) {
518 if (VIF_EXISTS(mrt, n) &&
519 mfc->_c.mfc_un.res.ttls[n] < 255)
520 seq_printf(seq,
521 " %2d:%-3d", n,
522 mfc->_c.mfc_un.res.ttls[n]);
523 }
524 } else {
525 /* unresolved mfc_caches don't contain
526 * pkt, bytes and wrong_if values
527 */
528 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
529 }
530 seq_putc(seq, '\n');
531 }
532 return 0;
533 }
534
535 static const struct seq_operations ipmr_mfc_seq_ops = {
536 .start = ipmr_mfc_seq_start,
537 .next = mr_mfc_seq_next,
538 .stop = mr_mfc_seq_stop,
539 .show = ipmr_mfc_seq_show,
540 };
541 #endif
542
543 #ifdef CONFIG_IPV6_PIMSM_V2
544
pim6_rcv(struct sk_buff * skb)545 static int pim6_rcv(struct sk_buff *skb)
546 {
547 struct pimreghdr *pim;
548 struct ipv6hdr *encap;
549 struct net_device *reg_dev = NULL;
550 struct net *net = dev_net(skb->dev);
551 struct mr_table *mrt;
552 struct flowi6 fl6 = {
553 .flowi6_iif = skb->dev->ifindex,
554 .flowi6_mark = skb->mark,
555 };
556 int reg_vif_num;
557
558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
559 goto drop;
560
561 pim = (struct pimreghdr *)skb_transport_header(skb);
562 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
563 (pim->flags & PIM_NULL_REGISTER) ||
564 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
565 sizeof(*pim), IPPROTO_PIM,
566 csum_partial((void *)pim, sizeof(*pim), 0)) &&
567 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
568 goto drop;
569
570 /* check if the inner packet is destined to mcast group */
571 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
572 sizeof(*pim));
573
574 if (!ipv6_addr_is_multicast(&encap->daddr) ||
575 encap->payload_len == 0 ||
576 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
577 goto drop;
578
579 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
580 goto drop;
581
582 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
583 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
584 if (reg_vif_num >= 0)
585 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
586
587 if (!reg_dev)
588 goto drop;
589
590 skb->mac_header = skb->network_header;
591 skb_pull(skb, (u8 *)encap - skb->data);
592 skb_reset_network_header(skb);
593 skb->protocol = htons(ETH_P_IPV6);
594 skb->ip_summed = CHECKSUM_NONE;
595
596 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
597
598 netif_rx(skb);
599
600 return 0;
601 drop:
602 kfree_skb(skb);
603 return 0;
604 }
605
606 static const struct inet6_protocol pim6_protocol = {
607 .handler = pim6_rcv,
608 };
609
610 /* Service routines creating virtual interfaces: PIMREG */
611
reg_vif_xmit(struct sk_buff * skb,struct net_device * dev)612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
613 struct net_device *dev)
614 {
615 struct net *net = dev_net(dev);
616 struct mr_table *mrt;
617 struct flowi6 fl6 = {
618 .flowi6_oif = dev->ifindex,
619 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
620 .flowi6_mark = skb->mark,
621 };
622
623 if (!pskb_inet_may_pull(skb))
624 goto tx_err;
625
626 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
627 goto tx_err;
628
629 DEV_STATS_ADD(dev, tx_bytes, skb->len);
630 DEV_STATS_INC(dev, tx_packets);
631 rcu_read_lock();
632 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
633 MRT6MSG_WHOLEPKT);
634 rcu_read_unlock();
635 kfree_skb(skb);
636 return NETDEV_TX_OK;
637
638 tx_err:
639 DEV_STATS_INC(dev, tx_errors);
640 kfree_skb(skb);
641 return NETDEV_TX_OK;
642 }
643
/* ndo_get_iflink of the register vif device: always reports 0. */
static int
reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
648
649 static const struct net_device_ops reg_vif_netdev_ops = {
650 .ndo_start_xmit = reg_vif_xmit,
651 .ndo_get_iflink = reg_vif_get_iflink,
652 };
653
reg_vif_setup(struct net_device * dev)654 static void reg_vif_setup(struct net_device *dev)
655 {
656 dev->type = ARPHRD_PIMREG;
657 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
658 dev->flags = IFF_NOARP;
659 dev->netdev_ops = ®_vif_netdev_ops;
660 dev->needs_free_netdev = true;
661 dev->netns_immutable = true;
662 }
663
ip6mr_reg_vif(struct net * net,struct mr_table * mrt)664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
665 {
666 struct net_device *dev;
667 char name[IFNAMSIZ];
668
669 if (mrt->id == RT6_TABLE_DFLT)
670 sprintf(name, "pim6reg");
671 else
672 sprintf(name, "pim6reg%u", mrt->id);
673
674 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
675 if (!dev)
676 return NULL;
677
678 dev_net_set(dev, net);
679
680 if (register_netdevice(dev)) {
681 free_netdev(dev);
682 return NULL;
683 }
684
685 if (dev_open(dev, NULL))
686 goto failure;
687
688 dev_hold(dev);
689 return dev;
690
691 failure:
692 unregister_netdevice(dev);
693 return NULL;
694 }
695 #endif
696
/*
 * Forward a vif event to mr_call_vif_notifiers() for the IP6MR family,
 * using the per-netns ipmr_seq counter, and return its result.
 */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	unsigned int *seq = &net->ipv6.ipmr_seq;

	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id, seq);
}
707
/*
 * Forward an MFC event to mr_call_mfc_notifiers() for the IP6MR family,
 * using the per-netns ipmr_seq counter, and return its result.
 */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	unsigned int *seq = &net->ipv6.ipmr_seq;

	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, seq);
}
715
716 /* Delete a VIF entry */
mif6_delete(struct mr_table * mrt,int vifi,int notify,struct list_head * head)717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
718 struct list_head *head)
719 {
720 struct vif_device *v;
721 struct net_device *dev;
722 struct inet6_dev *in6_dev;
723
724 if (vifi < 0 || vifi >= mrt->maxvif)
725 return -EADDRNOTAVAIL;
726
727 v = &mrt->vif_table[vifi];
728
729 dev = rtnl_dereference(v->dev);
730 if (!dev)
731 return -EADDRNOTAVAIL;
732
733 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
734 FIB_EVENT_VIF_DEL, v, dev,
735 vifi, mrt->id);
736 spin_lock(&mrt_lock);
737 RCU_INIT_POINTER(v->dev, NULL);
738
739 #ifdef CONFIG_IPV6_PIMSM_V2
740 if (vifi == mrt->mroute_reg_vif_num) {
741 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
742 WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
743 }
744 #endif
745
746 if (vifi + 1 == mrt->maxvif) {
747 int tmp;
748 for (tmp = vifi - 1; tmp >= 0; tmp--) {
749 if (VIF_EXISTS(mrt, tmp))
750 break;
751 }
752 WRITE_ONCE(mrt->maxvif, tmp + 1);
753 }
754
755 spin_unlock(&mrt_lock);
756
757 dev_set_allmulti(dev, -1);
758
759 in6_dev = __in6_dev_get(dev);
760 if (in6_dev) {
761 atomic_dec(&in6_dev->cnf.mc_forwarding);
762 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
763 NETCONFA_MC_FORWARDING,
764 dev->ifindex, &in6_dev->cnf);
765 }
766
767 if ((v->flags & MIFF_REGISTER) && !notify)
768 unregister_netdevice_queue(dev, head);
769
770 netdev_put(dev, &v->dev_tracker);
771 return 0;
772 }
773
ip6mr_cache_free_rcu(struct rcu_head * head)774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
775 {
776 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
777
778 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
779 }
780
ip6mr_cache_free(struct mfc6_cache * c)781 static inline void ip6mr_cache_free(struct mfc6_cache *c)
782 {
783 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
784 }
785
786 /* Destroy an unresolved cache entry, killing queued skbs
787 and reporting error to netlink readers.
788 */
789
ip6mr_destroy_unres(struct mr_table * mrt,struct mfc6_cache * c)790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
791 {
792 struct net *net = read_pnet(&mrt->net);
793 struct sk_buff *skb;
794
795 atomic_dec(&mrt->cache_resolve_queue_len);
796
797 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
798 if (ipv6_hdr(skb)->version == 0) {
799 struct nlmsghdr *nlh = skb_pull(skb,
800 sizeof(struct ipv6hdr));
801 nlh->nlmsg_type = NLMSG_ERROR;
802 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
803 skb_trim(skb, nlh->nlmsg_len);
804 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
805 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
806 } else
807 kfree_skb(skb);
808 }
809
810 ip6mr_cache_free(c);
811 }
812
813
814 /* Timer process for all the unresolved queue. */
815
ipmr_do_expire_process(struct mr_table * mrt)816 static void ipmr_do_expire_process(struct mr_table *mrt)
817 {
818 unsigned long now = jiffies;
819 unsigned long expires = 10 * HZ;
820 struct mr_mfc *c, *next;
821
822 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
823 if (time_after(c->mfc_un.unres.expires, now)) {
824 /* not yet... */
825 unsigned long interval = c->mfc_un.unres.expires - now;
826 if (interval < expires)
827 expires = interval;
828 continue;
829 }
830
831 list_del(&c->list);
832 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
833 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
834 }
835
836 if (!list_empty(&mrt->mfc_unres_queue))
837 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
838 }
839
ipmr_expire_process(struct timer_list * t)840 static void ipmr_expire_process(struct timer_list *t)
841 {
842 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
843
844 if (!spin_trylock(&mfc_unres_lock)) {
845 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
846 return;
847 }
848
849 if (!list_empty(&mrt->mfc_unres_queue))
850 ipmr_do_expire_process(mrt);
851
852 spin_unlock(&mfc_unres_lock);
853 }
854
855 /* Fill oifs list. It is called under locked mrt_lock. */
856
ip6mr_update_thresholds(struct mr_table * mrt,struct mr_mfc * cache,unsigned char * ttls)857 static void ip6mr_update_thresholds(struct mr_table *mrt,
858 struct mr_mfc *cache,
859 unsigned char *ttls)
860 {
861 int vifi;
862
863 cache->mfc_un.res.minvif = MAXMIFS;
864 cache->mfc_un.res.maxvif = 0;
865 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
866
867 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
868 if (VIF_EXISTS(mrt, vifi) &&
869 ttls[vifi] && ttls[vifi] < 255) {
870 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
871 if (cache->mfc_un.res.minvif > vifi)
872 cache->mfc_un.res.minvif = vifi;
873 if (cache->mfc_un.res.maxvif <= vifi)
874 cache->mfc_un.res.maxvif = vifi + 1;
875 }
876 }
877 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
878 }
879
mif6_add(struct net * net,struct mr_table * mrt,struct mif6ctl * vifc,int mrtsock)880 static int mif6_add(struct net *net, struct mr_table *mrt,
881 struct mif6ctl *vifc, int mrtsock)
882 {
883 int vifi = vifc->mif6c_mifi;
884 struct vif_device *v = &mrt->vif_table[vifi];
885 struct net_device *dev;
886 struct inet6_dev *in6_dev;
887 int err;
888
889 /* Is vif busy ? */
890 if (VIF_EXISTS(mrt, vifi))
891 return -EADDRINUSE;
892
893 switch (vifc->mif6c_flags) {
894 #ifdef CONFIG_IPV6_PIMSM_V2
895 case MIFF_REGISTER:
896 /*
897 * Special Purpose VIF in PIM
898 * All the packets will be sent to the daemon
899 */
900 if (mrt->mroute_reg_vif_num >= 0)
901 return -EADDRINUSE;
902 dev = ip6mr_reg_vif(net, mrt);
903 if (!dev)
904 return -ENOBUFS;
905 err = dev_set_allmulti(dev, 1);
906 if (err) {
907 unregister_netdevice(dev);
908 dev_put(dev);
909 return err;
910 }
911 break;
912 #endif
913 case 0:
914 dev = dev_get_by_index(net, vifc->mif6c_pifi);
915 if (!dev)
916 return -EADDRNOTAVAIL;
917 err = dev_set_allmulti(dev, 1);
918 if (err) {
919 dev_put(dev);
920 return err;
921 }
922 break;
923 default:
924 return -EINVAL;
925 }
926
927 in6_dev = __in6_dev_get(dev);
928 if (in6_dev) {
929 atomic_inc(&in6_dev->cnf.mc_forwarding);
930 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
931 NETCONFA_MC_FORWARDING,
932 dev->ifindex, &in6_dev->cnf);
933 }
934
935 /* Fill in the VIF structures */
936 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
937 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
938 MIFF_REGISTER);
939
940 /* And finish update writing critical data */
941 spin_lock(&mrt_lock);
942 rcu_assign_pointer(v->dev, dev);
943 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
944 #ifdef CONFIG_IPV6_PIMSM_V2
945 if (v->flags & MIFF_REGISTER)
946 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
947 #endif
948 if (vifi + 1 > mrt->maxvif)
949 WRITE_ONCE(mrt->maxvif, vifi + 1);
950 spin_unlock(&mrt_lock);
951 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
952 v, dev, vifi, mrt->id);
953 return 0;
954 }
955
ip6mr_cache_find(struct mr_table * mrt,const struct in6_addr * origin,const struct in6_addr * mcastgrp)956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
957 const struct in6_addr *origin,
958 const struct in6_addr *mcastgrp)
959 {
960 struct mfc6_cache_cmp_arg arg = {
961 .mf6c_origin = *origin,
962 .mf6c_mcastgrp = *mcastgrp,
963 };
964
965 return mr_mfc_find(mrt, &arg);
966 }
967
968 /* Look for a (*,G) entry */
ip6mr_cache_find_any(struct mr_table * mrt,struct in6_addr * mcastgrp,mifi_t mifi)969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
970 struct in6_addr *mcastgrp,
971 mifi_t mifi)
972 {
973 struct mfc6_cache_cmp_arg arg = {
974 .mf6c_origin = in6addr_any,
975 .mf6c_mcastgrp = *mcastgrp,
976 };
977
978 if (ipv6_addr_any(mcastgrp))
979 return mr_mfc_find_any_parent(mrt, mifi);
980 return mr_mfc_find_any(mrt, mifi, &arg);
981 }
982
983 /* Look for a (S,G,iif) entry if parent != -1 */
984 static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table * mrt,const struct in6_addr * origin,const struct in6_addr * mcastgrp,int parent)985 ip6mr_cache_find_parent(struct mr_table *mrt,
986 const struct in6_addr *origin,
987 const struct in6_addr *mcastgrp,
988 int parent)
989 {
990 struct mfc6_cache_cmp_arg arg = {
991 .mf6c_origin = *origin,
992 .mf6c_mcastgrp = *mcastgrp,
993 };
994
995 return mr_mfc_find_parent(mrt, &arg, parent);
996 }
997
998 /* Allocate a multicast cache entry */
ip6mr_cache_alloc(void)999 static struct mfc6_cache *ip6mr_cache_alloc(void)
1000 {
1001 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1002 if (!c)
1003 return NULL;
1004 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1005 c->_c.mfc_un.res.minvif = MAXMIFS;
1006 c->_c.free = ip6mr_cache_free_rcu;
1007 refcount_set(&c->_c.mfc_un.res.refcount, 1);
1008 return c;
1009 }
1010
ip6mr_cache_alloc_unres(void)1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1012 {
1013 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1014 if (!c)
1015 return NULL;
1016 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1017 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1018 return c;
1019 }
1020
1021 /*
1022 * A cache entry has gone into a resolved state from queued
1023 */
1024
ip6mr_cache_resolve(struct net * net,struct mr_table * mrt,struct mfc6_cache * uc,struct mfc6_cache * c)1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1026 struct mfc6_cache *uc, struct mfc6_cache *c)
1027 {
1028 struct sk_buff *skb;
1029
1030 /*
1031 * Play the pending entries through our router
1032 */
1033
1034 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1035 if (ipv6_hdr(skb)->version == 0) {
1036 struct nlmsghdr *nlh = skb_pull(skb,
1037 sizeof(struct ipv6hdr));
1038
1039 if (mr_fill_mroute(mrt, skb, &c->_c,
1040 nlmsg_data(nlh)) > 0) {
1041 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1042 } else {
1043 nlh->nlmsg_type = NLMSG_ERROR;
1044 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1045 skb_trim(skb, nlh->nlmsg_len);
1046 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1047 }
1048 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1049 } else {
1050 rcu_read_lock();
1051 ip6_mr_forward(net, mrt, skb->dev, skb, c);
1052 rcu_read_unlock();
1053 }
1054 }
1055 }
1056
1057 /*
1058 * Bounce a cache query up to pim6sd and netlink.
1059 *
1060 * Called under rcu_read_lock()
1061 */
1062
ip6mr_cache_report(const struct mr_table * mrt,struct sk_buff * pkt,mifi_t mifi,int assert)1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1064 mifi_t mifi, int assert)
1065 {
1066 struct sock *mroute6_sk;
1067 struct sk_buff *skb;
1068 struct mrt6msg *msg;
1069 int ret;
1070
1071 #ifdef CONFIG_IPV6_PIMSM_V2
1072 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1073 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 +sizeof(*msg));
1075 else
1076 #endif
1077 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078
1079 if (!skb)
1080 return -ENOBUFS;
1081
1082 /* I suppose that internal messages
1083 * do not require checksums */
1084
1085 skb->ip_summed = CHECKSUM_UNNECESSARY;
1086
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1089 /* Ugly, but we have no choice with this interface.
1090 Duplicate old header, fix length etc.
1091 And all this only to mangle msg->im6_msgtype and
1092 to set msg->im6_mbz to "mbz" :-)
1093 */
1094 __skb_pull(skb, skb_network_offset(pkt));
1095
1096 skb_push(skb, sizeof(*msg));
1097 skb_reset_transport_header(skb);
1098 msg = (struct mrt6msg *)skb_transport_header(skb);
1099 msg->im6_mbz = 0;
1100 msg->im6_msgtype = assert;
1101 if (assert == MRT6MSG_WRMIFWHOLE)
1102 msg->im6_mif = mifi;
1103 else
1104 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1105 msg->im6_pad = 0;
1106 msg->im6_src = ipv6_hdr(pkt)->saddr;
1107 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1108
1109 skb->ip_summed = CHECKSUM_UNNECESSARY;
1110 } else
1111 #endif
1112 {
1113 /*
1114 * Copy the IP header
1115 */
1116
1117 skb_put(skb, sizeof(struct ipv6hdr));
1118 skb_reset_network_header(skb);
1119 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1120
1121 /*
1122 * Add our header
1123 */
1124 skb_put(skb, sizeof(*msg));
1125 skb_reset_transport_header(skb);
1126 msg = (struct mrt6msg *)skb_transport_header(skb);
1127
1128 msg->im6_mbz = 0;
1129 msg->im6_msgtype = assert;
1130 msg->im6_mif = mifi;
1131 msg->im6_pad = 0;
1132 msg->im6_src = ipv6_hdr(pkt)->saddr;
1133 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1134
1135 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1136 skb->ip_summed = CHECKSUM_UNNECESSARY;
1137 }
1138
1139 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1140 if (!mroute6_sk) {
1141 kfree_skb(skb);
1142 return -EINVAL;
1143 }
1144
1145 mrt6msg_netlink_event(mrt, skb);
1146
1147 /* Deliver to user space multicast routing algorithms */
1148 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1149
1150 if (ret < 0) {
1151 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1152 kfree_skb(skb);
1153 }
1154
1155 return ret;
1156 }
1157
1158 /* Queue a packet for resolution. It gets locked cache entry! */
ip6mr_cache_unresolved(struct mr_table * mrt,mifi_t mifi,struct sk_buff * skb,struct net_device * dev)1159 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1160 struct sk_buff *skb, struct net_device *dev)
1161 {
1162 struct mfc6_cache *c;
1163 bool found = false;
1164 int err;
1165
1166 spin_lock_bh(&mfc_unres_lock);
1167 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1168 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1169 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1170 found = true;
1171 break;
1172 }
1173 }
1174
1175 if (!found) {
1176 /*
1177 * Create a new entry if allowable
1178 */
1179
1180 c = ip6mr_cache_alloc_unres();
1181 if (!c) {
1182 spin_unlock_bh(&mfc_unres_lock);
1183
1184 kfree_skb(skb);
1185 return -ENOBUFS;
1186 }
1187
1188 /* Fill in the new cache entry */
1189 c->_c.mfc_parent = -1;
1190 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1191 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1192
1193 /*
1194 * Reflect first query at pim6sd
1195 */
1196 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1197 if (err < 0) {
1198 /* If the report failed throw the cache entry
1199 out - Brad Parker
1200 */
1201 spin_unlock_bh(&mfc_unres_lock);
1202
1203 ip6mr_cache_free(c);
1204 kfree_skb(skb);
1205 return err;
1206 }
1207
1208 atomic_inc(&mrt->cache_resolve_queue_len);
1209 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1210 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1211
1212 ipmr_do_expire_process(mrt);
1213 }
1214
1215 /* See if we can append the packet */
1216 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1217 kfree_skb(skb);
1218 err = -ENOBUFS;
1219 } else {
1220 if (dev) {
1221 skb->dev = dev;
1222 skb->skb_iif = dev->ifindex;
1223 }
1224 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1225 err = 0;
1226 }
1227
1228 spin_unlock_bh(&mfc_unres_lock);
1229 return err;
1230 }
1231
1232 /*
1233 * MFC6 cache manipulation by user space
1234 */
1235
ip6mr_mfc_delete(struct mr_table * mrt,struct mf6cctl * mfc,int parent)1236 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1237 int parent)
1238 {
1239 struct mfc6_cache *c;
1240
1241 /* The entries are added/deleted only under RTNL */
1242 rcu_read_lock();
1243 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1244 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1245 rcu_read_unlock();
1246 if (!c)
1247 return -ENOENT;
1248 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1249 list_del_rcu(&c->_c.list);
1250
1251 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1252 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1253 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1254 mr_cache_put(&c->_c);
1255 return 0;
1256 }
1257
ip6mr_device_event(struct notifier_block * this,unsigned long event,void * ptr)1258 static int ip6mr_device_event(struct notifier_block *this,
1259 unsigned long event, void *ptr)
1260 {
1261 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1262 struct net *net = dev_net(dev);
1263 struct mr_table *mrt;
1264 struct vif_device *v;
1265 int ct;
1266
1267 if (event != NETDEV_UNREGISTER)
1268 return NOTIFY_DONE;
1269
1270 ip6mr_for_each_table(mrt, net) {
1271 v = &mrt->vif_table[0];
1272 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 if (rcu_access_pointer(v->dev) == dev)
1274 mif6_delete(mrt, ct, 1, NULL);
1275 }
1276 }
1277
1278 return NOTIFY_DONE;
1279 }
1280
ip6mr_seq_read(const struct net * net)1281 static unsigned int ip6mr_seq_read(const struct net *net)
1282 {
1283 return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
1284 }
1285
/*
 * fib_dump callback: hands off to the generic mr_dump() with the IPv6
 * multicast family, the ip6mr rules dumper and the ip6mr table iterator.
 * Returns whatever mr_dump() returns.
 */
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}
1292
/* Netdevice notifier block; events are handled by ip6mr_device_event(). */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1296
/*
 * Template handed to fib_notifier_ops_register() for each namespace:
 * identifies the IPv6 multicast routing family and supplies its sequence
 * read and dump callbacks.
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};
1303
ip6mr_notifier_init(struct net * net)1304 static int __net_init ip6mr_notifier_init(struct net *net)
1305 {
1306 struct fib_notifier_ops *ops;
1307
1308 net->ipv6.ipmr_seq = 0;
1309
1310 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1311 if (IS_ERR(ops))
1312 return PTR_ERR(ops);
1313
1314 net->ipv6.ip6mr_notifier_ops = ops;
1315
1316 return 0;
1317 }
1318
ip6mr_notifier_exit(struct net * net)1319 static void __net_exit ip6mr_notifier_exit(struct net *net)
1320 {
1321 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1322 net->ipv6.ip6mr_notifier_ops = NULL;
1323 }
1324
1325 /* Setup for IP multicast routing */
ip6mr_net_init(struct net * net)1326 static int __net_init ip6mr_net_init(struct net *net)
1327 {
1328 int err;
1329
1330 err = ip6mr_notifier_init(net);
1331 if (err)
1332 return err;
1333
1334 err = ip6mr_rules_init(net);
1335 if (err < 0)
1336 goto ip6mr_rules_fail;
1337
1338 #ifdef CONFIG_PROC_FS
1339 err = -ENOMEM;
1340 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1341 sizeof(struct mr_vif_iter)))
1342 goto proc_vif_fail;
1343 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1344 sizeof(struct mr_mfc_iter)))
1345 goto proc_cache_fail;
1346 #endif
1347
1348 return 0;
1349
1350 #ifdef CONFIG_PROC_FS
1351 proc_cache_fail:
1352 remove_proc_entry("ip6_mr_vif", net->proc_net);
1353 proc_vif_fail:
1354 rtnl_lock();
1355 ip6mr_rules_exit(net);
1356 rtnl_unlock();
1357 #endif
1358 ip6mr_rules_fail:
1359 ip6mr_notifier_exit(net);
1360 return err;
1361 }
1362
/*
 * Per-namespace teardown: removes the two proc entries (when
 * CONFIG_PROC_FS is enabled) and unregisters the FIB notifier ops.
 * The rules are not touched here; ip6mr_rules_exit() is called from
 * ip6mr_net_exit_batch().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_notifier_exit(net);
}
1371
/*
 * Batched namespace teardown: takes the RTNL lock once and calls
 * ip6mr_rules_exit() for every namespace on @net_list.
 */
static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		ip6mr_rules_exit(net);
	rtnl_unlock();
}
1381
/* Per-namespace init/exit hooks, registered by ip6_mr_init(). */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.exit_batch = ip6mr_net_exit_batch,
};
1387
/*
 * rtnetlink handlers for RTNL_FAMILY_IP6MR: RTM_GETROUTE is served by
 * ip6mr_rtm_getroute() (doit) and ip6mr_rtm_dumproute() (dumpit).
 */
static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
	 .msgtype = RTM_GETROUTE,
	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
};
1393
ip6_mr_init(void)1394 int __init ip6_mr_init(void)
1395 {
1396 int err;
1397
1398 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
1399 if (!mrt_cachep)
1400 return -ENOMEM;
1401
1402 err = register_pernet_subsys(&ip6mr_net_ops);
1403 if (err)
1404 goto reg_pernet_fail;
1405
1406 err = register_netdevice_notifier(&ip6_mr_notifier);
1407 if (err)
1408 goto reg_notif_fail;
1409 #ifdef CONFIG_IPV6_PIMSM_V2
1410 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1411 pr_err("%s: can't add PIM protocol\n", __func__);
1412 err = -EAGAIN;
1413 goto add_proto_fail;
1414 }
1415 #endif
1416 err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
1417 if (!err)
1418 return 0;
1419
1420 #ifdef CONFIG_IPV6_PIMSM_V2
1421 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1422 add_proto_fail:
1423 unregister_netdevice_notifier(&ip6_mr_notifier);
1424 #endif
1425 reg_notif_fail:
1426 unregister_pernet_subsys(&ip6mr_net_ops);
1427 reg_pernet_fail:
1428 kmem_cache_destroy(mrt_cachep);
1429 return err;
1430 }
1431
/*
 * Undo ip6_mr_init(): unregister the rtnetlink handlers, the PIM protocol
 * handler (CONFIG_IPV6_PIMSM_V2 only), the netdevice notifier and the
 * per-namespace operations, then destroy the slab cache. The steps run in
 * the reverse order of their setup.
 */
void __init ip6_mr_cleanup(void)
{
	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1442
ip6mr_mfc_add(struct net * net,struct mr_table * mrt,struct mf6cctl * mfc,int mrtsock,int parent)1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1444 struct mf6cctl *mfc, int mrtsock, int parent)
1445 {
1446 unsigned char ttls[MAXMIFS];
1447 struct mfc6_cache *uc, *c;
1448 struct mr_mfc *_uc;
1449 bool found;
1450 int i, err;
1451
1452 if (mfc->mf6cc_parent >= MAXMIFS)
1453 return -ENFILE;
1454
1455 memset(ttls, 255, MAXMIFS);
1456 for (i = 0; i < MAXMIFS; i++) {
1457 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1458 ttls[i] = 1;
1459 }
1460
1461 /* The entries are added/deleted only under RTNL */
1462 rcu_read_lock();
1463 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1464 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1465 rcu_read_unlock();
1466 if (c) {
1467 spin_lock(&mrt_lock);
1468 c->_c.mfc_parent = mfc->mf6cc_parent;
1469 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1470 if (!mrtsock)
1471 c->_c.mfc_flags |= MFC_STATIC;
1472 spin_unlock(&mrt_lock);
1473 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1474 c, mrt->id);
1475 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1476 return 0;
1477 }
1478
1479 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1480 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1481 return -EINVAL;
1482
1483 c = ip6mr_cache_alloc();
1484 if (!c)
1485 return -ENOMEM;
1486
1487 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1488 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1489 c->_c.mfc_parent = mfc->mf6cc_parent;
1490 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1491 if (!mrtsock)
1492 c->_c.mfc_flags |= MFC_STATIC;
1493
1494 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1495 ip6mr_rht_params);
1496 if (err) {
1497 pr_err("ip6mr: rhtable insert error %d\n", err);
1498 ip6mr_cache_free(c);
1499 return err;
1500 }
1501 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1502
1503 /* Check to see if we resolved a queued list. If so we
1504 * need to send on the frames and tidy up.
1505 */
1506 found = false;
1507 spin_lock_bh(&mfc_unres_lock);
1508 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1509 uc = (struct mfc6_cache *)_uc;
1510 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1511 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1512 list_del(&_uc->list);
1513 atomic_dec(&mrt->cache_resolve_queue_len);
1514 found = true;
1515 break;
1516 }
1517 }
1518 if (list_empty(&mrt->mfc_unres_queue))
1519 timer_delete(&mrt->ipmr_expire_timer);
1520 spin_unlock_bh(&mfc_unres_lock);
1521
1522 if (found) {
1523 ip6mr_cache_resolve(net, mrt, uc, c);
1524 ip6mr_cache_free(uc);
1525 }
1526 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1527 c, mrt->id);
1528 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1529 return 0;
1530 }
1531
1532 /*
1533 * Close the multicast socket, and clear the vif tables etc
1534 */
1535
mroute_clean_tables(struct mr_table * mrt,int flags)1536 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1537 {
1538 struct mr_mfc *c, *tmp;
1539 LIST_HEAD(list);
1540 int i;
1541
1542 /* Shut down all active vif entries */
1543 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1544 for (i = 0; i < mrt->maxvif; i++) {
1545 if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1546 !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1547 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1548 continue;
1549 mif6_delete(mrt, i, 0, &list);
1550 }
1551 unregister_netdevice_many(&list);
1552 }
1553
1554 /* Wipe the cache */
1555 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1556 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1557 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1558 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1559 continue;
1560 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1561 list_del_rcu(&c->list);
1562 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1563 FIB_EVENT_ENTRY_DEL,
1564 (struct mfc6_cache *)c, mrt->id);
1565 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1566 mr_cache_put(c);
1567 }
1568 }
1569
1570 if (flags & MRT6_FLUSH_MFC) {
1571 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1572 spin_lock_bh(&mfc_unres_lock);
1573 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1574 list_del(&c->list);
1575 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1576 RTM_DELROUTE);
1577 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1578 }
1579 spin_unlock_bh(&mfc_unres_lock);
1580 }
1581 }
1582 }
1583
ip6mr_sk_init(struct mr_table * mrt,struct sock * sk)1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1585 {
1586 int err = 0;
1587 struct net *net = sock_net(sk);
1588
1589 rtnl_lock();
1590 spin_lock(&mrt_lock);
1591 if (rtnl_dereference(mrt->mroute_sk)) {
1592 err = -EADDRINUSE;
1593 } else {
1594 rcu_assign_pointer(mrt->mroute_sk, sk);
1595 sock_set_flag(sk, SOCK_RCU_FREE);
1596 atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1597 }
1598 spin_unlock(&mrt_lock);
1599
1600 if (!err)
1601 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1602 NETCONFA_MC_FORWARDING,
1603 NETCONFA_IFINDEX_ALL,
1604 net->ipv6.devconf_all);
1605 rtnl_unlock();
1606
1607 return err;
1608 }
1609
ip6mr_sk_done(struct sock * sk)1610 int ip6mr_sk_done(struct sock *sk)
1611 {
1612 struct net *net = sock_net(sk);
1613 struct ipv6_devconf *devconf;
1614 struct mr_table *mrt;
1615 int err = -EACCES;
1616
1617 if (sk->sk_type != SOCK_RAW ||
1618 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 return err;
1620
1621 devconf = net->ipv6.devconf_all;
1622 if (!devconf || !atomic_read(&devconf->mc_forwarding))
1623 return err;
1624
1625 rtnl_lock();
1626 ip6mr_for_each_table(mrt, net) {
1627 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1628 spin_lock(&mrt_lock);
1629 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1630 /* Note that mroute_sk had SOCK_RCU_FREE set,
1631 * so the RCU grace period before sk freeing
1632 * is guaranteed by sk_destruct()
1633 */
1634 atomic_dec(&devconf->mc_forwarding);
1635 spin_unlock(&mrt_lock);
1636 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1637 NETCONFA_MC_FORWARDING,
1638 NETCONFA_IFINDEX_ALL,
1639 net->ipv6.devconf_all);
1640
1641 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1642 err = 0;
1643 break;
1644 }
1645 }
1646 rtnl_unlock();
1647
1648 return err;
1649 }
1650
mroute6_is_socket(struct net * net,struct sk_buff * skb)1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1652 {
1653 struct mr_table *mrt;
1654 struct flowi6 fl6 = {
1655 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1656 .flowi6_oif = skb->dev->ifindex,
1657 .flowi6_mark = skb->mark,
1658 };
1659
1660 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1661 return NULL;
1662
1663 return rcu_access_pointer(mrt->mroute_sk);
1664 }
1665 EXPORT_SYMBOL(mroute6_is_socket);
1666
1667 /*
1668 * Socket options and virtual interface manipulation. The whole
1669 * virtual interface system is a complete heap, but unfortunately
1670 * that's how BSD mrouted happens to think. Maybe one day with a proper
1671 * MOSPF/PIM router set up we can clean this up.
1672 */
1673
/*
 * setsockopt() handler for the MRT6_* multicast routing options.
 *
 * Only raw ICMPv6 sockets are accepted (-EOPNOTSUPP otherwise). The table
 * is taken from the socket's ip6mr_table, falling back to RT6_TABLE_DFLT
 * (-ENOENT if it does not exist). Every option except MRT6_INIT requires
 * the caller to be the table's mroute socket or to hold CAP_NET_ADMIN in
 * the namespace's user namespace (-EACCES otherwise).
 *
 * Returns 0 or a negative errno; unknown options give -ENOPROTOOPT.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;
	int ret;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT &&
	    sk != rcu_access_pointer(mrt->mroute_sk) &&
	    !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EACCES;

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
	{
		struct mif6ctl vif;

		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;

		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;
	}

	case MRT6_DEL_MIF:
	{
		mifi_t mifi;

		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;

		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;
	}

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
	{
		bool is_proxy = optname == MRT6_ADD_MFC_PROXY ||
				optname == MRT6_DEL_MFC_PROXY;
		bool is_delete = optname == MRT6_DEL_MFC ||
				 optname == MRT6_DEL_MFC_PROXY;
		struct mf6cctl mfc;
		int parent;

		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;

		/* Proxy requests use the supplied parent, the others -1. */
		parent = is_proxy ? mfc.mf6cc_parent : -1;

		rtnl_lock();
		if (is_delete)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;
	}

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;

		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;

		rtnl_lock();
		/* The settings are only written when the PIM state flips. */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
		}
		rtnl_unlock();
		return 0;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt)) {
			ret = PTR_ERR(mrt);
		} else {
			raw6_sk(sk)->ip6mr_table = v;
			ret = 0;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1846
1847 /*
1848 * Getsock opt support for the multicast routing system.
1849 */
1850
ip6_mroute_getsockopt(struct sock * sk,int optname,sockptr_t optval,sockptr_t optlen)1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1852 sockptr_t optlen)
1853 {
1854 int olr;
1855 int val;
1856 struct net *net = sock_net(sk);
1857 struct mr_table *mrt;
1858
1859 if (sk->sk_type != SOCK_RAW ||
1860 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1861 return -EOPNOTSUPP;
1862
1863 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 if (!mrt)
1865 return -ENOENT;
1866
1867 switch (optname) {
1868 case MRT6_VERSION:
1869 val = 0x0305;
1870 break;
1871 #ifdef CONFIG_IPV6_PIMSM_V2
1872 case MRT6_PIM:
1873 val = mrt->mroute_do_pim;
1874 break;
1875 #endif
1876 case MRT6_ASSERT:
1877 val = mrt->mroute_do_assert;
1878 break;
1879 default:
1880 return -ENOPROTOOPT;
1881 }
1882
1883 if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1884 return -EFAULT;
1885
1886 olr = min_t(int, olr, sizeof(int));
1887 if (olr < 0)
1888 return -EINVAL;
1889
1890 if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1891 return -EFAULT;
1892 if (copy_to_sockptr(optval, &val, olr))
1893 return -EFAULT;
1894 return 0;
1895 }
1896
1897 /*
1898 * The IP multicast ioctl support routines.
1899 */
ip6mr_ioctl(struct sock * sk,int cmd,void * arg)1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1901 {
1902 struct sioc_sg_req6 *sr;
1903 struct sioc_mif_req6 *vr;
1904 struct vif_device *vif;
1905 struct mfc6_cache *c;
1906 struct net *net = sock_net(sk);
1907 struct mr_table *mrt;
1908
1909 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1910 if (!mrt)
1911 return -ENOENT;
1912
1913 switch (cmd) {
1914 case SIOCGETMIFCNT_IN6:
1915 vr = (struct sioc_mif_req6 *)arg;
1916 if (vr->mifi >= mrt->maxvif)
1917 return -EINVAL;
1918 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1919 rcu_read_lock();
1920 vif = &mrt->vif_table[vr->mifi];
1921 if (VIF_EXISTS(mrt, vr->mifi)) {
1922 vr->icount = READ_ONCE(vif->pkt_in);
1923 vr->ocount = READ_ONCE(vif->pkt_out);
1924 vr->ibytes = READ_ONCE(vif->bytes_in);
1925 vr->obytes = READ_ONCE(vif->bytes_out);
1926 rcu_read_unlock();
1927 return 0;
1928 }
1929 rcu_read_unlock();
1930 return -EADDRNOTAVAIL;
1931 case SIOCGETSGCNT_IN6:
1932 sr = (struct sioc_sg_req6 *)arg;
1933
1934 rcu_read_lock();
1935 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1936 &sr->grp.sin6_addr);
1937 if (c) {
1938 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
1939 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
1940 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
1941 rcu_read_unlock();
1942 return 0;
1943 }
1944 rcu_read_unlock();
1945 return -EADDRNOTAVAIL;
1946 default:
1947 return -ENOIOCTLCMD;
1948 }
1949 }
1950
1951 #ifdef CONFIG_COMPAT
1952 struct compat_sioc_sg_req6 {
1953 struct sockaddr_in6 src;
1954 struct sockaddr_in6 grp;
1955 compat_ulong_t pktcnt;
1956 compat_ulong_t bytecnt;
1957 compat_ulong_t wrong_if;
1958 };
1959
1960 struct compat_sioc_mif_req6 {
1961 mifi_t mifi;
1962 compat_ulong_t icount;
1963 compat_ulong_t ocount;
1964 compat_ulong_t ibytes;
1965 compat_ulong_t obytes;
1966 };
1967
ip6mr_compat_ioctl(struct sock * sk,unsigned int cmd,void __user * arg)1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1969 {
1970 struct compat_sioc_sg_req6 sr;
1971 struct compat_sioc_mif_req6 vr;
1972 struct vif_device *vif;
1973 struct mfc6_cache *c;
1974 struct net *net = sock_net(sk);
1975 struct mr_table *mrt;
1976
1977 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1978 if (!mrt)
1979 return -ENOENT;
1980
1981 switch (cmd) {
1982 case SIOCGETMIFCNT_IN6:
1983 if (copy_from_user(&vr, arg, sizeof(vr)))
1984 return -EFAULT;
1985 if (vr.mifi >= mrt->maxvif)
1986 return -EINVAL;
1987 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1988 rcu_read_lock();
1989 vif = &mrt->vif_table[vr.mifi];
1990 if (VIF_EXISTS(mrt, vr.mifi)) {
1991 vr.icount = READ_ONCE(vif->pkt_in);
1992 vr.ocount = READ_ONCE(vif->pkt_out);
1993 vr.ibytes = READ_ONCE(vif->bytes_in);
1994 vr.obytes = READ_ONCE(vif->bytes_out);
1995 rcu_read_unlock();
1996
1997 if (copy_to_user(arg, &vr, sizeof(vr)))
1998 return -EFAULT;
1999 return 0;
2000 }
2001 rcu_read_unlock();
2002 return -EADDRNOTAVAIL;
2003 case SIOCGETSGCNT_IN6:
2004 if (copy_from_user(&sr, arg, sizeof(sr)))
2005 return -EFAULT;
2006
2007 rcu_read_lock();
2008 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2009 if (c) {
2010 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
2011 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
2012 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
2013 rcu_read_unlock();
2014
2015 if (copy_to_user(arg, &sr, sizeof(sr)))
2016 return -EFAULT;
2017 return 0;
2018 }
2019 rcu_read_unlock();
2020 return -EADDRNOTAVAIL;
2021 default:
2022 return -ENOIOCTLCMD;
2023 }
2024 }
2025 #endif
2026
/*
 * Final step of forwarding, used as the continuation of the
 * NF_INET_FORWARD hook in ip6mr_forward2(): bumps the
 * IPSTATS_MIB_OUTFORWDATAGRAMS counter for the inet6 device of the skb's
 * dst and hands the packet to dst_output(), whose result is returned.
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(net, sk, skb);
}
2033
2034 /*
2035 * Processing handlers for ip6mr_forward
2036 */
2037
/*
 * Send @skb out through the mif with index @vifi of @mrt.
 *
 * For a register mif (CONFIG_IPV6_PIMSM_V2) the packet is not
 * transmitted: the counters are updated, the whole packet is reported via
 * ip6mr_cache_report(MRT6MSG_WHOLEPKT) and the skb is freed.
 *
 * Otherwise a route is looked up towards the packet's destination through
 * the mif's link, attached to the skb, the hop limit is decremented and
 * the packet goes through the NF_INET_FORWARD hook.
 *
 * The skb is freed on every early-exit path. Returns the NF_HOOK() result
 * when the packet reaches the hook, 0 otherwise.
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, int vifi)
{
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct ipv6hdr *ipv6h;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* The mif may have no device; nothing can be sent then. */
	vif_dev = vif_dev_read(vif);
	if (!vif_dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever dst the skb carried with the new route. */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	skb->dev = vif_dev;
	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow() before writing. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, vif_dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2111
2112 /* Called with rcu_read_lock() */
ip6mr_find_vif(struct mr_table * mrt,struct net_device * dev)2113 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2114 {
2115 int ct;
2116
2117 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2118 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2119 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2120 break;
2121 }
2122 return ct;
2123 }
2124
/*
 * Forward @skb, received on @dev, according to the resolved cache entry
 * @c.
 *
 * The entry's packet/byte counters and last-use time are updated first.
 * A packet arriving on an interface other than the entry's parent is
 * counted as wrong_if, may trigger a rate-limited MRT6MSG_WRONGMIF report
 * (plus MRT6MSG_WRMIFWHOLE when enabled) and is dropped - unless the
 * entry has an any-address origin and the incoming mif has a threshold
 * below 255 in the proxy entry found by mr_mfc_find_any_parent().
 *
 * The skb is handed to ip6mr_forward2() for the last selected output mif
 * (clones are used for the others) or freed when nothing is sent.
 *
 * Called under rcu_read_lock()
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	/* Index of the mif the packet really arrived on, or -1. */
	int true_vifi = ip6mr_find_vif(mrt, dev);

	vif = c->_c.mfc_parent;
	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			/* Stamp the report time so the next one is delayed. */
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
			if (mrt->mroute_do_wrvifwhole)
				ip6mr_cache_report(mrt, skb, true_vifi,
						   MRT6MSG_WRMIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	/* Input statistics are accounted on the entry's parent mif. */
	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
		   mrt->vif_table[vif].pkt_in + 1);
	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
		   mrt->vif_table[vif].bytes_in + skb->len);

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/*
	 * Walk the entry's mif range from high to low. Each time another
	 * eligible mif is found, the previously selected one is served
	 * with a clone; the original skb is kept for the last one.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2223
2224
2225 /*
2226 * Multicast packets for forwarding arrive here
2227 */
2228
ip6_mr_input(struct sk_buff * skb)2229 int ip6_mr_input(struct sk_buff *skb)
2230 {
2231 struct mfc6_cache *cache;
2232 struct net *net = dev_net(skb->dev);
2233 struct mr_table *mrt;
2234 struct flowi6 fl6 = {
2235 .flowi6_iif = skb->dev->ifindex,
2236 .flowi6_mark = skb->mark,
2237 };
2238 int err;
2239 struct net_device *dev;
2240
2241 /* skb->dev passed in is the master dev for vrfs.
2242 * Get the proper interface that does have a vif associated with it.
2243 */
2244 dev = skb->dev;
2245 if (netif_is_l3_master(skb->dev)) {
2246 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2247 if (!dev) {
2248 kfree_skb(skb);
2249 return -ENODEV;
2250 }
2251 }
2252
2253 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2254 if (err < 0) {
2255 kfree_skb(skb);
2256 return err;
2257 }
2258
2259 cache = ip6mr_cache_find(mrt,
2260 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2261 if (!cache) {
2262 int vif = ip6mr_find_vif(mrt, dev);
2263
2264 if (vif >= 0)
2265 cache = ip6mr_cache_find_any(mrt,
2266 &ipv6_hdr(skb)->daddr,
2267 vif);
2268 }
2269
2270 /*
2271 * No usable cache entry
2272 */
2273 if (!cache) {
2274 int vif;
2275
2276 vif = ip6mr_find_vif(mrt, dev);
2277 if (vif >= 0) {
2278 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2279
2280 return err;
2281 }
2282 kfree_skb(skb);
2283 return -ENODEV;
2284 }
2285
2286 ip6_mr_forward(net, mrt, dev, skb, cache);
2287
2288 return 0;
2289 }
2290
/*
 * Fill multicast route information for the route attached to @skb.
 *
 * The default table (RT6_TABLE_DFLT) is searched for an entry matching the
 * route's source and destination; if one is found it is written through
 * mr_fill_mroute().  Otherwise a header-only IPv6 packet carrying the two
 * addresses is built and passed to ip6mr_cache_unresolved().
 *
 * Returns -ENOENT when the table does not exist, -ENODEV when skb->dev is
 * missing or has no vif, -ENOMEM on allocation failure, and otherwise the
 * result of mr_fill_mroute() or ip6mr_cache_unresolved().
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	struct mfc6_cache *cache;
	struct net_device *dev;
	struct sk_buff *query;
	struct mr_table *mrt;
	struct ipv6hdr *hdr;
	int vif;
	int err;

	rcu_read_lock();

	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOENT;
		goto unlock;
	}

	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (cache) {
		err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
		goto unlock;
	}

	dev = skb->dev;
	if (!dev) {
		err = -ENODEV;
		goto unlock;
	}

	vif = ip6mr_find_vif(mrt, dev);
	if (vif < 0) {
		err = -ENODEV;
		goto unlock;
	}

	/* really correct? */
	query = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
	if (!query) {
		err = -ENOMEM;
		goto unlock;
	}

	NETLINK_CB(query).portid = portid;
	skb_reset_transport_header(query);

	skb_put(query, sizeof(struct ipv6hdr));
	skb_reset_network_header(query);

	/* Every header field is written explicitly; version is set to 0. */
	hdr = ipv6_hdr(query);
	hdr->version = 0;
	hdr->priority = 0;
	hdr->flow_lbl[0] = 0;
	hdr->flow_lbl[1] = 0;
	hdr->flow_lbl[2] = 0;
	hdr->payload_len = 0;
	hdr->nexthdr = IPPROTO_NONE;
	hdr->hop_limit = 0;
	hdr->saddr = rt->rt6i_src.addr;
	hdr->daddr = rt->rt6i_dst.addr;

	err = ip6mr_cache_unresolved(mrt, vif, query, dev);

unlock:
	rcu_read_unlock();
	return err;
}
2362
/*
 * Append one multicast route message describing @c to @skb.
 *
 * Emits an rtmsg header followed by RTA_TABLE, RTA_SRC and RTA_DST, then
 * lets mr_fill_mroute() add the remaining attributes.  -ENOENT from
 * mr_fill_mroute() is tolerated so that unresolved entries do not abort a
 * dump.
 *
 * Returns 0 on success or -EMSGSIZE; on failure the partially built
 * message is cancelled.
 */
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *hdr;
	struct rtmsg *rtm;
	int ret;

	hdr = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!hdr)
		return -EMSGSIZE;

	/* Fixed part of the message. */
	rtm = nlmsg_data(hdr);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = (c->_c.mfc_flags & MFC_STATIC) ? RTPROT_STATIC
							   : RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	/* Attributes, in the order RTA_TABLE, RTA_SRC, RTA_DST. */
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto cancel;
	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin))
		goto cancel;
	if (nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto cancel;

	ret = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (ret < 0 && ret != -ENOENT)
		goto cancel;

	nlmsg_end(skb, hdr);
	return 0;

cancel:
	nlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
2406
/*
 * Adapter taking a generic struct mr_mfc pointer: casts @c to the IPv6
 * cache type and forwards all arguments to ip6mr_fill_mroute().
 */
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	struct mfc6_cache *entry = (struct mfc6_cache *)c;

	return ip6mr_fill_mroute(mrt, skb, portid, seq, entry, cmd, flags);
}
2414
mr6_msgsize(bool unresolved,int maxvif)2415 static int mr6_msgsize(bool unresolved, int maxvif)
2416 {
2417 size_t len =
2418 NLMSG_ALIGN(sizeof(struct rtmsg))
2419 + nla_total_size(4) /* RTA_TABLE */
2420 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2421 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2422 ;
2423
2424 if (!unresolved)
2425 len = len
2426 + nla_total_size(4) /* RTA_IIF */
2427 + nla_total_size(0) /* RTA_MULTIPATH */
2428 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2429 /* RTA_MFC_STATS */
2430 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2431 ;
2432
2433 return len;
2434 }
2435
mr6_netlink_event(struct mr_table * mrt,struct mfc6_cache * mfc,int cmd)2436 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2437 int cmd)
2438 {
2439 struct net *net = read_pnet(&mrt->net);
2440 struct sk_buff *skb;
2441 int err = -ENOBUFS;
2442
2443 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2444 GFP_ATOMIC);
2445 if (!skb)
2446 goto errout;
2447
2448 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2449 if (err < 0)
2450 goto errout;
2451
2452 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2453 return;
2454
2455 errout:
2456 kfree_skb(skb);
2457 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2458 }
2459
/*
 * Compute the netlink buffer size for a cache report message carrying
 * @payloadlen bytes of packet data.
 */
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len = NLMSG_ALIGN(sizeof(struct rtgenmsg));

	len += nla_total_size(1);	/* IP6MRA_CREPORT_MSGTYPE */
	len += nla_total_size(4);	/* IP6MRA_CREPORT_MIF_ID */
	/* IP6MRA_CREPORT_SRC_ADDR */
	len += nla_total_size(sizeof(struct in6_addr));
	/* IP6MRA_CREPORT_DST_ADDR */
	len += nla_total_size(sizeof(struct in6_addr));
	/* IP6MRA_CREPORT_PKT */
	len += nla_total_size(payloadlen);

	return len;
}
2476
mrt6msg_netlink_event(const struct mr_table * mrt,struct sk_buff * pkt)2477 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2478 {
2479 struct net *net = read_pnet(&mrt->net);
2480 struct nlmsghdr *nlh;
2481 struct rtgenmsg *rtgenm;
2482 struct mrt6msg *msg;
2483 struct sk_buff *skb;
2484 struct nlattr *nla;
2485 int payloadlen;
2486
2487 payloadlen = pkt->len - sizeof(struct mrt6msg);
2488 msg = (struct mrt6msg *)skb_transport_header(pkt);
2489
2490 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2491 if (!skb)
2492 goto errout;
2493
2494 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2495 sizeof(struct rtgenmsg), 0);
2496 if (!nlh)
2497 goto errout;
2498 rtgenm = nlmsg_data(nlh);
2499 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2500 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2501 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2502 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2503 &msg->im6_src) ||
2504 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2505 &msg->im6_dst))
2506 goto nla_put_failure;
2507
2508 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2509 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2510 nla_data(nla), payloadlen))
2511 goto nla_put_failure;
2512
2513 nlmsg_end(skb, nlh);
2514
2515 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2516 return;
2517
2518 nla_put_failure:
2519 nlmsg_cancel(skb, nlh);
2520 errout:
2521 kfree_skb(skb);
2522 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2523 }
2524
2525 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2526 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2527 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2528 [RTA_TABLE] = { .type = NLA_U32 },
2529 };
2530
ip6mr_rtm_valid_getroute_req(struct sk_buff * skb,const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)2531 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2532 const struct nlmsghdr *nlh,
2533 struct nlattr **tb,
2534 struct netlink_ext_ack *extack)
2535 {
2536 struct rtmsg *rtm;
2537 int err;
2538
2539 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2540 extack);
2541 if (err)
2542 return err;
2543
2544 rtm = nlmsg_data(nlh);
2545 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2546 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2547 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2548 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2549 NL_SET_ERR_MSG_MOD(extack,
2550 "Invalid values in header for multicast route get request");
2551 return -EINVAL;
2552 }
2553
2554 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2555 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2556 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2557 return -EINVAL;
2558 }
2559
2560 return 0;
2561 }
2562
ip6mr_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2563 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2564 struct netlink_ext_ack *extack)
2565 {
2566 struct net *net = sock_net(in_skb->sk);
2567 struct in6_addr src = {}, grp = {};
2568 struct nlattr *tb[RTA_MAX + 1];
2569 struct mfc6_cache *cache;
2570 struct mr_table *mrt;
2571 struct sk_buff *skb;
2572 u32 tableid;
2573 int err;
2574
2575 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2576 if (err < 0)
2577 return err;
2578
2579 if (tb[RTA_SRC])
2580 src = nla_get_in6_addr(tb[RTA_SRC]);
2581 if (tb[RTA_DST])
2582 grp = nla_get_in6_addr(tb[RTA_DST]);
2583 tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2584
2585 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2586 if (!mrt) {
2587 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2588 return -ENOENT;
2589 }
2590
2591 /* entries are added/deleted only under RTNL */
2592 rcu_read_lock();
2593 cache = ip6mr_cache_find(mrt, &src, &grp);
2594 rcu_read_unlock();
2595 if (!cache) {
2596 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2597 return -ENOENT;
2598 }
2599
2600 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2601 if (!skb)
2602 return -ENOBUFS;
2603
2604 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2605 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2606 if (err < 0) {
2607 kfree_skb(skb);
2608 return err;
2609 }
2610
2611 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2612 }
2613
ip6mr_rtm_dumproute(struct sk_buff * skb,struct netlink_callback * cb)2614 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2615 {
2616 const struct nlmsghdr *nlh = cb->nlh;
2617 struct fib_dump_filter filter = {
2618 .rtnl_held = true,
2619 };
2620 int err;
2621
2622 if (cb->strict_check) {
2623 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2624 &filter, cb);
2625 if (err < 0)
2626 return err;
2627 }
2628
2629 if (filter.table_id) {
2630 struct mr_table *mrt;
2631
2632 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2633 if (!mrt) {
2634 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2635 return skb->len;
2636
2637 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2638 return -ENOENT;
2639 }
2640 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2641 &mfc_unres_lock, &filter);
2642 return skb->len ? : err;
2643 }
2644
2645 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2646 _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2647 }
2648