1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 
 */

static DEFINE_SPINLOCK(mrt_lock);

/* Read the net_device attached to a vif.
 * Uses rcu_dereference(), so the caller is expected to be inside an RCU
 * read-side critical section.
 */
static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache used for struct mfc6_cache allocations in this file. */
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt,
			     struct list_head *dev_kill_list);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every table on net->ipv6.mr6_tables. The lockdep condition accepts
 * either RTNL being held or the list being empty.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))

/* Iterator step: return the table following @mrt on the per-netns list
 * (the first table when @mrt is NULL), or NULL once the list head is reached.
 */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	/* Wrapped around to the list head: no more tables. */
	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

/* Linear search of the per-netns table list for a table with id @id.
 * Returns NULL when no table matches.
 */
static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

/* Resolve the multicast routing table for flow @flp6 through the fib rules
 * registered in net->ipv6.mr6_rules_ops.
 * On success stores the table in *@mrt and returns 0; otherwise returns the
 * negative error from fib_rules_lookup() and leaves *@mrt untouched.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi6_to_flowi(flp6));

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

/* fib_rules .action callback: map the rule action to an errno, or for
 * FR_ACT_TO_TBL look up the target table and store it in the lookup result.
 * Returns -EAGAIN when the referenced table does not exist.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* fib_rules .match callback: every flow matches (always returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

/* fib_rules .configure callback: nothing family-specific to set up. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

/* fib_rules .compare callback: unconditionally returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* fib_rules .fill callback: zero the address-length and tos header fields. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

/* Template passed to fib_rules_register() for each network namespace. */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.owner		= THIS_MODULE,
};

/* Per-netns init (multiple-tables build): register the rules ops, create the
 * default table and add the default rule pointing at it.
 * Returns 0 or a negative errno; on failure everything set up so far is
 * torn down again.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	LIST_HEAD(dev_kill_list);
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt, &dev_kill_list);
err1:
	fib_rules_unregister(ops);
	return err;
}

/* Per-netns exit: unregister the fib rules ops. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}

/* Per-netns exit step that unlinks and frees every table; devices to be
 * unregistered are collected on @dev_kill_list by ip6mr_free_table().
 * NOTE(review): the name suggests this runs under RTNL - confirm at the caller.
 */
static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
					     struct list_head *dev_kill_list)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del_rcu(&mrt->list);
		ip6mr_free_table(mrt, dev_kill_list);
	}
}

/* Dump the RTNL_FAMILY_IP6MR rules to notifier block @nb. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}

/* Return the fib-rules sequence counter for RTNL_FAMILY_IP6MR. */
static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

/* True when @rule is a match-all FR_ACT_TO_TBL rule targeting
 * RT6_TABLE_DFLT with no l3mdev flag set.
 */
bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
/* Single-table build: the only table is net->ipv6.mrt6, so the iterator
 * yields it once (when @mrt is NULL) and then terminates.
 */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return rcu_dereference(net->ipv6.mrt6);
	return NULL;
}

/* Single-table build: @id is ignored and the sole table is returned.
 * The lockdep condition accepts RTNL held or the pointer still being NULL.
 */
static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	return rcu_dereference_check(net->ipv6.mrt6,
				     lockdep_rtnl_is_held() ||
				     !rcu_access_pointer(net->ipv6.mrt6));
}

#define ip6mr_for_each_table(mrt, net) \
	for (mrt = __ip6mr_get_table(net, 0); mrt; mrt = NULL)

/* Single-table build: return the sole table in *@mrt, or -EAGAIN when it
 * has not been set.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = rcu_dereference(net->ipv6.mrt6);
	if (!*mrt)
		return -EAGAIN;
	return 0;
}

/* Per-netns init (single-table build): create and publish the default table. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);

	rcu_assign_pointer(net->ipv6.mrt6, mrt);
	return 0;
}

/* Nothing to unregister in the single-table build. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
}

/* Clear the published table pointer, then free the table. */
static void __net_exit ip6mr_rules_exit_rtnl(struct net *net,
					     struct list_head *dev_kill_list)
{
	struct mr_table *mrt = rcu_dereference_protected(net->ipv6.mrt6, 1);

	RCU_INIT_POINTER(net->ipv6.mrt6, NULL);
	ip6mr_free_table(mrt, dev_kill_list);
}

/* No rules exist in the single-table build; nothing to dump. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}

/* No rules exist in the single-table build; the sequence is always 0. */
static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
#endif

/* Look up a table by id, taking the RCU read lock only around the lookup.
 * The returned pointer is used by the caller after rcu_read_unlock().
 * NOTE(review): what keeps the table alive after the unlock is not visible
 * here - confirm at the call sites.
 */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, id);
	rcu_read_unlock();

	return mrt;
}

/* rhashtable compare callback: returns 0 when both the multicast group and
 * the origin of the entry equal the key, non-zero otherwise.
 */
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

/* Hash table parameters: entries are keyed by the mfc6_cache cmparg member. */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

/* Callback handed to mr_table_alloc(): link a new table into the per-netns
 * list. It is a no-op in the single-table build.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

/* Wildcard key: both origin and group set to the unspecified address. */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

/* Return the existing table with id @id, or allocate a new one through
 * mr_table_alloc(). The result of mr_table_alloc() is passed through
 * unchanged; callers in this file test it with IS_ERR().
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = __ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

/* Tear down a table: stop its expiry timer, flush all MIFs and MFC entries
 * (static ones included), free the table, and hand any devices queued for
 * unregistration to the caller through @dev_kill_list.
 */
static void ip6mr_free_table(struct mr_table *mrt,
			     struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	LIST_HEAD(ip6mr_dev_kill_list);

	WARN_ON_ONCE(!mr_can_free_table(net));

	timer_shutdown_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC,
			    &ip6mr_dev_kill_list);

	mr_table_free(mrt);

	/* Warn if devices were queued while the netns is not yet initialized. */
	WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ip6mr_dev_kill_list));
	list_splice(&ip6mr_dev_kill_list, dev_kill_list);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

/* seq_file .start for the vif listing: takes the RCU read lock and binds the
 * iterator to the default table. On failure the lock is dropped here before
 * the error pointer is returned.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		rcu_read_unlock();
		return ERR_PTR(-ENOENT);
	}

	iter->mrt = mrt;

	return mr_vif_seq_start(seq, pos);
}

/* seq_file .stop for the vif listing: releases the RCU read lock. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* seq_file .show for the vif listing: prints the header for the start token,
 * otherwise one line of counters and flags for the vif.
 * NOTE(review): the column spacing inside the header and format strings may
 * have been collapsed in this copy of the file - confirm against upstream.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const struct net_device *vif_dev;
		const char *name;

		vif_dev = vif_dev_read(vif);
		name = vif_dev ? vif_dev->name : "none";

		/* The first column is the index of the vif in vif_table. */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

/* seq_file .start for the MFC cache listing on the default table. */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* seq_file .show for the MFC cache listing: prints group, origin and parent
 * vif, then either the resolved counters plus the per-vif TTL list, or zeros
 * for an entry that is still on the unresolved queue.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group "
			 "Origin "
			 "Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
			/* Only existing vifs with a TTL below 255 are listed. */
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

/* Receive handler for PIM packets: validates a PIM Register message,
 * strips the outer headers and re-injects the encapsulated IPv6 multicast
 * packet on the table's register vif device. Always returns 0; the skb is
 * either handed to netif_rx() or freed.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Drop anything that is not a non-null Register, or for which both
	 * checksum checks (over the PIM header only, and over the whole
	 * packet) come out non-zero.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;

	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (reg_vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);

	if (!reg_dev)
		goto drop;

	/* Make the encapsulated IPv6 header the new network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service
routines creating virtual interfaces: PIMREG */ 632 633 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 634 struct net_device *dev) 635 { 636 struct net *net = dev_net(dev); 637 struct mr_table *mrt; 638 struct flowi6 fl6 = { 639 .flowi6_oif = dev->ifindex, 640 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 641 .flowi6_mark = skb->mark, 642 }; 643 644 if (!pskb_inet_may_pull(skb)) 645 goto tx_err; 646 647 rcu_read_lock(); 648 649 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 650 goto tx_lookup_err; 651 652 DEV_STATS_ADD(dev, tx_bytes, skb->len); 653 DEV_STATS_INC(dev, tx_packets); 654 655 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 656 MRT6MSG_WHOLEPKT); 657 rcu_read_unlock(); 658 kfree_skb(skb); 659 return NETDEV_TX_OK; 660 661 tx_lookup_err: 662 rcu_read_unlock(); 663 tx_err: 664 DEV_STATS_INC(dev, tx_errors); 665 kfree_skb(skb); 666 return NETDEV_TX_OK; 667 } 668 669 static int reg_vif_get_iflink(const struct net_device *dev) 670 { 671 return 0; 672 } 673 674 static const struct net_device_ops reg_vif_netdev_ops = { 675 .ndo_start_xmit = reg_vif_xmit, 676 .ndo_get_iflink = reg_vif_get_iflink, 677 }; 678 679 static void reg_vif_setup(struct net_device *dev) 680 { 681 dev->type = ARPHRD_PIMREG; 682 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 683 dev->flags = IFF_NOARP; 684 dev->netdev_ops = ®_vif_netdev_ops; 685 dev->needs_free_netdev = true; 686 dev->netns_immutable = true; 687 } 688 689 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 690 { 691 struct net_device *dev; 692 char name[IFNAMSIZ]; 693 694 if (mrt->id == RT6_TABLE_DFLT) 695 sprintf(name, "pim6reg"); 696 else 697 sprintf(name, "pim6reg%u", mrt->id); 698 699 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 700 if (!dev) 701 return NULL; 702 703 dev_net_set(dev, net); 704 705 if (register_netdevice(dev)) { 706 free_netdev(dev); 707 return NULL; 708 } 709 710 if (dev_open(dev, NULL)) 711 goto failure; 712 713 dev_hold(dev); 714 return 
dev; 715 716 failure: 717 unregister_netdevice(dev); 718 return NULL; 719 } 720 #endif 721 722 static int call_ip6mr_vif_entry_notifiers(struct net *net, 723 enum fib_event_type event_type, 724 struct vif_device *vif, 725 struct net_device *vif_dev, 726 mifi_t vif_index, u32 tb_id) 727 { 728 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 729 vif, vif_dev, vif_index, tb_id, 730 &net->ipv6.ipmr_seq); 731 } 732 733 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 734 enum fib_event_type event_type, 735 struct mfc6_cache *mfc, u32 tb_id) 736 { 737 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 738 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 739 } 740 741 /* Delete a VIF entry */ 742 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 743 struct list_head *head) 744 { 745 struct vif_device *v; 746 struct net_device *dev; 747 struct inet6_dev *in6_dev; 748 749 if (vifi < 0 || vifi >= mrt->maxvif) 750 return -EADDRNOTAVAIL; 751 752 v = &mrt->vif_table[vifi]; 753 754 dev = rtnl_dereference(v->dev); 755 if (!dev) 756 return -EADDRNOTAVAIL; 757 758 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 759 FIB_EVENT_VIF_DEL, v, dev, 760 vifi, mrt->id); 761 spin_lock(&mrt_lock); 762 RCU_INIT_POINTER(v->dev, NULL); 763 764 #ifdef CONFIG_IPV6_PIMSM_V2 765 if (vifi == mrt->mroute_reg_vif_num) { 766 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 767 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 768 } 769 #endif 770 771 if (vifi + 1 == mrt->maxvif) { 772 int tmp; 773 for (tmp = vifi - 1; tmp >= 0; tmp--) { 774 if (VIF_EXISTS(mrt, tmp)) 775 break; 776 } 777 WRITE_ONCE(mrt->maxvif, tmp + 1); 778 } 779 780 spin_unlock(&mrt_lock); 781 782 dev_set_allmulti(dev, -1); 783 784 in6_dev = __in6_dev_get(dev); 785 if (in6_dev) { 786 atomic_dec(&in6_dev->cnf.mc_forwarding); 787 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 788 NETCONFA_MC_FORWARDING, 789 dev->ifindex, &in6_dev->cnf); 790 } 791 792 if 
((v->flags & MIFF_REGISTER) && !notify) 793 unregister_netdevice_queue(dev, head); 794 795 netdev_put(dev, &v->dev_tracker); 796 return 0; 797 } 798 799 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 800 { 801 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 802 803 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 804 } 805 806 static inline void ip6mr_cache_free(struct mfc6_cache *c) 807 { 808 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 809 } 810 811 /* Destroy an unresolved cache entry, killing queued skbs 812 and reporting error to netlink readers. 813 */ 814 815 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 816 { 817 struct net *net = read_pnet(&mrt->net); 818 struct sk_buff *skb; 819 820 atomic_dec(&mrt->cache_resolve_queue_len); 821 822 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 823 if (ipv6_hdr(skb)->version == 0) { 824 struct nlmsghdr *nlh = skb_pull(skb, 825 sizeof(struct ipv6hdr)); 826 nlh->nlmsg_type = NLMSG_ERROR; 827 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 828 skb_trim(skb, nlh->nlmsg_len); 829 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 830 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 831 } else 832 kfree_skb(skb); 833 } 834 835 ip6mr_cache_free(c); 836 } 837 838 839 /* Timer process for all the unresolved queue. */ 840 841 static void ipmr_do_expire_process(struct mr_table *mrt) 842 { 843 unsigned long now = jiffies; 844 unsigned long expires = 10 * HZ; 845 struct mr_mfc *c, *next; 846 847 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 848 if (time_after(c->mfc_un.unres.expires, now)) { 849 /* not yet... 
*/ 850 unsigned long interval = c->mfc_un.unres.expires - now; 851 if (interval < expires) 852 expires = interval; 853 continue; 854 } 855 856 list_del(&c->list); 857 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 858 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 859 } 860 861 if (!list_empty(&mrt->mfc_unres_queue)) 862 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 863 } 864 865 static void ipmr_expire_process(struct timer_list *t) 866 { 867 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 868 869 if (!spin_trylock(&mfc_unres_lock)) { 870 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 871 return; 872 } 873 874 if (!list_empty(&mrt->mfc_unres_queue)) 875 ipmr_do_expire_process(mrt); 876 877 spin_unlock(&mfc_unres_lock); 878 } 879 880 /* Fill oifs list. It is called under locked mrt_lock. */ 881 882 static void ip6mr_update_thresholds(struct mr_table *mrt, 883 struct mr_mfc *cache, 884 unsigned char *ttls) 885 { 886 int vifi; 887 888 cache->mfc_un.res.minvif = MAXMIFS; 889 cache->mfc_un.res.maxvif = 0; 890 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 891 892 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 893 if (VIF_EXISTS(mrt, vifi) && 894 ttls[vifi] && ttls[vifi] < 255) { 895 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 896 if (cache->mfc_un.res.minvif > vifi) 897 cache->mfc_un.res.minvif = vifi; 898 if (cache->mfc_un.res.maxvif <= vifi) 899 cache->mfc_un.res.maxvif = vifi + 1; 900 } 901 } 902 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 903 } 904 905 static int mif6_add(struct net *net, struct mr_table *mrt, 906 struct mif6ctl *vifc, int mrtsock) 907 { 908 int vifi = vifc->mif6c_mifi; 909 struct vif_device *v = &mrt->vif_table[vifi]; 910 struct net_device *dev; 911 struct inet6_dev *in6_dev; 912 int err; 913 914 /* Is vif busy ? 
*/ 915 if (VIF_EXISTS(mrt, vifi)) 916 return -EADDRINUSE; 917 918 switch (vifc->mif6c_flags) { 919 #ifdef CONFIG_IPV6_PIMSM_V2 920 case MIFF_REGISTER: 921 /* 922 * Special Purpose VIF in PIM 923 * All the packets will be sent to the daemon 924 */ 925 if (mrt->mroute_reg_vif_num >= 0) 926 return -EADDRINUSE; 927 dev = ip6mr_reg_vif(net, mrt); 928 if (!dev) 929 return -ENOBUFS; 930 err = dev_set_allmulti(dev, 1); 931 if (err) { 932 unregister_netdevice(dev); 933 dev_put(dev); 934 return err; 935 } 936 break; 937 #endif 938 case 0: 939 dev = dev_get_by_index(net, vifc->mif6c_pifi); 940 if (!dev) 941 return -EADDRNOTAVAIL; 942 err = dev_set_allmulti(dev, 1); 943 if (err) { 944 dev_put(dev); 945 return err; 946 } 947 break; 948 default: 949 return -EINVAL; 950 } 951 952 in6_dev = __in6_dev_get(dev); 953 if (in6_dev) { 954 atomic_inc(&in6_dev->cnf.mc_forwarding); 955 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 956 NETCONFA_MC_FORWARDING, 957 dev->ifindex, &in6_dev->cnf); 958 } 959 960 /* Fill in the VIF structures */ 961 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 962 vifc->mif6c_flags | (!mrtsock ? 
VIFF_STATIC : 0), 963 MIFF_REGISTER); 964 965 /* And finish update writing critical data */ 966 spin_lock(&mrt_lock); 967 rcu_assign_pointer(v->dev, dev); 968 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 969 #ifdef CONFIG_IPV6_PIMSM_V2 970 if (v->flags & MIFF_REGISTER) 971 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 972 #endif 973 if (vifi + 1 > mrt->maxvif) 974 WRITE_ONCE(mrt->maxvif, vifi + 1); 975 spin_unlock(&mrt_lock); 976 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 977 v, dev, vifi, mrt->id); 978 return 0; 979 } 980 981 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 982 const struct in6_addr *origin, 983 const struct in6_addr *mcastgrp) 984 { 985 struct mfc6_cache_cmp_arg arg = { 986 .mf6c_origin = *origin, 987 .mf6c_mcastgrp = *mcastgrp, 988 }; 989 990 return mr_mfc_find(mrt, &arg); 991 } 992 993 /* Look for a (*,G) entry */ 994 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 995 struct in6_addr *mcastgrp, 996 mifi_t mifi) 997 { 998 struct mfc6_cache_cmp_arg arg = { 999 .mf6c_origin = in6addr_any, 1000 .mf6c_mcastgrp = *mcastgrp, 1001 }; 1002 1003 if (ipv6_addr_any(mcastgrp)) 1004 return mr_mfc_find_any_parent(mrt, mifi); 1005 return mr_mfc_find_any(mrt, mifi, &arg); 1006 } 1007 1008 /* Look for a (S,G,iif) entry if parent != -1 */ 1009 static struct mfc6_cache * 1010 ip6mr_cache_find_parent(struct mr_table *mrt, 1011 const struct in6_addr *origin, 1012 const struct in6_addr *mcastgrp, 1013 int parent) 1014 { 1015 struct mfc6_cache_cmp_arg arg = { 1016 .mf6c_origin = *origin, 1017 .mf6c_mcastgrp = *mcastgrp, 1018 }; 1019 1020 return mr_mfc_find_parent(mrt, &arg, parent); 1021 } 1022 1023 /* Allocate a multicast cache entry */ 1024 static struct mfc6_cache *ip6mr_cache_alloc(void) 1025 { 1026 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1027 if (!c) 1028 return NULL; 1029 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1030 c->_c.mfc_un.res.minvif = MAXMIFS; 1031 
c->_c.free = ip6mr_cache_free_rcu; 1032 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1033 return c; 1034 } 1035 1036 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1037 { 1038 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1039 if (!c) 1040 return NULL; 1041 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1042 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1043 return c; 1044 } 1045 1046 /* 1047 * A cache entry has gone into a resolved state from queued 1048 */ 1049 1050 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1051 struct mfc6_cache *uc, struct mfc6_cache *c) 1052 { 1053 struct sk_buff *skb; 1054 1055 /* 1056 * Play the pending entries through our router 1057 */ 1058 1059 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1060 if (ipv6_hdr(skb)->version == 0) { 1061 struct nlmsghdr *nlh = skb_pull(skb, 1062 sizeof(struct ipv6hdr)); 1063 1064 if (mr_fill_mroute(mrt, skb, &c->_c, 1065 nlmsg_data(nlh)) > 0) { 1066 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1067 } else { 1068 nlh->nlmsg_type = NLMSG_ERROR; 1069 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1070 skb_trim(skb, nlh->nlmsg_len); 1071 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1072 } 1073 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1074 } else { 1075 rcu_read_lock(); 1076 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1077 rcu_read_unlock(); 1078 } 1079 } 1080 } 1081 1082 /* 1083 * Bounce a cache query up to pim6sd and netlink. 
1084 * 1085 * Called under rcu_read_lock() 1086 */ 1087 1088 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 1089 mifi_t mifi, int assert) 1090 { 1091 enum skb_drop_reason reason; 1092 struct sock *mroute6_sk; 1093 struct sk_buff *skb; 1094 struct mrt6msg *msg; 1095 1096 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1097 if (!mroute6_sk) 1098 return -EINVAL; 1099 1100 #ifdef CONFIG_IPV6_PIMSM_V2 1101 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) 1102 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1103 +sizeof(*msg)); 1104 else 1105 #endif 1106 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1107 1108 if (!skb) 1109 return -ENOBUFS; 1110 1111 /* I suppose that internal messages 1112 * do not require checksums */ 1113 1114 skb->ip_summed = CHECKSUM_UNNECESSARY; 1115 1116 #ifdef CONFIG_IPV6_PIMSM_V2 1117 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { 1118 /* Ugly, but we have no choice with this interface. 1119 Duplicate old header, fix length etc. 
1120 And all this only to mangle msg->im6_msgtype and 1121 to set msg->im6_mbz to "mbz" :-) 1122 */ 1123 __skb_pull(skb, skb_network_offset(pkt)); 1124 1125 skb_push(skb, sizeof(*msg)); 1126 skb_reset_transport_header(skb); 1127 msg = (struct mrt6msg *)skb_transport_header(skb); 1128 msg->im6_mbz = 0; 1129 msg->im6_msgtype = assert; 1130 if (assert == MRT6MSG_WRMIFWHOLE) 1131 msg->im6_mif = mifi; 1132 else 1133 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); 1134 msg->im6_pad = 0; 1135 msg->im6_src = ipv6_hdr(pkt)->saddr; 1136 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1137 1138 skb->ip_summed = CHECKSUM_UNNECESSARY; 1139 } else 1140 #endif 1141 { 1142 /* 1143 * Copy the IP header 1144 */ 1145 1146 skb_put(skb, sizeof(struct ipv6hdr)); 1147 skb_reset_network_header(skb); 1148 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1149 1150 /* 1151 * Add our header 1152 */ 1153 skb_put(skb, sizeof(*msg)); 1154 skb_reset_transport_header(skb); 1155 msg = (struct mrt6msg *)skb_transport_header(skb); 1156 1157 msg->im6_mbz = 0; 1158 msg->im6_msgtype = assert; 1159 msg->im6_mif = mifi; 1160 msg->im6_pad = 0; 1161 msg->im6_src = ipv6_hdr(pkt)->saddr; 1162 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1163 1164 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1165 skb->ip_summed = CHECKSUM_UNNECESSARY; 1166 } 1167 1168 mrt6msg_netlink_event(mrt, skb); 1169 1170 /* Deliver to user space multicast routing algorithms */ 1171 reason = sock_queue_rcv_skb_reason(mroute6_sk, skb); 1172 1173 if (reason) { 1174 sk_skb_reason_drop(mroute6_sk, skb, reason); 1175 return -ENOMEM; 1176 } 1177 1178 return 0; 1179 } 1180 1181 /* Queue a packet for resolution. It gets locked cache entry! 
*/
/*
 * @mrt:  table owning the unresolved queue
 * @mifi: incoming mif, passed to ip6mr_cache_report() when a new entry is
 *        created
 * @skb:  packet to park; it is either queued on the entry or freed here
 * @dev:  if non-NULL, recorded in skb->dev / skb->skb_iif before queueing
 *
 * Returns 0 if the packet was queued, -EINVAL if check_net() fails, -ENOBUFS
 * if no entry could be allocated or the entry already holds more than three
 * packets, or the error returned by ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc6_cache *c = NULL;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);

	if (!check_net(net)) {
		err = -EINVAL;
		goto err;
	}

	/* Look for an existing unresolved entry for this (source, group). */
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			err = -ENOBUFS;
			goto err;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0)
			goto err;

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		/* The entry stays on the queue: clear c so that the error
		 * path below does not free it.
		 */
		c = NULL;
		err = -ENOBUFS;
		goto err;
	}

	if (dev) {
		skb->dev = dev;
		skb->skb_iif = dev->ifindex;
	}

	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);

	spin_unlock_bh(&mfc_unres_lock);
	return 0;

err:
	spin_unlock_bh(&mfc_unres_lock);
	/* c is non-NULL here only for a freshly allocated entry that was
	 * never linked into mfc_unres_queue.
	 */
	if (c)
		ip6mr_cache_free(c);
	kfree_skb(skb);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

/* Remove the entry matching @mfc's origin/group and @parent from the table;
 * returns -ENOENT when no such entry is found, 0 otherwise.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	/* Unlink from both the hash table and the cache list before notifying. */
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

/*
 * Netdevice notifier callback.  Only NETDEV_UNREGISTER is handled: every mif
 * in every table of the device's netns that points at the device is deleted.
 * Always returns NOTIFY_DONE.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

/* Sequence number for FIB notifiers: the netns ipmr_seq counter plus the
 * value reported by ip6mr_rules_seq_read().
 */
static unsigned int ip6mr_seq_read(const struct net *net)
{
	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}

/* FIB notifier dump hook: delegates to mr_dump() with the IP6MR family, the
 * rules dump callback and the table iterator.
 */
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

/* Per-netns: reset ipmr_seq and register the FIB notifier ops, storing the
 * result in net->ipv6.ip6mr_notifier_ops.
 */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	atomic_set(&net->ipv6.ipmr_seq, 0);

	ops =
fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

/* Per-netns: undo ip6mr_notifier_init(). */
static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
/*
 * Per-netns init: initialises mfc_mutex, registers the FIB notifier ops,
 * sets up the rules and, with CONFIG_PROC_FS, creates the "ip6_mr_vif" and
 * "ip6_mr_cache" proc entries.  Each failure unwinds the earlier steps and
 * returns the error.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	LIST_HEAD(dev_kill_list);
#endif
	int err;

	mutex_init(&net->ipv6.mfc_mutex);

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	/* NOTE(review): unlike ip6mr_sk_done(), this path neither takes
	 * rtnl_lock() nor passes dev_kill_list to unregister_netdevice_many()
	 * after ip6mr_rules_exit_rtnl() - confirm that this is intended.
	 */
	ip6mr_rules_exit_rtnl(net, &dev_kill_list);
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

/* Per-netns exit: remove the proc entries (CONFIG_PROC_FS), then tear down
 * the rules and the FIB notifier ops.
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

/* ->exit_rtnl hook: forwards @dev_kill_list to ip6mr_rules_exit_rtnl(). */
static void __net_exit ip6mr_net_exit_rtnl(struct net *net,
					   struct list_head *dev_kill_list)
{
	ip6mr_rules_exit_rtnl(net, dev_kill_list);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
1413 .exit_rtnl = ip6mr_net_exit_rtnl, 1414 }; 1415 1416 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { 1417 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, 1418 .msgtype = RTM_GETROUTE, 1419 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute, 1420 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, 1421 }; 1422 1423 int __init ip6_mr_init(void) 1424 { 1425 int err; 1426 1427 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); 1428 if (!mrt_cachep) 1429 return -ENOMEM; 1430 1431 err = register_pernet_subsys(&ip6mr_net_ops); 1432 if (err) 1433 goto reg_pernet_fail; 1434 1435 err = register_netdevice_notifier(&ip6_mr_notifier); 1436 if (err) 1437 goto reg_notif_fail; 1438 #ifdef CONFIG_IPV6_PIMSM_V2 1439 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1440 pr_err("%s: can't add PIM protocol\n", __func__); 1441 err = -EAGAIN; 1442 goto add_proto_fail; 1443 } 1444 #endif 1445 err = rtnl_register_many(ip6mr_rtnl_msg_handlers); 1446 if (!err) 1447 return 0; 1448 1449 #ifdef CONFIG_IPV6_PIMSM_V2 1450 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1451 add_proto_fail: 1452 unregister_netdevice_notifier(&ip6_mr_notifier); 1453 #endif 1454 reg_notif_fail: 1455 unregister_pernet_subsys(&ip6mr_net_ops); 1456 reg_pernet_fail: 1457 kmem_cache_destroy(mrt_cachep); 1458 return err; 1459 } 1460 1461 void __init ip6_mr_cleanup(void) 1462 { 1463 rtnl_unregister_many(ip6mr_rtnl_msg_handlers); 1464 #ifdef CONFIG_IPV6_PIMSM_V2 1465 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1466 #endif 1467 unregister_netdevice_notifier(&ip6_mr_notifier); 1468 unregister_pernet_subsys(&ip6mr_net_ops); 1469 kmem_cache_destroy(mrt_cachep); 1470 } 1471 1472 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1473 struct mf6cctl *mfc, int mrtsock, int parent) 1474 { 1475 unsigned char ttls[MAXMIFS]; 1476 struct mfc6_cache *uc, *c; 1477 struct mr_mfc *_uc; 1478 bool found; 1479 int i, err; 1480 1481 if 
(mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* 255 marks "do not forward"; mifs set in mf6cc_ifset get threshold 1. */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	/* Entry already exists: update it in place and report a REPLACE. */
	if (c) {
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* A new entry's group must be either unspecified or multicast. */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	/* Entries not added through the mroute socket are flagged static. */
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
*/
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: the expiry timer has nothing to do. */
	if (list_empty(&mrt->mfc_unres_queue))
		timer_delete(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is only meaningful when found is set (it was unlinked above). */
	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/*
 * @flags selects what is flushed: MRT6_FLUSH_MIFS / MRT6_FLUSH_MIFS_STATIC
 * for non-static / static mifs, MRT6_FLUSH_MFC / MRT6_FLUSH_MFC_STATIC for
 * non-static / static cache entries.  MRT6_FLUSH_MFC additionally empties the
 * unresolved queue.  @dev_kill_list is passed through to mif6_delete().
 */
static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	struct mr_mfc *c, *tmp;
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, dev_kill_list);
		}
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		mutex_lock(&net->ipv6.mfc_mutex);

		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
(struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}

		mutex_unlock(&net->ipv6.mfc_mutex);
	}

	if (flags & MRT6_FLUSH_MFC) {
		/* Skip taking the lock when the queue length counter is zero,
		 * unless check_net() fails, in which case flush regardless.
		 */
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0 ||
		    !check_net(net)) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

/*
 * Attach @sk as the mroute socket of @mrt.  Returns -EADDRINUSE if the table
 * already has one; otherwise publishes the socket, marks it SOCK_RCU_FREE,
 * bumps devconf_all->mc_forwarding and sends a netconf notification.
 */
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	spin_lock(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
	}
	spin_unlock(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

/*
 * Detach @sk from whichever table it is the mroute socket of and flush that
 * table's non-static mifs and cache entries.  Returns 0 if a table was
 * found, -EACCES otherwise (including for sockets that are not raw ICMPv6
 * or when mc_forwarding is zero).
 */
int ip6mr_sk_done(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct ipv6_devconf *devconf;
	LIST_HEAD(dev_kill_list);
	struct mr_table *mrt;
	int err = -EACCES;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	devconf = net->ipv6.devconf_all;
	if (!devconf || !atomic_read(&devconf->mc_forwarding))
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			spin_lock(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
1665 * is guaranteed by sk_destruct() 1666 */ 1667 atomic_dec(&devconf->mc_forwarding); 1668 spin_unlock(&mrt_lock); 1669 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1670 NETCONFA_MC_FORWARDING, 1671 NETCONFA_IFINDEX_ALL, 1672 net->ipv6.devconf_all); 1673 1674 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC, 1675 &dev_kill_list); 1676 err = 0; 1677 break; 1678 } 1679 } 1680 unregister_netdevice_many(&dev_kill_list); 1681 rtnl_unlock(); 1682 1683 return err; 1684 } 1685 1686 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1687 { 1688 struct mr_table *mrt; 1689 struct flowi6 fl6 = { 1690 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1691 .flowi6_oif = skb->dev->ifindex, 1692 .flowi6_mark = skb->mark, 1693 }; 1694 1695 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1696 return NULL; 1697 1698 return rcu_access_pointer(mrt->mroute_sk); 1699 } 1700 EXPORT_SYMBOL(mroute6_is_socket); 1701 1702 /* 1703 * Socket options and virtual interface manipulation. The whole 1704 * virtual interface system is a complete heap, but unfortunately 1705 * that's how BSD mrouted happens to think. Maybe one day with a proper 1706 * MOSPF/PIM router set up we can clean this up. 1707 */ 1708 1709 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1710 unsigned int optlen) 1711 { 1712 int ret, parent = 0; 1713 struct mif6ctl vif; 1714 struct mf6cctl mfc; 1715 mifi_t mifi; 1716 struct net *net = sock_net(sk); 1717 struct mr_table *mrt; 1718 1719 if (sk->sk_type != SOCK_RAW || 1720 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1721 return -EOPNOTSUPP; 1722 1723 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
: RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	/* Everything except MRT6_INIT requires the caller to be the table's
	 * mroute socket or to hold CAP_NET_ADMIN in the netns user namespace.
	 */
	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		/* Non-proxy variants always use parent -1. */
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		/* parent is still 0 only for the _PROXY variants: take it
		 * from the request.
		 */
		if (parent == 0)
			parent = mfc.mf6cc_parent;

		mutex_lock(&net->ipv6.mfc_mutex);

		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rcu_access_pointer(mrt->mroute_sk),
					    parent);

		mutex_unlock(&net->ipv6.mfc_mutex);
		return ret;

	case MRT6_FLUSH:
	{
		LIST_HEAD(dev_kill_list);
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;

		rtnl_lock();
		mroute_clean_tables(mrt, flags, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		WRITE_ONCE(mrt->mroute_do_assert, v);
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		/* Remember whether WRMIFWHOLE was requested before v is
		 * collapsed to 0/1.
		 */
		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			WRITE_ONCE(mrt->mroute_do_pim, v);
			WRITE_ONCE(mrt->mroute_do_assert, v);
			WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrmifwhole);
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* An active mroute socket cannot switch tables. */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			  sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
: RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = READ_ONCE(mrt->mroute_do_pim);
		break;
#endif
	case MRT6_ASSERT:
		val = READ_ONCE(mrt->mroute_do_assert);
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;

	/* Clamp the caller's length to sizeof(int); a negative length is
	 * rejected.
	 */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	/* Report the length actually used, then copy that many bytes. */
	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
/*
 * @arg is dereferenced directly as a struct sioc_mif_req6 or
 * struct sioc_sg_req6, and the counters are written back into it.
 * Returns 0 on success, -ENOENT if the table does not exist, -EINVAL for an
 * out-of-range mif, -EADDRNOTAVAIL if the mif or cache entry does not exist
 * and -ENOIOCTLCMD for any other @cmd.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
	struct sioc_sg_req6 *sr;
	struct sioc_mif_req6 *vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
: RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		vr = (struct sioc_mif_req6 *)arg;
		if (vr->mifi >= mrt->maxvif)
			return -EINVAL;
		/* Clamp the index under speculation after the bounds check. */
		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr->mifi];
		if (VIF_EXISTS(mrt, vr->mifi)) {
			vr->icount = READ_ONCE(vif->pkt_in);
			vr->ocount = READ_ONCE(vif->pkt_out);
			vr->ibytes = READ_ONCE(vif->bytes_in);
			vr->obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		sr = (struct sioc_sg_req6 *)arg;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
				     &sr->grp.sin6_addr);
		if (c) {
			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
/* Compat layout of struct sioc_sg_req6: counters are compat_ulong_t. */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat layout of struct sioc_mif_req6: counters are compat_ulong_t. */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 * Compat variant of ip6mr_ioctl(): the request is copied in from and back out
 * to user memory at @arg using the compat structures above.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
: RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = READ_ONCE(vif->pkt_in);
			vr.ocount = READ_ONCE(vif->pkt_out);
			vr.ibytes = READ_ONCE(vif->bytes_in);
			vr.obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			/* Copy out only after leaving the RCU read section. */
			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

/* NF_INET_FORWARD okfn: count the forwarded datagram and hand the skb to
 * dst_output().
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

/*
 * Prepare @skb for transmission on mif @vifi: attach a route towards the
 * mif's link, account the packet on the mif and decrement the hop limit.
 * Returns 0 when the skb is ready to be sent and -1 otherwise; the skb is
 * not freed here.
 */
static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
			      struct sk_buff *skb, int vifi)
{
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct ipv6hdr *ipv6h;
	struct dst_entry *dst;
	struct flowi6 fl6;

	vif_dev = vif_dev_read(vif);
	if
(!vif_dev)
		return -1;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Register mifs do not transmit: the packet is accounted and reported
	 * to the mroute socket as MRT6MSG_WHOLEPKT, and -1 tells the caller
	 * not to send it.
	 */
	if (vif->flags & MIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		return -1;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		return -1;
	}

	/* Replace whatever route the skb carried with the one just found. */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	skb->dev = vif_dev;
	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* We are about to write */
	/* XXX: extension headers?
*/
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
		return -1;

	/* Re-read the header pointer after skb_cow() before writing to it. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;
	return 0;
}

/*
 * Forward @skb out of mif @vifi through the NF_INET_FORWARD hook.  The skb
 * is freed here if ip6mr_prepare_xmit() fails.
 */
static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, int vifi)
{
	/* Captured first: ip6mr_prepare_xmit() overwrites skb->dev. */
	struct net_device *indev = skb->dev;

	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		net, NULL, skb, indev, skb->dev,
		ip6mr_forward2_finish);
	return;

out_free:
	kfree_skb(skb);
}

/*
 * Send @skb out of mif @vifi via ip6_output().  The skb is freed here if
 * ip6mr_prepare_xmit() fails.
 */
static void ip6mr_output2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, int vifi)
{
	if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	ip6_output(net, NULL, skb);
	return;

out_free:
	kfree_skb(skb);
}

/* Called with rcu_read_lock() */
/* Returns the highest mif index whose device is @dev, or -1 if none. */
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
			break;
	}
	return ct;
}

/* Called under rcu_read_lock() */
/*
 * Forward @skb, received on @dev, according to resolved cache entry @c.
 * The skb is consumed: forwarded on the last selected mif (clones go to the
 * others) or freed.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, dev);

	/* Account the packet on the cache entry. */
	vif = c->_c.mfc_parent;
	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
*/
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 *	Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);

		/* Reports are rate limited through last_assert and
		 * MFC_ASSERT_THRESH.
		 */
		if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (READ_ONCE(mrt->mroute_do_pim) ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
			if (READ_ONCE(mrt->mroute_do_wrvifwhole))
				ip6mr_cache_report(mrt, skb, true_vifi,
						   MRT6MSG_WRMIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	/* Account the packet on the incoming (parent) mif. */
	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
		   mrt->vif_table[vif].pkt_in + 1);
	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
		   mrt->vif_table[vif].bytes_in + skb->len);

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
*/
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* psend trails one mif behind: each earlier match gets a clone so the
	 * original skb can go out on the last matching mif.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}

/* Called under rcu_read_lock() */
/*
 * Output-path counterpart of ip6_mr_forward(): sends @skb on every mif of
 * cache entry @c whose threshold the hop limit exceeds, using
 * ip6mr_output2().  There is no incoming-interface check here.  The skb is
 * consumed.
 */
static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
				 struct net_device *dev, struct sk_buff *skb,
				 struct mfc6_cache *c)
{
	int psend = -1;
	int ct;

	WARN_ON_ONCE(!rcu_read_lock_held());

	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	/* Forward the frame */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (ipv6_hdr(skb)->hop_limit >
		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
2310 */ 2311 psend = c->_c.mfc_parent; 2312 goto last_forward; 2313 } 2314 goto dont_forward; 2315 } 2316 for (ct = c->_c.mfc_un.res.maxvif - 1; 2317 ct >= c->_c.mfc_un.res.minvif; ct--) { 2318 if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2319 if (psend != -1) { 2320 struct sk_buff *skb2; 2321 2322 skb2 = skb_clone(skb, GFP_ATOMIC); 2323 if (skb2) 2324 ip6mr_output2(net, mrt, skb2, psend); 2325 } 2326 psend = ct; 2327 } 2328 } 2329 last_forward: 2330 if (psend != -1) { 2331 ip6mr_output2(net, mrt, skb, psend); 2332 return; 2333 } 2334 2335 dont_forward: 2336 kfree_skb(skb); 2337 } 2338 2339 /* 2340 * Multicast packets for forwarding arrive here 2341 */ 2342 2343 int ip6_mr_input(struct sk_buff *skb) 2344 { 2345 struct net_device *dev = skb->dev; 2346 struct net *net = dev_net_rcu(dev); 2347 struct mfc6_cache *cache; 2348 struct mr_table *mrt; 2349 struct flowi6 fl6 = { 2350 .flowi6_iif = dev->ifindex, 2351 .flowi6_mark = skb->mark, 2352 }; 2353 int err; 2354 2355 /* skb->dev passed in is the master dev for vrfs. 2356 * Get the proper interface that does have a vif associated with it. 
2357 */ 2358 if (netif_is_l3_master(dev)) { 2359 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2360 if (!dev) { 2361 kfree_skb(skb); 2362 return -ENODEV; 2363 } 2364 } 2365 2366 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2367 if (err < 0) { 2368 kfree_skb(skb); 2369 return err; 2370 } 2371 2372 cache = ip6mr_cache_find(mrt, 2373 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2374 if (!cache) { 2375 int vif = ip6mr_find_vif(mrt, dev); 2376 2377 if (vif >= 0) 2378 cache = ip6mr_cache_find_any(mrt, 2379 &ipv6_hdr(skb)->daddr, 2380 vif); 2381 } 2382 2383 /* 2384 * No usable cache entry 2385 */ 2386 if (!cache) { 2387 int vif; 2388 2389 vif = ip6mr_find_vif(mrt, dev); 2390 if (vif >= 0) { 2391 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2392 2393 return err; 2394 } 2395 kfree_skb(skb); 2396 return -ENODEV; 2397 } 2398 2399 ip6_mr_forward(net, mrt, dev, skb, cache); 2400 2401 return 0; 2402 } 2403 2404 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2405 { 2406 struct net_device *dev = skb_dst(skb)->dev; 2407 struct flowi6 fl6 = (struct flowi6) { 2408 .flowi6_iif = LOOPBACK_IFINDEX, 2409 .flowi6_mark = skb->mark, 2410 }; 2411 struct mfc6_cache *cache; 2412 struct mr_table *mrt; 2413 int err; 2414 int vif; 2415 2416 guard(rcu)(); 2417 2418 if (IP6CB(skb)->flags & IP6SKB_FORWARDED) 2419 goto ip6_output; 2420 if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) 2421 goto ip6_output; 2422 2423 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2424 if (err < 0) { 2425 kfree_skb(skb); 2426 return err; 2427 } 2428 2429 cache = ip6mr_cache_find(mrt, 2430 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2431 if (!cache) { 2432 vif = ip6mr_find_vif(mrt, dev); 2433 if (vif >= 0) 2434 cache = ip6mr_cache_find_any(mrt, 2435 &ipv6_hdr(skb)->daddr, 2436 vif); 2437 } 2438 2439 /* No usable cache entry */ 2440 if (!cache) { 2441 vif = ip6mr_find_vif(mrt, dev); 2442 if (vif >= 0) 2443 return ip6mr_cache_unresolved(mrt, vif, skb, dev); 2444 goto ip6_output; 2445 } 2446 2447 
/* Wrong interface */ 2448 vif = cache->_c.mfc_parent; 2449 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2450 goto ip6_output; 2451 2452 ip6_mr_output_finish(net, mrt, dev, skb, cache); 2453 return 0; 2454 2455 ip6_output: 2456 return ip6_output(net, sk, skb); 2457 } 2458 2459 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2460 u32 portid) 2461 { 2462 int err; 2463 struct mr_table *mrt; 2464 struct mfc6_cache *cache; 2465 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 2466 2467 rcu_read_lock(); 2468 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 2469 if (!mrt) { 2470 rcu_read_unlock(); 2471 return -ENOENT; 2472 } 2473 2474 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2475 if (!cache && skb->dev) { 2476 int vif = ip6mr_find_vif(mrt, skb->dev); 2477 2478 if (vif >= 0) 2479 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2480 vif); 2481 } 2482 2483 if (!cache) { 2484 struct sk_buff *skb2; 2485 struct ipv6hdr *iph; 2486 struct net_device *dev; 2487 int vif; 2488 2489 dev = skb->dev; 2490 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2491 rcu_read_unlock(); 2492 return -ENODEV; 2493 } 2494 2495 /* really correct? 
*/ 2496 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2497 if (!skb2) { 2498 rcu_read_unlock(); 2499 return -ENOMEM; 2500 } 2501 2502 NETLINK_CB(skb2).portid = portid; 2503 skb_reset_transport_header(skb2); 2504 2505 skb_put(skb2, sizeof(struct ipv6hdr)); 2506 skb_reset_network_header(skb2); 2507 2508 iph = ipv6_hdr(skb2); 2509 iph->version = 0; 2510 iph->priority = 0; 2511 iph->flow_lbl[0] = 0; 2512 iph->flow_lbl[1] = 0; 2513 iph->flow_lbl[2] = 0; 2514 iph->payload_len = 0; 2515 iph->nexthdr = IPPROTO_NONE; 2516 iph->hop_limit = 0; 2517 iph->saddr = rt->rt6i_src.addr; 2518 iph->daddr = rt->rt6i_dst.addr; 2519 2520 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2521 rcu_read_unlock(); 2522 2523 return err; 2524 } 2525 2526 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2527 rcu_read_unlock(); 2528 return err; 2529 } 2530 2531 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2532 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2533 int flags) 2534 { 2535 struct nlmsghdr *nlh; 2536 struct rtmsg *rtm; 2537 int err; 2538 2539 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2540 if (!nlh) 2541 return -EMSGSIZE; 2542 2543 rtm = nlmsg_data(nlh); 2544 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2545 rtm->rtm_dst_len = 128; 2546 rtm->rtm_src_len = 128; 2547 rtm->rtm_tos = 0; 2548 rtm->rtm_table = mrt->id; 2549 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2550 goto nla_put_failure; 2551 rtm->rtm_type = RTN_MULTICAST; 2552 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2553 if (c->_c.mfc_flags & MFC_STATIC) 2554 rtm->rtm_protocol = RTPROT_STATIC; 2555 else 2556 rtm->rtm_protocol = RTPROT_MROUTED; 2557 rtm->rtm_flags = 0; 2558 2559 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2560 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2561 goto nla_put_failure; 2562 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2563 /* do not break the dump if cache is unresolved */ 2564 if (err < 0 && err != -ENOENT) 2565 goto nla_put_failure; 2566 2567 
nlmsg_end(skb, nlh); 2568 return 0; 2569 2570 nla_put_failure: 2571 nlmsg_cancel(skb, nlh); 2572 return -EMSGSIZE; 2573 } 2574 2575 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2576 u32 portid, u32 seq, struct mr_mfc *c, 2577 int cmd, int flags) 2578 { 2579 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2580 cmd, flags); 2581 } 2582 2583 static int mr6_msgsize(bool unresolved) 2584 { 2585 size_t len = 2586 NLMSG_ALIGN(sizeof(struct rtmsg)) 2587 + nla_total_size(4) /* RTA_TABLE */ 2588 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2589 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2590 ; 2591 2592 if (!unresolved) 2593 len = len 2594 + nla_total_size(4) /* RTA_IIF */ 2595 + nla_total_size(0) /* RTA_MULTIPATH */ 2596 + MAXMIFS * NLA_ALIGN(sizeof(struct rtnexthop)) 2597 /* RTA_MFC_STATS */ 2598 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2599 ; 2600 2601 return len; 2602 } 2603 2604 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2605 int cmd) 2606 { 2607 struct net *net = read_pnet(&mrt->net); 2608 struct sk_buff *skb; 2609 int err = -ENOBUFS; 2610 2611 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS), GFP_ATOMIC); 2612 if (!skb) 2613 goto errout; 2614 2615 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2616 if (err < 0) 2617 goto errout; 2618 2619 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2620 return; 2621 2622 errout: 2623 kfree_skb(skb); 2624 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2625 } 2626 2627 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2628 { 2629 size_t len = 2630 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2631 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2632 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2633 /* IP6MRA_CREPORT_SRC_ADDR */ 2634 + nla_total_size(sizeof(struct in6_addr)) 2635 /* IP6MRA_CREPORT_DST_ADDR */ 2636 + nla_total_size(sizeof(struct in6_addr)) 2637 /* 
IP6MRA_CREPORT_PKT */ 2638 + nla_total_size(payloadlen) 2639 ; 2640 2641 return len; 2642 } 2643 2644 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2645 { 2646 struct net *net = read_pnet(&mrt->net); 2647 struct nlmsghdr *nlh; 2648 struct rtgenmsg *rtgenm; 2649 struct mrt6msg *msg; 2650 struct sk_buff *skb; 2651 struct nlattr *nla; 2652 int payloadlen; 2653 2654 payloadlen = pkt->len - sizeof(struct mrt6msg); 2655 msg = (struct mrt6msg *)skb_transport_header(pkt); 2656 2657 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2658 if (!skb) 2659 goto errout; 2660 2661 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2662 sizeof(struct rtgenmsg), 0); 2663 if (!nlh) 2664 goto errout; 2665 rtgenm = nlmsg_data(nlh); 2666 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2667 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2668 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2669 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2670 &msg->im6_src) || 2671 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2672 &msg->im6_dst)) 2673 goto nla_put_failure; 2674 2675 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2676 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2677 nla_data(nla), payloadlen)) 2678 goto nla_put_failure; 2679 2680 nlmsg_end(skb, nlh); 2681 2682 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2683 return; 2684 2685 nla_put_failure: 2686 nlmsg_cancel(skb, nlh); 2687 errout: 2688 kfree_skb(skb); 2689 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2690 } 2691 2692 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { 2693 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2694 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2695 [RTA_TABLE] = { .type = NLA_U32 }, 2696 }; 2697 2698 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, 2699 const struct nlmsghdr *nlh, 2700 struct nlattr **tb, 2701 struct 
netlink_ext_ack *extack) 2702 { 2703 struct rtmsg *rtm; 2704 int err; 2705 2706 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, 2707 extack); 2708 if (err) 2709 return err; 2710 2711 rtm = nlmsg_data(nlh); 2712 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 2713 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 2714 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2715 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2716 NL_SET_ERR_MSG_MOD(extack, 2717 "Invalid values in header for multicast route get request"); 2718 return -EINVAL; 2719 } 2720 2721 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2722 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2723 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 2724 return -EINVAL; 2725 } 2726 2727 return 0; 2728 } 2729 2730 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2731 struct netlink_ext_ack *extack) 2732 { 2733 struct net *net = sock_net(in_skb->sk); 2734 struct in6_addr src = {}, grp = {}; 2735 struct nlattr *tb[RTA_MAX + 1]; 2736 struct mfc6_cache *cache; 2737 struct mr_table *mrt; 2738 struct sk_buff *skb; 2739 u32 tableid; 2740 int err; 2741 2742 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2743 if (err < 0) 2744 return err; 2745 2746 skb = nlmsg_new(mr6_msgsize(false), GFP_KERNEL); 2747 if (!skb) 2748 return -ENOBUFS; 2749 2750 if (tb[RTA_SRC]) 2751 src = nla_get_in6_addr(tb[RTA_SRC]); 2752 if (tb[RTA_DST]) 2753 grp = nla_get_in6_addr(tb[RTA_DST]); 2754 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2755 2756 rcu_read_lock(); 2757 2758 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); 2759 if (!mrt) { 2760 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); 2761 err = -ENOENT; 2762 goto err; 2763 } 2764 2765 cache = ip6mr_cache_find(mrt, &src, &grp); 2766 if (!cache) { 2767 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); 2768 err = -ENOENT; 2769 goto err; 2770 } 2771 2772 err = 
ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2773 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); 2774 if (err < 0) 2775 goto err; 2776 2777 rcu_read_unlock(); 2778 2779 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2780 2781 err: 2782 rcu_read_unlock(); 2783 kfree_skb(skb); 2784 return err; 2785 } 2786 2787 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2788 { 2789 const struct nlmsghdr *nlh = cb->nlh; 2790 struct fib_dump_filter filter = {}; 2791 int err; 2792 2793 rcu_read_lock(); 2794 2795 if (cb->strict_check) { 2796 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2797 &filter, cb); 2798 if (err < 0) 2799 goto unlock; 2800 } 2801 2802 if (filter.table_id) { 2803 struct mr_table *mrt; 2804 2805 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2806 if (!mrt) { 2807 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) { 2808 err = skb->len; 2809 goto unlock; 2810 } 2811 2812 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2813 err = -ENOENT; 2814 goto unlock; 2815 } 2816 2817 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2818 &mfc_unres_lock, &filter); 2819 err = skb->len ? : err; 2820 goto unlock; 2821 } 2822 2823 err = mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2824 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2825 unlock: 2826 rcu_read_unlock(); 2827 2828 return err; 2829 } 2830