1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 
63 */ 64 65 static DEFINE_SPINLOCK(mrt_lock); 66 67 static struct net_device *vif_dev_read(const struct vif_device *vif) 68 { 69 return rcu_dereference(vif->dev); 70 } 71 72 /* Multicast router control variables */ 73 74 /* Special spinlock for queue of unresolved entries */ 75 static DEFINE_SPINLOCK(mfc_unres_lock); 76 77 /* We return to original Alan's scheme. Hash table of resolved 78 entries is changed only in process context and protected 79 with weak lock mrt_lock. Queue of unresolved entries is protected 80 with strong spinlock mfc_unres_lock. 81 82 In this case data path is free of exclusive locks at all. 83 */ 84 85 static struct kmem_cache *mrt_cachep __read_mostly; 86 87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 88 static void ip6mr_free_table(struct mr_table *mrt); 89 90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 91 struct net_device *dev, struct sk_buff *skb, 92 struct mfc6_cache *cache); 93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 94 mifi_t mifi, int assert); 95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 96 int cmd); 97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 99 struct netlink_ext_ack *extack); 100 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 101 struct netlink_callback *cb); 102 static void mroute_clean_tables(struct mr_table *mrt, int flags); 103 static void ipmr_expire_process(struct timer_list *t); 104 105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 106 #define ip6mr_for_each_table(mrt, net) \ 107 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 108 lockdep_rtnl_is_held() || \ 109 list_empty(&net->ipv6.mr6_tables)) 110 111 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 112 struct mr_table *mrt) 113 { 114 struct mr_table *ret; 115 116 if (!mrt) 117 ret = 
list_entry_rcu(net->ipv6.mr6_tables.next, 118 struct mr_table, list); 119 else 120 ret = list_entry_rcu(mrt->list.next, 121 struct mr_table, list); 122 123 if (&ret->list == &net->ipv6.mr6_tables) 124 return NULL; 125 return ret; 126 } 127 128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) 129 { 130 struct mr_table *mrt; 131 132 ip6mr_for_each_table(mrt, net) { 133 if (mrt->id == id) 134 return mrt; 135 } 136 return NULL; 137 } 138 139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 140 { 141 struct mr_table *mrt; 142 143 rcu_read_lock(); 144 mrt = __ip6mr_get_table(net, id); 145 rcu_read_unlock(); 146 return mrt; 147 } 148 149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 150 struct mr_table **mrt) 151 { 152 int err; 153 struct ip6mr_result res; 154 struct fib_lookup_arg arg = { 155 .result = &res, 156 .flags = FIB_LOOKUP_NOREF, 157 }; 158 159 /* update flow if oif or iif point to device enslaved to l3mdev */ 160 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 161 162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 163 flowi6_to_flowi(flp6), 0, &arg); 164 if (err < 0) 165 return err; 166 *mrt = res.mrt; 167 return 0; 168 } 169 170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 171 int flags, struct fib_lookup_arg *arg) 172 { 173 struct ip6mr_result *res = arg->result; 174 struct mr_table *mrt; 175 176 switch (rule->action) { 177 case FR_ACT_TO_TBL: 178 break; 179 case FR_ACT_UNREACHABLE: 180 return -ENETUNREACH; 181 case FR_ACT_PROHIBIT: 182 return -EACCES; 183 case FR_ACT_BLACKHOLE: 184 default: 185 return -EINVAL; 186 } 187 188 arg->table = fib_rule_get_table(rule, arg); 189 190 mrt = __ip6mr_get_table(rule->fr_net, arg->table); 191 if (!mrt) 192 return -EAGAIN; 193 res->mrt = mrt; 194 return 0; 195 } 196 197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 198 { 199 return 1; 200 } 201 202 static int ip6mr_rule_configure(struct fib_rule *rule, struct 
sk_buff *skb, 203 struct fib_rule_hdr *frh, struct nlattr **tb, 204 struct netlink_ext_ack *extack) 205 { 206 return 0; 207 } 208 209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 210 struct nlattr **tb) 211 { 212 return 1; 213 } 214 215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 216 struct fib_rule_hdr *frh) 217 { 218 frh->dst_len = 0; 219 frh->src_len = 0; 220 frh->tos = 0; 221 return 0; 222 } 223 224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 225 .family = RTNL_FAMILY_IP6MR, 226 .rule_size = sizeof(struct ip6mr_rule), 227 .addr_size = sizeof(struct in6_addr), 228 .action = ip6mr_rule_action, 229 .match = ip6mr_rule_match, 230 .configure = ip6mr_rule_configure, 231 .compare = ip6mr_rule_compare, 232 .fill = ip6mr_rule_fill, 233 .nlgroup = RTNLGRP_IPV6_RULE, 234 .owner = THIS_MODULE, 235 }; 236 237 static int __net_init ip6mr_rules_init(struct net *net) 238 { 239 struct fib_rules_ops *ops; 240 struct mr_table *mrt; 241 int err; 242 243 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 244 if (IS_ERR(ops)) 245 return PTR_ERR(ops); 246 247 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 248 249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 250 if (IS_ERR(mrt)) { 251 err = PTR_ERR(mrt); 252 goto err1; 253 } 254 255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); 256 if (err < 0) 257 goto err2; 258 259 net->ipv6.mr6_rules_ops = ops; 260 return 0; 261 262 err2: 263 rtnl_lock(); 264 ip6mr_free_table(mrt); 265 rtnl_unlock(); 266 err1: 267 fib_rules_unregister(ops); 268 return err; 269 } 270 271 static void __net_exit ip6mr_rules_exit(struct net *net) 272 { 273 struct mr_table *mrt, *next; 274 275 ASSERT_RTNL(); 276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 277 list_del(&mrt->list); 278 ip6mr_free_table(mrt); 279 } 280 fib_rules_unregister(net->ipv6.mr6_rules_ops); 281 } 282 283 static int ip6mr_rules_dump(struct net *net, struct notifier_block 
*nb, 284 struct netlink_ext_ack *extack) 285 { 286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 287 } 288 289 static unsigned int ip6mr_rules_seq_read(const struct net *net) 290 { 291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 292 } 293 294 bool ip6mr_rule_default(const struct fib_rule *rule) 295 { 296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 298 } 299 EXPORT_SYMBOL(ip6mr_rule_default); 300 #else 301 #define ip6mr_for_each_table(mrt, net) \ 302 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 303 304 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 305 struct mr_table *mrt) 306 { 307 if (!mrt) 308 return net->ipv6.mrt6; 309 return NULL; 310 } 311 312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 313 { 314 return net->ipv6.mrt6; 315 } 316 317 #define __ip6mr_get_table ip6mr_get_table 318 319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 320 struct mr_table **mrt) 321 { 322 *mrt = net->ipv6.mrt6; 323 return 0; 324 } 325 326 static int __net_init ip6mr_rules_init(struct net *net) 327 { 328 struct mr_table *mrt; 329 330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 331 if (IS_ERR(mrt)) 332 return PTR_ERR(mrt); 333 net->ipv6.mrt6 = mrt; 334 return 0; 335 } 336 337 static void __net_exit ip6mr_rules_exit(struct net *net) 338 { 339 ASSERT_RTNL(); 340 ip6mr_free_table(net->ipv6.mrt6); 341 net->ipv6.mrt6 = NULL; 342 } 343 344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 345 struct netlink_ext_ack *extack) 346 { 347 return 0; 348 } 349 350 static unsigned int ip6mr_rules_seq_read(const struct net *net) 351 { 352 return 0; 353 } 354 #endif 355 356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 357 const void *ptr) 358 { 359 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 360 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 361 362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, 
&cmparg->mf6c_mcastgrp) || 363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 364 } 365 366 static const struct rhashtable_params ip6mr_rht_params = { 367 .head_offset = offsetof(struct mr_mfc, mnode), 368 .key_offset = offsetof(struct mfc6_cache, cmparg), 369 .key_len = sizeof(struct mfc6_cache_cmp_arg), 370 .nelem_hint = 3, 371 .obj_cmpfn = ip6mr_hash_cmp, 372 .automatic_shrinking = true, 373 }; 374 375 static void ip6mr_new_table_set(struct mr_table *mrt, 376 struct net *net) 377 { 378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 379 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 380 #endif 381 } 382 383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 384 .mf6c_origin = IN6ADDR_ANY_INIT, 385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 386 }; 387 388 static struct mr_table_ops ip6mr_mr_table_ops = { 389 .rht_params = &ip6mr_rht_params, 390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 391 }; 392 393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 394 { 395 struct mr_table *mrt; 396 397 mrt = __ip6mr_get_table(net, id); 398 if (mrt) 399 return mrt; 400 401 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 402 ipmr_expire_process, ip6mr_new_table_set); 403 } 404 405 static void ip6mr_free_table(struct mr_table *mrt) 406 { 407 struct net *net = read_pnet(&mrt->net); 408 409 WARN_ON_ONCE(!mr_can_free_table(net)); 410 411 timer_shutdown_sync(&mrt->ipmr_expire_timer); 412 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 413 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); 414 rhltable_destroy(&mrt->mfc_hash); 415 kfree(mrt); 416 } 417 418 #ifdef CONFIG_PROC_FS 419 /* The /proc interfaces to multicast routing 420 * /proc/ip6_mr_cache /proc/ip6_mr_vif 421 */ 422 423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 424 __acquires(RCU) 425 { 426 struct mr_vif_iter *iter = seq->private; 427 struct net *net = seq_file_net(seq); 428 struct mr_table *mrt; 429 430 rcu_read_lock(); 431 mrt = 
__ip6mr_get_table(net, RT6_TABLE_DFLT); 432 if (!mrt) { 433 rcu_read_unlock(); 434 return ERR_PTR(-ENOENT); 435 } 436 437 iter->mrt = mrt; 438 439 return mr_vif_seq_start(seq, pos); 440 } 441 442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 443 __releases(RCU) 444 { 445 rcu_read_unlock(); 446 } 447 448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 449 { 450 struct mr_vif_iter *iter = seq->private; 451 struct mr_table *mrt = iter->mrt; 452 453 if (v == SEQ_START_TOKEN) { 454 seq_puts(seq, 455 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 456 } else { 457 const struct vif_device *vif = v; 458 const struct net_device *vif_dev; 459 const char *name; 460 461 vif_dev = vif_dev_read(vif); 462 name = vif_dev ? vif_dev->name : "none"; 463 464 seq_printf(seq, 465 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 466 vif - mrt->vif_table, 467 name, vif->bytes_in, vif->pkt_in, 468 vif->bytes_out, vif->pkt_out, 469 vif->flags); 470 } 471 return 0; 472 } 473 474 static const struct seq_operations ip6mr_vif_seq_ops = { 475 .start = ip6mr_vif_seq_start, 476 .next = mr_vif_seq_next, 477 .stop = ip6mr_vif_seq_stop, 478 .show = ip6mr_vif_seq_show, 479 }; 480 481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 482 { 483 struct net *net = seq_file_net(seq); 484 struct mr_table *mrt; 485 486 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 487 if (!mrt) 488 return ERR_PTR(-ENOENT); 489 490 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 491 } 492 493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 494 { 495 int n; 496 497 if (v == SEQ_START_TOKEN) { 498 seq_puts(seq, 499 "Group " 500 "Origin " 501 "Iif Pkts Bytes Wrong Oifs\n"); 502 } else { 503 const struct mfc6_cache *mfc = v; 504 const struct mr_mfc_iter *it = seq->private; 505 struct mr_table *mrt = it->mrt; 506 507 seq_printf(seq, "%pI6 %pI6 %-3hd", 508 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 509 mfc->_c.mfc_parent); 510 511 if (it->cache != &mrt->mfc_unres_queue) { 
512 seq_printf(seq, " %8lu %8lu %8lu", 513 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 514 atomic_long_read(&mfc->_c.mfc_un.res.bytes), 515 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 516 for (n = mfc->_c.mfc_un.res.minvif; 517 n < mfc->_c.mfc_un.res.maxvif; n++) { 518 if (VIF_EXISTS(mrt, n) && 519 mfc->_c.mfc_un.res.ttls[n] < 255) 520 seq_printf(seq, 521 " %2d:%-3d", n, 522 mfc->_c.mfc_un.res.ttls[n]); 523 } 524 } else { 525 /* unresolved mfc_caches don't contain 526 * pkt, bytes and wrong_if values 527 */ 528 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 529 } 530 seq_putc(seq, '\n'); 531 } 532 return 0; 533 } 534 535 static const struct seq_operations ipmr_mfc_seq_ops = { 536 .start = ipmr_mfc_seq_start, 537 .next = mr_mfc_seq_next, 538 .stop = mr_mfc_seq_stop, 539 .show = ipmr_mfc_seq_show, 540 }; 541 #endif 542 543 #ifdef CONFIG_IPV6_PIMSM_V2 544 545 static int pim6_rcv(struct sk_buff *skb) 546 { 547 struct pimreghdr *pim; 548 struct ipv6hdr *encap; 549 struct net_device *reg_dev = NULL; 550 struct net *net = dev_net(skb->dev); 551 struct mr_table *mrt; 552 struct flowi6 fl6 = { 553 .flowi6_iif = skb->dev->ifindex, 554 .flowi6_mark = skb->mark, 555 }; 556 int reg_vif_num; 557 558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 559 goto drop; 560 561 pim = (struct pimreghdr *)skb_transport_header(skb); 562 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 563 (pim->flags & PIM_NULL_REGISTER) || 564 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 565 sizeof(*pim), IPPROTO_PIM, 566 csum_partial((void *)pim, sizeof(*pim), 0)) && 567 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 568 goto drop; 569 570 /* check if the inner packet is destined to mcast group */ 571 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 572 sizeof(*pim)); 573 574 if (!ipv6_addr_is_multicast(&encap->daddr) || 575 encap->payload_len == 0 || 576 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 577 goto drop; 578 579 if 
(ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 580 goto drop; 581 582 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ 583 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 584 if (reg_vif_num >= 0) 585 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); 586 587 if (!reg_dev) 588 goto drop; 589 590 skb->mac_header = skb->network_header; 591 skb_pull(skb, (u8 *)encap - skb->data); 592 skb_reset_network_header(skb); 593 skb->protocol = htons(ETH_P_IPV6); 594 skb->ip_summed = CHECKSUM_NONE; 595 596 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 597 598 netif_rx(skb); 599 600 return 0; 601 drop: 602 kfree_skb(skb); 603 return 0; 604 } 605 606 static const struct inet6_protocol pim6_protocol = { 607 .handler = pim6_rcv, 608 }; 609 610 /* Service routines creating virtual interfaces: PIMREG */ 611 612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 613 struct net_device *dev) 614 { 615 struct net *net = dev_net(dev); 616 struct mr_table *mrt; 617 struct flowi6 fl6 = { 618 .flowi6_oif = dev->ifindex, 619 .flowi6_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 620 .flowi6_mark = skb->mark, 621 }; 622 623 if (!pskb_inet_may_pull(skb)) 624 goto tx_err; 625 626 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 627 goto tx_err; 628 629 DEV_STATS_ADD(dev, tx_bytes, skb->len); 630 DEV_STATS_INC(dev, tx_packets); 631 rcu_read_lock(); 632 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 633 MRT6MSG_WHOLEPKT); 634 rcu_read_unlock(); 635 kfree_skb(skb); 636 return NETDEV_TX_OK; 637 638 tx_err: 639 DEV_STATS_INC(dev, tx_errors); 640 kfree_skb(skb); 641 return NETDEV_TX_OK; 642 } 643 644 static int reg_vif_get_iflink(const struct net_device *dev) 645 { 646 return 0; 647 } 648 649 static const struct net_device_ops reg_vif_netdev_ops = { 650 .ndo_start_xmit = reg_vif_xmit, 651 .ndo_get_iflink = reg_vif_get_iflink, 652 }; 653 654 static void reg_vif_setup(struct net_device *dev) 655 { 656 dev->type = ARPHRD_PIMREG; 657 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 658 dev->flags = IFF_NOARP; 659 dev->netdev_ops = ®_vif_netdev_ops; 660 dev->needs_free_netdev = true; 661 dev->netns_immutable = true; 662 } 663 664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 665 { 666 struct net_device *dev; 667 char name[IFNAMSIZ]; 668 669 if (mrt->id == RT6_TABLE_DFLT) 670 sprintf(name, "pim6reg"); 671 else 672 sprintf(name, "pim6reg%u", mrt->id); 673 674 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 675 if (!dev) 676 return NULL; 677 678 dev_net_set(dev, net); 679 680 if (register_netdevice(dev)) { 681 free_netdev(dev); 682 return NULL; 683 } 684 685 if (dev_open(dev, NULL)) 686 goto failure; 687 688 dev_hold(dev); 689 return dev; 690 691 failure: 692 unregister_netdevice(dev); 693 return NULL; 694 } 695 #endif 696 697 static int call_ip6mr_vif_entry_notifiers(struct net *net, 698 enum fib_event_type event_type, 699 struct vif_device *vif, 700 struct net_device *vif_dev, 701 mifi_t vif_index, u32 tb_id) 702 { 703 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, 
event_type, 704 vif, vif_dev, vif_index, tb_id, 705 &net->ipv6.ipmr_seq); 706 } 707 708 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 709 enum fib_event_type event_type, 710 struct mfc6_cache *mfc, u32 tb_id) 711 { 712 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 713 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 714 } 715 716 /* Delete a VIF entry */ 717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 718 struct list_head *head) 719 { 720 struct vif_device *v; 721 struct net_device *dev; 722 struct inet6_dev *in6_dev; 723 724 if (vifi < 0 || vifi >= mrt->maxvif) 725 return -EADDRNOTAVAIL; 726 727 v = &mrt->vif_table[vifi]; 728 729 dev = rtnl_dereference(v->dev); 730 if (!dev) 731 return -EADDRNOTAVAIL; 732 733 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 734 FIB_EVENT_VIF_DEL, v, dev, 735 vifi, mrt->id); 736 spin_lock(&mrt_lock); 737 RCU_INIT_POINTER(v->dev, NULL); 738 739 #ifdef CONFIG_IPV6_PIMSM_V2 740 if (vifi == mrt->mroute_reg_vif_num) { 741 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 742 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 743 } 744 #endif 745 746 if (vifi + 1 == mrt->maxvif) { 747 int tmp; 748 for (tmp = vifi - 1; tmp >= 0; tmp--) { 749 if (VIF_EXISTS(mrt, tmp)) 750 break; 751 } 752 WRITE_ONCE(mrt->maxvif, tmp + 1); 753 } 754 755 spin_unlock(&mrt_lock); 756 757 dev_set_allmulti(dev, -1); 758 759 in6_dev = __in6_dev_get(dev); 760 if (in6_dev) { 761 atomic_dec(&in6_dev->cnf.mc_forwarding); 762 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 763 NETCONFA_MC_FORWARDING, 764 dev->ifindex, &in6_dev->cnf); 765 } 766 767 if ((v->flags & MIFF_REGISTER) && !notify) 768 unregister_netdevice_queue(dev, head); 769 770 netdev_put(dev, &v->dev_tracker); 771 return 0; 772 } 773 774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 775 { 776 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 777 778 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 779 } 
780 781 static inline void ip6mr_cache_free(struct mfc6_cache *c) 782 { 783 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 784 } 785 786 /* Destroy an unresolved cache entry, killing queued skbs 787 and reporting error to netlink readers. 788 */ 789 790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 791 { 792 struct net *net = read_pnet(&mrt->net); 793 struct sk_buff *skb; 794 795 atomic_dec(&mrt->cache_resolve_queue_len); 796 797 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 798 if (ipv6_hdr(skb)->version == 0) { 799 struct nlmsghdr *nlh = skb_pull(skb, 800 sizeof(struct ipv6hdr)); 801 nlh->nlmsg_type = NLMSG_ERROR; 802 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 803 skb_trim(skb, nlh->nlmsg_len); 804 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 805 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 806 } else 807 kfree_skb(skb); 808 } 809 810 ip6mr_cache_free(c); 811 } 812 813 814 /* Timer process for all the unresolved queue. */ 815 816 static void ipmr_do_expire_process(struct mr_table *mrt) 817 { 818 unsigned long now = jiffies; 819 unsigned long expires = 10 * HZ; 820 struct mr_mfc *c, *next; 821 822 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 823 if (time_after(c->mfc_un.unres.expires, now)) { 824 /* not yet... 
*/ 825 unsigned long interval = c->mfc_un.unres.expires - now; 826 if (interval < expires) 827 expires = interval; 828 continue; 829 } 830 831 list_del(&c->list); 832 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 833 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 834 } 835 836 if (!list_empty(&mrt->mfc_unres_queue)) 837 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 838 } 839 840 static void ipmr_expire_process(struct timer_list *t) 841 { 842 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 843 844 if (!spin_trylock(&mfc_unres_lock)) { 845 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 846 return; 847 } 848 849 if (!list_empty(&mrt->mfc_unres_queue)) 850 ipmr_do_expire_process(mrt); 851 852 spin_unlock(&mfc_unres_lock); 853 } 854 855 /* Fill oifs list. It is called under locked mrt_lock. */ 856 857 static void ip6mr_update_thresholds(struct mr_table *mrt, 858 struct mr_mfc *cache, 859 unsigned char *ttls) 860 { 861 int vifi; 862 863 cache->mfc_un.res.minvif = MAXMIFS; 864 cache->mfc_un.res.maxvif = 0; 865 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 866 867 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 868 if (VIF_EXISTS(mrt, vifi) && 869 ttls[vifi] && ttls[vifi] < 255) { 870 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 871 if (cache->mfc_un.res.minvif > vifi) 872 cache->mfc_un.res.minvif = vifi; 873 if (cache->mfc_un.res.maxvif <= vifi) 874 cache->mfc_un.res.maxvif = vifi + 1; 875 } 876 } 877 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 878 } 879 880 static int mif6_add(struct net *net, struct mr_table *mrt, 881 struct mif6ctl *vifc, int mrtsock) 882 { 883 int vifi = vifc->mif6c_mifi; 884 struct vif_device *v = &mrt->vif_table[vifi]; 885 struct net_device *dev; 886 struct inet6_dev *in6_dev; 887 int err; 888 889 /* Is vif busy ? 
*/ 890 if (VIF_EXISTS(mrt, vifi)) 891 return -EADDRINUSE; 892 893 switch (vifc->mif6c_flags) { 894 #ifdef CONFIG_IPV6_PIMSM_V2 895 case MIFF_REGISTER: 896 /* 897 * Special Purpose VIF in PIM 898 * All the packets will be sent to the daemon 899 */ 900 if (mrt->mroute_reg_vif_num >= 0) 901 return -EADDRINUSE; 902 dev = ip6mr_reg_vif(net, mrt); 903 if (!dev) 904 return -ENOBUFS; 905 err = dev_set_allmulti(dev, 1); 906 if (err) { 907 unregister_netdevice(dev); 908 dev_put(dev); 909 return err; 910 } 911 break; 912 #endif 913 case 0: 914 dev = dev_get_by_index(net, vifc->mif6c_pifi); 915 if (!dev) 916 return -EADDRNOTAVAIL; 917 err = dev_set_allmulti(dev, 1); 918 if (err) { 919 dev_put(dev); 920 return err; 921 } 922 break; 923 default: 924 return -EINVAL; 925 } 926 927 in6_dev = __in6_dev_get(dev); 928 if (in6_dev) { 929 atomic_inc(&in6_dev->cnf.mc_forwarding); 930 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 931 NETCONFA_MC_FORWARDING, 932 dev->ifindex, &in6_dev->cnf); 933 } 934 935 /* Fill in the VIF structures */ 936 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 937 vifc->mif6c_flags | (!mrtsock ? 
VIFF_STATIC : 0), 938 MIFF_REGISTER); 939 940 /* And finish update writing critical data */ 941 spin_lock(&mrt_lock); 942 rcu_assign_pointer(v->dev, dev); 943 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 944 #ifdef CONFIG_IPV6_PIMSM_V2 945 if (v->flags & MIFF_REGISTER) 946 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 947 #endif 948 if (vifi + 1 > mrt->maxvif) 949 WRITE_ONCE(mrt->maxvif, vifi + 1); 950 spin_unlock(&mrt_lock); 951 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 952 v, dev, vifi, mrt->id); 953 return 0; 954 } 955 956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 957 const struct in6_addr *origin, 958 const struct in6_addr *mcastgrp) 959 { 960 struct mfc6_cache_cmp_arg arg = { 961 .mf6c_origin = *origin, 962 .mf6c_mcastgrp = *mcastgrp, 963 }; 964 965 return mr_mfc_find(mrt, &arg); 966 } 967 968 /* Look for a (*,G) entry */ 969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 970 struct in6_addr *mcastgrp, 971 mifi_t mifi) 972 { 973 struct mfc6_cache_cmp_arg arg = { 974 .mf6c_origin = in6addr_any, 975 .mf6c_mcastgrp = *mcastgrp, 976 }; 977 978 if (ipv6_addr_any(mcastgrp)) 979 return mr_mfc_find_any_parent(mrt, mifi); 980 return mr_mfc_find_any(mrt, mifi, &arg); 981 } 982 983 /* Look for a (S,G,iif) entry if parent != -1 */ 984 static struct mfc6_cache * 985 ip6mr_cache_find_parent(struct mr_table *mrt, 986 const struct in6_addr *origin, 987 const struct in6_addr *mcastgrp, 988 int parent) 989 { 990 struct mfc6_cache_cmp_arg arg = { 991 .mf6c_origin = *origin, 992 .mf6c_mcastgrp = *mcastgrp, 993 }; 994 995 return mr_mfc_find_parent(mrt, &arg, parent); 996 } 997 998 /* Allocate a multicast cache entry */ 999 static struct mfc6_cache *ip6mr_cache_alloc(void) 1000 { 1001 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1002 if (!c) 1003 return NULL; 1004 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1005 c->_c.mfc_un.res.minvif = MAXMIFS; 1006 c->_c.free = 
ip6mr_cache_free_rcu; 1007 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1008 return c; 1009 } 1010 1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1012 { 1013 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1014 if (!c) 1015 return NULL; 1016 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1017 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1018 return c; 1019 } 1020 1021 /* 1022 * A cache entry has gone into a resolved state from queued 1023 */ 1024 1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1026 struct mfc6_cache *uc, struct mfc6_cache *c) 1027 { 1028 struct sk_buff *skb; 1029 1030 /* 1031 * Play the pending entries through our router 1032 */ 1033 1034 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1035 if (ipv6_hdr(skb)->version == 0) { 1036 struct nlmsghdr *nlh = skb_pull(skb, 1037 sizeof(struct ipv6hdr)); 1038 1039 if (mr_fill_mroute(mrt, skb, &c->_c, 1040 nlmsg_data(nlh)) > 0) { 1041 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1042 } else { 1043 nlh->nlmsg_type = NLMSG_ERROR; 1044 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1045 skb_trim(skb, nlh->nlmsg_len); 1046 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1047 } 1048 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1049 } else { 1050 rcu_read_lock(); 1051 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1052 rcu_read_unlock(); 1053 } 1054 } 1055 } 1056 1057 /* 1058 * Bounce a cache query up to pim6sd and netlink. 
1059 * 1060 * Called under rcu_read_lock() 1061 */ 1062 1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 1064 mifi_t mifi, int assert) 1065 { 1066 enum skb_drop_reason reason; 1067 struct sock *mroute6_sk; 1068 struct sk_buff *skb; 1069 struct mrt6msg *msg; 1070 1071 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1072 if (!mroute6_sk) 1073 return -EINVAL; 1074 1075 #ifdef CONFIG_IPV6_PIMSM_V2 1076 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) 1077 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1078 +sizeof(*msg)); 1079 else 1080 #endif 1081 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1082 1083 if (!skb) 1084 return -ENOBUFS; 1085 1086 /* I suppose that internal messages 1087 * do not require checksums */ 1088 1089 skb->ip_summed = CHECKSUM_UNNECESSARY; 1090 1091 #ifdef CONFIG_IPV6_PIMSM_V2 1092 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { 1093 /* Ugly, but we have no choice with this interface. 1094 Duplicate old header, fix length etc. 
1095 And all this only to mangle msg->im6_msgtype and 1096 to set msg->im6_mbz to "mbz" :-) 1097 */ 1098 __skb_pull(skb, skb_network_offset(pkt)); 1099 1100 skb_push(skb, sizeof(*msg)); 1101 skb_reset_transport_header(skb); 1102 msg = (struct mrt6msg *)skb_transport_header(skb); 1103 msg->im6_mbz = 0; 1104 msg->im6_msgtype = assert; 1105 if (assert == MRT6MSG_WRMIFWHOLE) 1106 msg->im6_mif = mifi; 1107 else 1108 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); 1109 msg->im6_pad = 0; 1110 msg->im6_src = ipv6_hdr(pkt)->saddr; 1111 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1112 1113 skb->ip_summed = CHECKSUM_UNNECESSARY; 1114 } else 1115 #endif 1116 { 1117 /* 1118 * Copy the IP header 1119 */ 1120 1121 skb_put(skb, sizeof(struct ipv6hdr)); 1122 skb_reset_network_header(skb); 1123 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1124 1125 /* 1126 * Add our header 1127 */ 1128 skb_put(skb, sizeof(*msg)); 1129 skb_reset_transport_header(skb); 1130 msg = (struct mrt6msg *)skb_transport_header(skb); 1131 1132 msg->im6_mbz = 0; 1133 msg->im6_msgtype = assert; 1134 msg->im6_mif = mifi; 1135 msg->im6_pad = 0; 1136 msg->im6_src = ipv6_hdr(pkt)->saddr; 1137 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1138 1139 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1140 skb->ip_summed = CHECKSUM_UNNECESSARY; 1141 } 1142 1143 mrt6msg_netlink_event(mrt, skb); 1144 1145 /* Deliver to user space multicast routing algorithms */ 1146 reason = sock_queue_rcv_skb_reason(mroute6_sk, skb); 1147 1148 if (reason) { 1149 sk_skb_reason_drop(mroute6_sk, skb, reason); 1150 return -ENOMEM; 1151 } 1152 1153 return 0; 1154 } 1155 1156 /* Queue a packet for resolution. It gets locked cache entry! 
*/
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	/* The whole lookup/insert/enqueue sequence runs under mfc_unres_lock */
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			/* skb is consumed on every exit path of this function */
			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			 * out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		/* Publish the new entry on the unresolved queue and notify */
		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet; drop it once more than 3 are queued */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

/* Remove the resolved entry matching (origin, group, parent) from the hash
 * table and the cache list, then send the FIB and netlink delete
 * notifications. Returns -ENOENT when no such entry exists.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

/* Netdevice notifier callback: on NETDEV_UNREGISTER, delete every mif in
 * every table of the device's netns that points at the device. All other
 * events are ignored.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

/* fib_notifier_ops.fib_seq_read: per-netns ipmr_seq plus the rules sequence */
static unsigned int ip6mr_seq_read(const struct net *net)
{
	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}

/* fib_notifier_ops.fib_dump: thin wrapper around the generic mr_dump() */
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

/* Reset the per-netns sequence counter and register the FIB notifier ops;
 * the registered ops pointer is stored in net->ipv6.ip6mr_notifier_ops.
 */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	atomic_set(&net->ipv6.ipmr_seq, 0);

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

/* Undo ip6mr_notifier_init() */
static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			     sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			     sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

	/* Error unwinding: labels run in reverse order of the setup above */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	/* ip6mr_rules_exit() is called with RTNL held here and in exit_batch */
	rtnl_lock();
	ip6mr_rules_exit(net);
	rtnl_unlock();
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

/* Per-netns exit: removes the proc entries and the FIB notifier. The rules
 * are torn down separately in ip6mr_net_exit_batch().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_notifier_exit(net);
}

/* Batched exit: take RTNL once and release the rules of every dying netns */
static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		ip6mr_rules_exit(net);
	rtnl_unlock();
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.exit_batch = ip6mr_net_exit_batch,
};

static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { 1387 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, 1388 .msgtype = RTM_GETROUTE, 1389 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, 1390 }; 1391 1392 int __init ip6_mr_init(void) 1393 { 1394 int err; 1395 1396 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); 1397 if (!mrt_cachep) 1398 return -ENOMEM; 1399 1400 err = register_pernet_subsys(&ip6mr_net_ops); 1401 if (err) 1402 goto reg_pernet_fail; 1403 1404 err = register_netdevice_notifier(&ip6_mr_notifier); 1405 if (err) 1406 goto reg_notif_fail; 1407 #ifdef CONFIG_IPV6_PIMSM_V2 1408 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1409 pr_err("%s: can't add PIM protocol\n", __func__); 1410 err = -EAGAIN; 1411 goto add_proto_fail; 1412 } 1413 #endif 1414 err = rtnl_register_many(ip6mr_rtnl_msg_handlers); 1415 if (!err) 1416 return 0; 1417 1418 #ifdef CONFIG_IPV6_PIMSM_V2 1419 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1420 add_proto_fail: 1421 unregister_netdevice_notifier(&ip6_mr_notifier); 1422 #endif 1423 reg_notif_fail: 1424 unregister_pernet_subsys(&ip6mr_net_ops); 1425 reg_pernet_fail: 1426 kmem_cache_destroy(mrt_cachep); 1427 return err; 1428 } 1429 1430 void __init ip6_mr_cleanup(void) 1431 { 1432 rtnl_unregister_many(ip6mr_rtnl_msg_handlers); 1433 #ifdef CONFIG_IPV6_PIMSM_V2 1434 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1435 #endif 1436 unregister_netdevice_notifier(&ip6_mr_notifier); 1437 unregister_pernet_subsys(&ip6mr_net_ops); 1438 kmem_cache_destroy(mrt_cachep); 1439 } 1440 1441 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1442 struct mf6cctl *mfc, int mrtsock, int parent) 1443 { 1444 unsigned char ttls[MAXMIFS]; 1445 struct mfc6_cache *uc, *c; 1446 struct mr_mfc *_uc; 1447 bool found; 1448 int i, err; 1449 1450 if (mfc->mf6cc_parent >= MAXMIFS) 1451 return -ENFILE; 1452 1453 memset(ttls, 255, MAXMIFS); 1454 for (i = 0; i < MAXMIFS; i++) { 
1455 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1456 ttls[i] = 1; 1457 } 1458 1459 /* The entries are added/deleted only under RTNL */ 1460 rcu_read_lock(); 1461 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1462 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1463 rcu_read_unlock(); 1464 if (c) { 1465 spin_lock(&mrt_lock); 1466 c->_c.mfc_parent = mfc->mf6cc_parent; 1467 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1468 if (!mrtsock) 1469 c->_c.mfc_flags |= MFC_STATIC; 1470 spin_unlock(&mrt_lock); 1471 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1472 c, mrt->id); 1473 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1474 return 0; 1475 } 1476 1477 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1478 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1479 return -EINVAL; 1480 1481 c = ip6mr_cache_alloc(); 1482 if (!c) 1483 return -ENOMEM; 1484 1485 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1486 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1487 c->_c.mfc_parent = mfc->mf6cc_parent; 1488 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1489 if (!mrtsock) 1490 c->_c.mfc_flags |= MFC_STATIC; 1491 1492 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1493 ip6mr_rht_params); 1494 if (err) { 1495 pr_err("ip6mr: rhtable insert error %d\n", err); 1496 ip6mr_cache_free(c); 1497 return err; 1498 } 1499 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1500 1501 /* Check to see if we resolved a queued list. If so we 1502 * need to send on the frames and tidy up. 
1503 */ 1504 found = false; 1505 spin_lock_bh(&mfc_unres_lock); 1506 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1507 uc = (struct mfc6_cache *)_uc; 1508 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1509 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1510 list_del(&_uc->list); 1511 atomic_dec(&mrt->cache_resolve_queue_len); 1512 found = true; 1513 break; 1514 } 1515 } 1516 if (list_empty(&mrt->mfc_unres_queue)) 1517 timer_delete(&mrt->ipmr_expire_timer); 1518 spin_unlock_bh(&mfc_unres_lock); 1519 1520 if (found) { 1521 ip6mr_cache_resolve(net, mrt, uc, c); 1522 ip6mr_cache_free(uc); 1523 } 1524 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1525 c, mrt->id); 1526 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1527 return 0; 1528 } 1529 1530 /* 1531 * Close the multicast socket, and clear the vif tables etc 1532 */ 1533 1534 static void mroute_clean_tables(struct mr_table *mrt, int flags) 1535 { 1536 struct mr_mfc *c, *tmp; 1537 LIST_HEAD(list); 1538 int i; 1539 1540 /* Shut down all active vif entries */ 1541 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1542 for (i = 0; i < mrt->maxvif; i++) { 1543 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1544 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1545 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1546 continue; 1547 mif6_delete(mrt, i, 0, &list); 1548 } 1549 unregister_netdevice_many(&list); 1550 } 1551 1552 /* Wipe the cache */ 1553 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1554 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1555 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || 1556 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1557 continue; 1558 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1559 list_del_rcu(&c->list); 1560 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1561 FIB_EVENT_ENTRY_DEL, 1562 (struct mfc6_cache *)c, mrt->id); 1563 
mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1564 mr_cache_put(c); 1565 } 1566 } 1567 1568 if (flags & MRT6_FLUSH_MFC) { 1569 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1570 spin_lock_bh(&mfc_unres_lock); 1571 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1572 list_del(&c->list); 1573 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1574 RTM_DELROUTE); 1575 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1576 } 1577 spin_unlock_bh(&mfc_unres_lock); 1578 } 1579 } 1580 } 1581 1582 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1583 { 1584 int err = 0; 1585 struct net *net = sock_net(sk); 1586 1587 rtnl_lock(); 1588 spin_lock(&mrt_lock); 1589 if (rtnl_dereference(mrt->mroute_sk)) { 1590 err = -EADDRINUSE; 1591 } else { 1592 rcu_assign_pointer(mrt->mroute_sk, sk); 1593 sock_set_flag(sk, SOCK_RCU_FREE); 1594 atomic_inc(&net->ipv6.devconf_all->mc_forwarding); 1595 } 1596 spin_unlock(&mrt_lock); 1597 1598 if (!err) 1599 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1600 NETCONFA_MC_FORWARDING, 1601 NETCONFA_IFINDEX_ALL, 1602 net->ipv6.devconf_all); 1603 rtnl_unlock(); 1604 1605 return err; 1606 } 1607 1608 int ip6mr_sk_done(struct sock *sk) 1609 { 1610 struct net *net = sock_net(sk); 1611 struct ipv6_devconf *devconf; 1612 struct mr_table *mrt; 1613 int err = -EACCES; 1614 1615 if (sk->sk_type != SOCK_RAW || 1616 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1617 return err; 1618 1619 devconf = net->ipv6.devconf_all; 1620 if (!devconf || !atomic_read(&devconf->mc_forwarding)) 1621 return err; 1622 1623 rtnl_lock(); 1624 ip6mr_for_each_table(mrt, net) { 1625 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1626 spin_lock(&mrt_lock); 1627 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1628 /* Note that mroute_sk had SOCK_RCU_FREE set, 1629 * so the RCU grace period before sk freeing 1630 * is guaranteed by sk_destruct() 1631 */ 1632 atomic_dec(&devconf->mc_forwarding); 1633 spin_unlock(&mrt_lock); 1634 
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1635 NETCONFA_MC_FORWARDING, 1636 NETCONFA_IFINDEX_ALL, 1637 net->ipv6.devconf_all); 1638 1639 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); 1640 err = 0; 1641 break; 1642 } 1643 } 1644 rtnl_unlock(); 1645 1646 return err; 1647 } 1648 1649 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1650 { 1651 struct mr_table *mrt; 1652 struct flowi6 fl6 = { 1653 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1654 .flowi6_oif = skb->dev->ifindex, 1655 .flowi6_mark = skb->mark, 1656 }; 1657 1658 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1659 return NULL; 1660 1661 return rcu_access_pointer(mrt->mroute_sk); 1662 } 1663 EXPORT_SYMBOL(mroute6_is_socket); 1664 1665 /* 1666 * Socket options and virtual interface manipulation. The whole 1667 * virtual interface system is a complete heap, but unfortunately 1668 * that's how BSD mrouted happens to think. Maybe one day with a proper 1669 * MOSPF/PIM router set up we can clean this up. 1670 */ 1671 1672 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1673 unsigned int optlen) 1674 { 1675 int ret, parent = 0; 1676 struct mif6ctl vif; 1677 struct mf6cctl mfc; 1678 mifi_t mifi; 1679 struct net *net = sock_net(sk); 1680 struct mr_table *mrt; 1681 1682 if (sk->sk_type != SOCK_RAW || 1683 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1684 return -EOPNOTSUPP; 1685 1686 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
: RT6_TABLE_DFLT); 1687 if (!mrt) 1688 return -ENOENT; 1689 1690 if (optname != MRT6_INIT) { 1691 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1692 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1693 return -EACCES; 1694 } 1695 1696 switch (optname) { 1697 case MRT6_INIT: 1698 if (optlen < sizeof(int)) 1699 return -EINVAL; 1700 1701 return ip6mr_sk_init(mrt, sk); 1702 1703 case MRT6_DONE: 1704 return ip6mr_sk_done(sk); 1705 1706 case MRT6_ADD_MIF: 1707 if (optlen < sizeof(vif)) 1708 return -EINVAL; 1709 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1710 return -EFAULT; 1711 if (vif.mif6c_mifi >= MAXMIFS) 1712 return -ENFILE; 1713 rtnl_lock(); 1714 ret = mif6_add(net, mrt, &vif, 1715 sk == rtnl_dereference(mrt->mroute_sk)); 1716 rtnl_unlock(); 1717 return ret; 1718 1719 case MRT6_DEL_MIF: 1720 if (optlen < sizeof(mifi_t)) 1721 return -EINVAL; 1722 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1723 return -EFAULT; 1724 rtnl_lock(); 1725 ret = mif6_delete(mrt, mifi, 0, NULL); 1726 rtnl_unlock(); 1727 return ret; 1728 1729 /* 1730 * Manipulate the forwarding caches. These live 1731 * in a sort of kernel/user symbiosis. 
1732 */ 1733 case MRT6_ADD_MFC: 1734 case MRT6_DEL_MFC: 1735 parent = -1; 1736 fallthrough; 1737 case MRT6_ADD_MFC_PROXY: 1738 case MRT6_DEL_MFC_PROXY: 1739 if (optlen < sizeof(mfc)) 1740 return -EINVAL; 1741 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1742 return -EFAULT; 1743 if (parent == 0) 1744 parent = mfc.mf6cc_parent; 1745 rtnl_lock(); 1746 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1747 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1748 else 1749 ret = ip6mr_mfc_add(net, mrt, &mfc, 1750 sk == 1751 rtnl_dereference(mrt->mroute_sk), 1752 parent); 1753 rtnl_unlock(); 1754 return ret; 1755 1756 case MRT6_FLUSH: 1757 { 1758 int flags; 1759 1760 if (optlen != sizeof(flags)) 1761 return -EINVAL; 1762 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1763 return -EFAULT; 1764 rtnl_lock(); 1765 mroute_clean_tables(mrt, flags); 1766 rtnl_unlock(); 1767 return 0; 1768 } 1769 1770 /* 1771 * Control PIM assert (to activate pim will activate assert) 1772 */ 1773 case MRT6_ASSERT: 1774 { 1775 int v; 1776 1777 if (optlen != sizeof(v)) 1778 return -EINVAL; 1779 if (copy_from_sockptr(&v, optval, sizeof(v))) 1780 return -EFAULT; 1781 mrt->mroute_do_assert = v; 1782 return 0; 1783 } 1784 1785 #ifdef CONFIG_IPV6_PIMSM_V2 1786 case MRT6_PIM: 1787 { 1788 bool do_wrmifwhole; 1789 int v; 1790 1791 if (optlen != sizeof(v)) 1792 return -EINVAL; 1793 if (copy_from_sockptr(&v, optval, sizeof(v))) 1794 return -EFAULT; 1795 1796 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); 1797 v = !!v; 1798 rtnl_lock(); 1799 ret = 0; 1800 if (v != mrt->mroute_do_pim) { 1801 mrt->mroute_do_pim = v; 1802 mrt->mroute_do_assert = v; 1803 mrt->mroute_do_wrvifwhole = do_wrmifwhole; 1804 } 1805 rtnl_unlock(); 1806 return ret; 1807 } 1808 1809 #endif 1810 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1811 case MRT6_TABLE: 1812 { 1813 u32 v; 1814 1815 if (optlen != sizeof(u32)) 1816 return -EINVAL; 1817 if (copy_from_sockptr(&v, optval, sizeof(v))) 1818 return -EFAULT; 1819 /* "pim6reg%u" 
should not exceed 16 bytes (IFNAMSIZ) */ 1820 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1821 return -EINVAL; 1822 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1823 return -EBUSY; 1824 1825 rtnl_lock(); 1826 ret = 0; 1827 mrt = ip6mr_new_table(net, v); 1828 if (IS_ERR(mrt)) 1829 ret = PTR_ERR(mrt); 1830 else 1831 raw6_sk(sk)->ip6mr_table = v; 1832 rtnl_unlock(); 1833 return ret; 1834 } 1835 #endif 1836 /* 1837 * Spurious command, or MRT6_VERSION which you cannot 1838 * set. 1839 */ 1840 default: 1841 return -ENOPROTOOPT; 1842 } 1843 } 1844 1845 /* 1846 * Getsock opt support for the multicast routing system. 1847 */ 1848 1849 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1850 sockptr_t optlen) 1851 { 1852 int olr; 1853 int val; 1854 struct net *net = sock_net(sk); 1855 struct mr_table *mrt; 1856 1857 if (sk->sk_type != SOCK_RAW || 1858 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1859 return -EOPNOTSUPP; 1860 1861 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1862 if (!mrt) 1863 return -ENOENT; 1864 1865 switch (optname) { 1866 case MRT6_VERSION: 1867 val = 0x0305; 1868 break; 1869 #ifdef CONFIG_IPV6_PIMSM_V2 1870 case MRT6_PIM: 1871 val = mrt->mroute_do_pim; 1872 break; 1873 #endif 1874 case MRT6_ASSERT: 1875 val = mrt->mroute_do_assert; 1876 break; 1877 default: 1878 return -ENOPROTOOPT; 1879 } 1880 1881 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1882 return -EFAULT; 1883 1884 olr = min_t(int, olr, sizeof(int)); 1885 if (olr < 0) 1886 return -EINVAL; 1887 1888 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1889 return -EFAULT; 1890 if (copy_to_sockptr(optval, &val, olr)) 1891 return -EFAULT; 1892 return 0; 1893 } 1894 1895 /* 1896 * The IP multicast ioctl support routines. 
1897 */ 1898 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) 1899 { 1900 struct sioc_sg_req6 *sr; 1901 struct sioc_mif_req6 *vr; 1902 struct vif_device *vif; 1903 struct mfc6_cache *c; 1904 struct net *net = sock_net(sk); 1905 struct mr_table *mrt; 1906 1907 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1908 if (!mrt) 1909 return -ENOENT; 1910 1911 switch (cmd) { 1912 case SIOCGETMIFCNT_IN6: 1913 vr = (struct sioc_mif_req6 *)arg; 1914 if (vr->mifi >= mrt->maxvif) 1915 return -EINVAL; 1916 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); 1917 rcu_read_lock(); 1918 vif = &mrt->vif_table[vr->mifi]; 1919 if (VIF_EXISTS(mrt, vr->mifi)) { 1920 vr->icount = READ_ONCE(vif->pkt_in); 1921 vr->ocount = READ_ONCE(vif->pkt_out); 1922 vr->ibytes = READ_ONCE(vif->bytes_in); 1923 vr->obytes = READ_ONCE(vif->bytes_out); 1924 rcu_read_unlock(); 1925 return 0; 1926 } 1927 rcu_read_unlock(); 1928 return -EADDRNOTAVAIL; 1929 case SIOCGETSGCNT_IN6: 1930 sr = (struct sioc_sg_req6 *)arg; 1931 1932 rcu_read_lock(); 1933 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, 1934 &sr->grp.sin6_addr); 1935 if (c) { 1936 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1937 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1938 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1939 rcu_read_unlock(); 1940 return 0; 1941 } 1942 rcu_read_unlock(); 1943 return -EADDRNOTAVAIL; 1944 default: 1945 return -ENOIOCTLCMD; 1946 } 1947 } 1948 1949 #ifdef CONFIG_COMPAT 1950 struct compat_sioc_sg_req6 { 1951 struct sockaddr_in6 src; 1952 struct sockaddr_in6 grp; 1953 compat_ulong_t pktcnt; 1954 compat_ulong_t bytecnt; 1955 compat_ulong_t wrong_if; 1956 }; 1957 1958 struct compat_sioc_mif_req6 { 1959 mifi_t mifi; 1960 compat_ulong_t icount; 1961 compat_ulong_t ocount; 1962 compat_ulong_t ibytes; 1963 compat_ulong_t obytes; 1964 }; 1965 1966 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1967 { 1968 struct compat_sioc_sg_req6 sr; 
1969 struct compat_sioc_mif_req6 vr; 1970 struct vif_device *vif; 1971 struct mfc6_cache *c; 1972 struct net *net = sock_net(sk); 1973 struct mr_table *mrt; 1974 1975 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1976 if (!mrt) 1977 return -ENOENT; 1978 1979 switch (cmd) { 1980 case SIOCGETMIFCNT_IN6: 1981 if (copy_from_user(&vr, arg, sizeof(vr))) 1982 return -EFAULT; 1983 if (vr.mifi >= mrt->maxvif) 1984 return -EINVAL; 1985 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1986 rcu_read_lock(); 1987 vif = &mrt->vif_table[vr.mifi]; 1988 if (VIF_EXISTS(mrt, vr.mifi)) { 1989 vr.icount = READ_ONCE(vif->pkt_in); 1990 vr.ocount = READ_ONCE(vif->pkt_out); 1991 vr.ibytes = READ_ONCE(vif->bytes_in); 1992 vr.obytes = READ_ONCE(vif->bytes_out); 1993 rcu_read_unlock(); 1994 1995 if (copy_to_user(arg, &vr, sizeof(vr))) 1996 return -EFAULT; 1997 return 0; 1998 } 1999 rcu_read_unlock(); 2000 return -EADDRNOTAVAIL; 2001 case SIOCGETSGCNT_IN6: 2002 if (copy_from_user(&sr, arg, sizeof(sr))) 2003 return -EFAULT; 2004 2005 rcu_read_lock(); 2006 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 2007 if (c) { 2008 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 2009 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 2010 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 2011 rcu_read_unlock(); 2012 2013 if (copy_to_user(arg, &sr, sizeof(sr))) 2014 return -EFAULT; 2015 return 0; 2016 } 2017 rcu_read_unlock(); 2018 return -EADDRNOTAVAIL; 2019 default: 2020 return -ENOIOCTLCMD; 2021 } 2022 } 2023 #endif 2024 2025 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2026 { 2027 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 2028 IPSTATS_MIB_OUTFORWDATAGRAMS); 2029 return dst_output(net, sk, skb); 2030 } 2031 2032 /* 2033 * Processing handlers for ip6mr_forward 2034 */ 2035 2036 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt, 2037 struct sk_buff *skb, int vifi) 
2038 { 2039 struct vif_device *vif = &mrt->vif_table[vifi]; 2040 struct net_device *vif_dev; 2041 struct ipv6hdr *ipv6h; 2042 struct dst_entry *dst; 2043 struct flowi6 fl6; 2044 2045 vif_dev = vif_dev_read(vif); 2046 if (!vif_dev) 2047 return -1; 2048 2049 #ifdef CONFIG_IPV6_PIMSM_V2 2050 if (vif->flags & MIFF_REGISTER) { 2051 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2052 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2053 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 2054 DEV_STATS_INC(vif_dev, tx_packets); 2055 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2056 return -1; 2057 } 2058 #endif 2059 2060 ipv6h = ipv6_hdr(skb); 2061 2062 fl6 = (struct flowi6) { 2063 .flowi6_oif = vif->link, 2064 .daddr = ipv6h->daddr, 2065 }; 2066 2067 dst = ip6_route_output(net, NULL, &fl6); 2068 if (dst->error) { 2069 dst_release(dst); 2070 return -1; 2071 } 2072 2073 skb_dst_drop(skb); 2074 skb_dst_set(skb, dst); 2075 2076 /* 2077 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2078 * not only before forwarding, but after forwarding on all output 2079 * interfaces. It is clear, if mrouter runs a multicasting 2080 * program, it should receive packets not depending to what interface 2081 * program is joined. 2082 * If we will not make it, the program will have to join on all 2083 * interfaces. On the other hand, multihoming host (or router, but 2084 * not mrouter) cannot join to more than one interface - it will 2085 * result in receiving multiple packets. 2086 */ 2087 skb->dev = vif_dev; 2088 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2089 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2090 2091 /* We are about to write */ 2092 /* XXX: extension headers? 
*/ 2093 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) 2094 return -1; 2095 2096 ipv6h = ipv6_hdr(skb); 2097 ipv6h->hop_limit--; 2098 return 0; 2099 } 2100 2101 static void ip6mr_forward2(struct net *net, struct mr_table *mrt, 2102 struct sk_buff *skb, int vifi) 2103 { 2104 struct net_device *indev = skb->dev; 2105 2106 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2107 goto out_free; 2108 2109 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2110 2111 NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2112 net, NULL, skb, indev, skb->dev, 2113 ip6mr_forward2_finish); 2114 return; 2115 2116 out_free: 2117 kfree_skb(skb); 2118 } 2119 2120 static void ip6mr_output2(struct net *net, struct mr_table *mrt, 2121 struct sk_buff *skb, int vifi) 2122 { 2123 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2124 goto out_free; 2125 2126 ip6_output(net, NULL, skb); 2127 return; 2128 2129 out_free: 2130 kfree_skb(skb); 2131 } 2132 2133 /* Called with rcu_read_lock() */ 2134 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2135 { 2136 int ct; 2137 2138 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ 2139 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2140 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2141 break; 2142 } 2143 return ct; 2144 } 2145 2146 /* Called under rcu_read_lock() */ 2147 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2148 struct net_device *dev, struct sk_buff *skb, 2149 struct mfc6_cache *c) 2150 { 2151 int psend = -1; 2152 int vif, ct; 2153 int true_vifi = ip6mr_find_vif(mrt, dev); 2154 2155 vif = c->_c.mfc_parent; 2156 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2157 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2158 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2159 2160 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2161 struct mfc6_cache *cache_proxy; 2162 2163 /* For an (*,G) entry, we only check that the incoming 2164 * interface is part of the static tree. 
2165 */ 2166 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2167 if (cache_proxy && 2168 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2169 goto forward; 2170 } 2171 2172 /* 2173 * Wrong interface: drop packet and (maybe) send PIM assert. 2174 */ 2175 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2176 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2177 2178 if (true_vifi >= 0 && mrt->mroute_do_assert && 2179 /* pimsm uses asserts, when switching from RPT to SPT, 2180 so that we cannot check that packet arrived on an oif. 2181 It is bad, but otherwise we would need to move pretty 2182 large chunk of pimd to kernel. Ough... --ANK 2183 */ 2184 (mrt->mroute_do_pim || 2185 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2186 time_after(jiffies, 2187 c->_c.mfc_un.res.last_assert + 2188 MFC_ASSERT_THRESH)) { 2189 c->_c.mfc_un.res.last_assert = jiffies; 2190 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2191 if (mrt->mroute_do_wrvifwhole) 2192 ip6mr_cache_report(mrt, skb, true_vifi, 2193 MRT6MSG_WRMIFWHOLE); 2194 } 2195 goto dont_forward; 2196 } 2197 2198 forward: 2199 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2200 mrt->vif_table[vif].pkt_in + 1); 2201 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2202 mrt->vif_table[vif].bytes_in + skb->len); 2203 2204 /* 2205 * Forward the frame 2206 */ 2207 if (ipv6_addr_any(&c->mf6c_origin) && 2208 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2209 if (true_vifi >= 0 && 2210 true_vifi != c->_c.mfc_parent && 2211 ipv6_hdr(skb)->hop_limit > 2212 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2213 /* It's an (*,*) entry and the packet is not coming from 2214 * the upstream: forward the packet to the upstream 2215 * only. 
2216 */ 2217 psend = c->_c.mfc_parent; 2218 goto last_forward; 2219 } 2220 goto dont_forward; 2221 } 2222 for (ct = c->_c.mfc_un.res.maxvif - 1; 2223 ct >= c->_c.mfc_un.res.minvif; ct--) { 2224 /* For (*,G) entry, don't forward to the incoming interface */ 2225 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2226 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2227 if (psend != -1) { 2228 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2229 if (skb2) 2230 ip6mr_forward2(net, mrt, skb2, psend); 2231 } 2232 psend = ct; 2233 } 2234 } 2235 last_forward: 2236 if (psend != -1) { 2237 ip6mr_forward2(net, mrt, skb, psend); 2238 return; 2239 } 2240 2241 dont_forward: 2242 kfree_skb(skb); 2243 } 2244 2245 /* Called under rcu_read_lock() */ 2246 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, 2247 struct net_device *dev, struct sk_buff *skb, 2248 struct mfc6_cache *c) 2249 { 2250 int psend = -1; 2251 int ct; 2252 2253 WARN_ON_ONCE(!rcu_read_lock_held()); 2254 2255 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2256 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2257 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2258 2259 /* Forward the frame */ 2260 if (ipv6_addr_any(&c->mf6c_origin) && 2261 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2262 if (ipv6_hdr(skb)->hop_limit > 2263 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2264 /* It's an (*,*) entry and the packet is not coming from 2265 * the upstream: forward the packet to the upstream 2266 * only. 
2267 */ 2268 psend = c->_c.mfc_parent; 2269 goto last_forward; 2270 } 2271 goto dont_forward; 2272 } 2273 for (ct = c->_c.mfc_un.res.maxvif - 1; 2274 ct >= c->_c.mfc_un.res.minvif; ct--) { 2275 if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2276 if (psend != -1) { 2277 struct sk_buff *skb2; 2278 2279 skb2 = skb_clone(skb, GFP_ATOMIC); 2280 if (skb2) 2281 ip6mr_output2(net, mrt, skb2, psend); 2282 } 2283 psend = ct; 2284 } 2285 } 2286 last_forward: 2287 if (psend != -1) { 2288 ip6mr_output2(net, mrt, skb, psend); 2289 return; 2290 } 2291 2292 dont_forward: 2293 kfree_skb(skb); 2294 } 2295 2296 /* 2297 * Multicast packets for forwarding arrive here 2298 */ 2299 2300 int ip6_mr_input(struct sk_buff *skb) 2301 { 2302 struct net_device *dev = skb->dev; 2303 struct net *net = dev_net_rcu(dev); 2304 struct mfc6_cache *cache; 2305 struct mr_table *mrt; 2306 struct flowi6 fl6 = { 2307 .flowi6_iif = dev->ifindex, 2308 .flowi6_mark = skb->mark, 2309 }; 2310 int err; 2311 2312 /* skb->dev passed in is the master dev for vrfs. 2313 * Get the proper interface that does have a vif associated with it. 
2314 */ 2315 if (netif_is_l3_master(dev)) { 2316 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2317 if (!dev) { 2318 kfree_skb(skb); 2319 return -ENODEV; 2320 } 2321 } 2322 2323 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2324 if (err < 0) { 2325 kfree_skb(skb); 2326 return err; 2327 } 2328 2329 cache = ip6mr_cache_find(mrt, 2330 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2331 if (!cache) { 2332 int vif = ip6mr_find_vif(mrt, dev); 2333 2334 if (vif >= 0) 2335 cache = ip6mr_cache_find_any(mrt, 2336 &ipv6_hdr(skb)->daddr, 2337 vif); 2338 } 2339 2340 /* 2341 * No usable cache entry 2342 */ 2343 if (!cache) { 2344 int vif; 2345 2346 vif = ip6mr_find_vif(mrt, dev); 2347 if (vif >= 0) { 2348 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2349 2350 return err; 2351 } 2352 kfree_skb(skb); 2353 return -ENODEV; 2354 } 2355 2356 ip6_mr_forward(net, mrt, dev, skb, cache); 2357 2358 return 0; 2359 } 2360 2361 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2362 { 2363 struct net_device *dev = skb_dst(skb)->dev; 2364 struct flowi6 fl6 = (struct flowi6) { 2365 .flowi6_iif = LOOPBACK_IFINDEX, 2366 .flowi6_mark = skb->mark, 2367 }; 2368 struct mfc6_cache *cache; 2369 struct mr_table *mrt; 2370 int err; 2371 int vif; 2372 2373 guard(rcu)(); 2374 2375 if (IP6CB(skb)->flags & IP6SKB_FORWARDED) 2376 goto ip6_output; 2377 if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) 2378 goto ip6_output; 2379 2380 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2381 if (err < 0) { 2382 kfree_skb(skb); 2383 return err; 2384 } 2385 2386 cache = ip6mr_cache_find(mrt, 2387 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2388 if (!cache) { 2389 vif = ip6mr_find_vif(mrt, dev); 2390 if (vif >= 0) 2391 cache = ip6mr_cache_find_any(mrt, 2392 &ipv6_hdr(skb)->daddr, 2393 vif); 2394 } 2395 2396 /* No usable cache entry */ 2397 if (!cache) { 2398 vif = ip6mr_find_vif(mrt, dev); 2399 if (vif >= 0) 2400 return ip6mr_cache_unresolved(mrt, vif, skb, dev); 2401 goto ip6_output; 2402 } 2403 2404 
/* Wrong interface */ 2405 vif = cache->_c.mfc_parent; 2406 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2407 goto ip6_output; 2408 2409 ip6_mr_output_finish(net, mrt, dev, skb, cache); 2410 return 0; 2411 2412 ip6_output: 2413 return ip6_output(net, sk, skb); 2414 } 2415 2416 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2417 u32 portid) 2418 { 2419 int err; 2420 struct mr_table *mrt; 2421 struct mfc6_cache *cache; 2422 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 2423 2424 rcu_read_lock(); 2425 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 2426 if (!mrt) { 2427 rcu_read_unlock(); 2428 return -ENOENT; 2429 } 2430 2431 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2432 if (!cache && skb->dev) { 2433 int vif = ip6mr_find_vif(mrt, skb->dev); 2434 2435 if (vif >= 0) 2436 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2437 vif); 2438 } 2439 2440 if (!cache) { 2441 struct sk_buff *skb2; 2442 struct ipv6hdr *iph; 2443 struct net_device *dev; 2444 int vif; 2445 2446 dev = skb->dev; 2447 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2448 rcu_read_unlock(); 2449 return -ENODEV; 2450 } 2451 2452 /* really correct? 
*/ 2453 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2454 if (!skb2) { 2455 rcu_read_unlock(); 2456 return -ENOMEM; 2457 } 2458 2459 NETLINK_CB(skb2).portid = portid; 2460 skb_reset_transport_header(skb2); 2461 2462 skb_put(skb2, sizeof(struct ipv6hdr)); 2463 skb_reset_network_header(skb2); 2464 2465 iph = ipv6_hdr(skb2); 2466 iph->version = 0; 2467 iph->priority = 0; 2468 iph->flow_lbl[0] = 0; 2469 iph->flow_lbl[1] = 0; 2470 iph->flow_lbl[2] = 0; 2471 iph->payload_len = 0; 2472 iph->nexthdr = IPPROTO_NONE; 2473 iph->hop_limit = 0; 2474 iph->saddr = rt->rt6i_src.addr; 2475 iph->daddr = rt->rt6i_dst.addr; 2476 2477 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2478 rcu_read_unlock(); 2479 2480 return err; 2481 } 2482 2483 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2484 rcu_read_unlock(); 2485 return err; 2486 } 2487 2488 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2489 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2490 int flags) 2491 { 2492 struct nlmsghdr *nlh; 2493 struct rtmsg *rtm; 2494 int err; 2495 2496 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2497 if (!nlh) 2498 return -EMSGSIZE; 2499 2500 rtm = nlmsg_data(nlh); 2501 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2502 rtm->rtm_dst_len = 128; 2503 rtm->rtm_src_len = 128; 2504 rtm->rtm_tos = 0; 2505 rtm->rtm_table = mrt->id; 2506 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2507 goto nla_put_failure; 2508 rtm->rtm_type = RTN_MULTICAST; 2509 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2510 if (c->_c.mfc_flags & MFC_STATIC) 2511 rtm->rtm_protocol = RTPROT_STATIC; 2512 else 2513 rtm->rtm_protocol = RTPROT_MROUTED; 2514 rtm->rtm_flags = 0; 2515 2516 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2517 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2518 goto nla_put_failure; 2519 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2520 /* do not break the dump if cache is unresolved */ 2521 if (err < 0 && err != -ENOENT) 2522 goto nla_put_failure; 2523 2524 
nlmsg_end(skb, nlh); 2525 return 0; 2526 2527 nla_put_failure: 2528 nlmsg_cancel(skb, nlh); 2529 return -EMSGSIZE; 2530 } 2531 2532 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2533 u32 portid, u32 seq, struct mr_mfc *c, 2534 int cmd, int flags) 2535 { 2536 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2537 cmd, flags); 2538 } 2539 2540 static int mr6_msgsize(bool unresolved, int maxvif) 2541 { 2542 size_t len = 2543 NLMSG_ALIGN(sizeof(struct rtmsg)) 2544 + nla_total_size(4) /* RTA_TABLE */ 2545 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2546 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2547 ; 2548 2549 if (!unresolved) 2550 len = len 2551 + nla_total_size(4) /* RTA_IIF */ 2552 + nla_total_size(0) /* RTA_MULTIPATH */ 2553 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2554 /* RTA_MFC_STATS */ 2555 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2556 ; 2557 2558 return len; 2559 } 2560 2561 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2562 int cmd) 2563 { 2564 struct net *net = read_pnet(&mrt->net); 2565 struct sk_buff *skb; 2566 int err = -ENOBUFS; 2567 2568 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2569 GFP_ATOMIC); 2570 if (!skb) 2571 goto errout; 2572 2573 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2574 if (err < 0) 2575 goto errout; 2576 2577 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2578 return; 2579 2580 errout: 2581 kfree_skb(skb); 2582 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2583 } 2584 2585 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2586 { 2587 size_t len = 2588 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2589 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2590 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2591 /* IP6MRA_CREPORT_SRC_ADDR */ 2592 + nla_total_size(sizeof(struct in6_addr)) 2593 /* IP6MRA_CREPORT_DST_ADDR */ 2594 + nla_total_size(sizeof(struct 
in6_addr)) 2595 /* IP6MRA_CREPORT_PKT */ 2596 + nla_total_size(payloadlen) 2597 ; 2598 2599 return len; 2600 } 2601 2602 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2603 { 2604 struct net *net = read_pnet(&mrt->net); 2605 struct nlmsghdr *nlh; 2606 struct rtgenmsg *rtgenm; 2607 struct mrt6msg *msg; 2608 struct sk_buff *skb; 2609 struct nlattr *nla; 2610 int payloadlen; 2611 2612 payloadlen = pkt->len - sizeof(struct mrt6msg); 2613 msg = (struct mrt6msg *)skb_transport_header(pkt); 2614 2615 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2616 if (!skb) 2617 goto errout; 2618 2619 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2620 sizeof(struct rtgenmsg), 0); 2621 if (!nlh) 2622 goto errout; 2623 rtgenm = nlmsg_data(nlh); 2624 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2625 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2626 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2627 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2628 &msg->im6_src) || 2629 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2630 &msg->im6_dst)) 2631 goto nla_put_failure; 2632 2633 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2634 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2635 nla_data(nla), payloadlen)) 2636 goto nla_put_failure; 2637 2638 nlmsg_end(skb, nlh); 2639 2640 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2641 return; 2642 2643 nla_put_failure: 2644 nlmsg_cancel(skb, nlh); 2645 errout: 2646 kfree_skb(skb); 2647 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2648 } 2649 2650 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { 2651 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2652 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2653 [RTA_TABLE] = { .type = NLA_U32 }, 2654 }; 2655 2656 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, 2657 const struct nlmsghdr *nlh, 2658 struct nlattr **tb, 2659 
struct netlink_ext_ack *extack) 2660 { 2661 struct rtmsg *rtm; 2662 int err; 2663 2664 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, 2665 extack); 2666 if (err) 2667 return err; 2668 2669 rtm = nlmsg_data(nlh); 2670 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 2671 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 2672 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2673 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2674 NL_SET_ERR_MSG_MOD(extack, 2675 "Invalid values in header for multicast route get request"); 2676 return -EINVAL; 2677 } 2678 2679 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2680 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2681 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 2682 return -EINVAL; 2683 } 2684 2685 return 0; 2686 } 2687 2688 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2689 struct netlink_ext_ack *extack) 2690 { 2691 struct net *net = sock_net(in_skb->sk); 2692 struct in6_addr src = {}, grp = {}; 2693 struct nlattr *tb[RTA_MAX + 1]; 2694 struct mfc6_cache *cache; 2695 struct mr_table *mrt; 2696 struct sk_buff *skb; 2697 u32 tableid; 2698 int err; 2699 2700 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2701 if (err < 0) 2702 return err; 2703 2704 if (tb[RTA_SRC]) 2705 src = nla_get_in6_addr(tb[RTA_SRC]); 2706 if (tb[RTA_DST]) 2707 grp = nla_get_in6_addr(tb[RTA_DST]); 2708 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2709 2710 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); 2711 if (!mrt) { 2712 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); 2713 return -ENOENT; 2714 } 2715 2716 /* entries are added/deleted only under RTNL */ 2717 rcu_read_lock(); 2718 cache = ip6mr_cache_find(mrt, &src, &grp); 2719 rcu_read_unlock(); 2720 if (!cache) { 2721 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); 2722 return -ENOENT; 2723 } 2724 2725 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL); 
2726 if (!skb) 2727 return -ENOBUFS; 2728 2729 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2730 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); 2731 if (err < 0) { 2732 kfree_skb(skb); 2733 return err; 2734 } 2735 2736 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2737 } 2738 2739 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2740 { 2741 const struct nlmsghdr *nlh = cb->nlh; 2742 struct fib_dump_filter filter = { 2743 .rtnl_held = true, 2744 }; 2745 int err; 2746 2747 if (cb->strict_check) { 2748 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2749 &filter, cb); 2750 if (err < 0) 2751 return err; 2752 } 2753 2754 if (filter.table_id) { 2755 struct mr_table *mrt; 2756 2757 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2758 if (!mrt) { 2759 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) 2760 return skb->len; 2761 2762 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2763 return -ENOENT; 2764 } 2765 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2766 &mfc_unres_lock, &filter); 2767 return skb->len ? : err; 2768 } 2769 2770 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2771 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2772 } 2773