1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 63 */ 64 65 static DEFINE_SPINLOCK(mrt_lock); 66 67 static struct net_device *vif_dev_read(const struct vif_device *vif) 68 { 69 return rcu_dereference(vif->dev); 70 } 71 72 /* Multicast router control variables */ 73 74 /* Special spinlock for queue of unresolved entries */ 75 static DEFINE_SPINLOCK(mfc_unres_lock); 76 77 /* We return to original Alan's scheme. Hash table of resolved 78 entries is changed only in process context and protected 79 with weak lock mrt_lock. Queue of unresolved entries is protected 80 with strong spinlock mfc_unres_lock. 81 82 In this case data path is free of exclusive locks at all. 83 */ 84 85 static struct kmem_cache *mrt_cachep __read_mostly; 86 87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 88 static void ip6mr_free_table(struct mr_table *mrt); 89 90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 91 struct net_device *dev, struct sk_buff *skb, 92 struct mfc6_cache *cache); 93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 94 mifi_t mifi, int assert); 95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 96 int cmd); 97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 99 struct netlink_ext_ack *extack); 100 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 101 struct netlink_callback *cb); 102 static void mroute_clean_tables(struct mr_table *mrt, int flags); 103 static void ipmr_expire_process(struct timer_list *t); 104 105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 106 #define ip6mr_for_each_table(mrt, net) \ 107 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 108 lockdep_rtnl_is_held() || \ 109 list_empty(&net->ipv6.mr6_tables)) 110 111 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 112 struct mr_table *mrt) 113 { 114 struct mr_table *ret; 115 116 if (!mrt) 117 ret = list_entry_rcu(net->ipv6.mr6_tables.next, 118 struct mr_table, list); 119 else 120 ret = list_entry_rcu(mrt->list.next, 121 struct mr_table, list); 122 123 if (&ret->list == &net->ipv6.mr6_tables) 124 return NULL; 125 return ret; 126 } 127 128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) 129 { 130 struct mr_table *mrt; 131 132 ip6mr_for_each_table(mrt, net) { 133 if (mrt->id == id) 134 return mrt; 135 } 136 return NULL; 137 } 138 139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 140 { 141 struct mr_table *mrt; 142 143 rcu_read_lock(); 144 mrt = __ip6mr_get_table(net, id); 145 rcu_read_unlock(); 146 return mrt; 147 } 148 149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 150 struct mr_table **mrt) 151 { 152 int err; 153 struct ip6mr_result res; 154 struct fib_lookup_arg arg = { 155 .result = &res, 156 .flags = FIB_LOOKUP_NOREF, 157 }; 158 159 /* update flow if oif or iif point to device enslaved to l3mdev */ 160 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 161 162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 163 flowi6_to_flowi(flp6), 0, &arg); 164 if (err < 0) 165 return err; 166 *mrt = res.mrt; 167 return 0; 168 } 169 170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 171 int flags, struct fib_lookup_arg *arg) 172 { 173 struct ip6mr_result *res = arg->result; 174 struct mr_table *mrt; 175 176 switch (rule->action) { 177 case FR_ACT_TO_TBL: 178 break; 179 case FR_ACT_UNREACHABLE: 180 return -ENETUNREACH; 181 case FR_ACT_PROHIBIT: 182 return -EACCES; 183 case FR_ACT_BLACKHOLE: 184 default: 185 return -EINVAL; 186 } 187 188 arg->table = fib_rule_get_table(rule, arg); 189 190 mrt = __ip6mr_get_table(rule->fr_net, arg->table); 191 if (!mrt) 192 return -EAGAIN; 193 res->mrt = mrt; 194 return 0; 195 } 196 197 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 198 { 199 return 1; 200 } 201 202 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 203 struct fib_rule_hdr *frh, struct nlattr **tb, 204 struct netlink_ext_ack *extack) 205 { 206 return 0; 207 } 208 209 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 210 struct nlattr **tb) 211 { 212 return 1; 213 } 214 215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 216 struct fib_rule_hdr *frh) 217 { 218 frh->dst_len = 0; 219 frh->src_len = 0; 220 frh->tos = 0; 221 return 0; 222 } 223 224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 225 .family = RTNL_FAMILY_IP6MR, 226 .rule_size = sizeof(struct ip6mr_rule), 227 .addr_size = sizeof(struct in6_addr), 228 .action = ip6mr_rule_action, 229 .match = ip6mr_rule_match, 230 .configure = ip6mr_rule_configure, 231 .compare = ip6mr_rule_compare, 232 .fill = ip6mr_rule_fill, 233 .nlgroup = RTNLGRP_IPV6_RULE, 234 .owner = THIS_MODULE, 235 }; 236 237 static int __net_init ip6mr_rules_init(struct net *net) 238 { 239 struct fib_rules_ops *ops; 240 struct mr_table *mrt; 241 int err; 242 243 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 244 if (IS_ERR(ops)) 245 return PTR_ERR(ops); 246 247 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 248 249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 250 if (IS_ERR(mrt)) { 251 err = PTR_ERR(mrt); 252 goto err1; 253 } 254 255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); 256 if (err < 0) 257 goto err2; 258 259 net->ipv6.mr6_rules_ops = ops; 260 return 0; 261 262 err2: 263 rtnl_lock(); 264 ip6mr_free_table(mrt); 265 rtnl_unlock(); 266 err1: 267 fib_rules_unregister(ops); 268 return err; 269 } 270 271 static void __net_exit ip6mr_rules_exit(struct net *net) 272 { 273 struct mr_table *mrt, *next; 274 275 ASSERT_RTNL(); 276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 277 list_del(&mrt->list); 278 ip6mr_free_table(mrt); 279 } 280 fib_rules_unregister(net->ipv6.mr6_rules_ops); 281 } 282 283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 284 struct netlink_ext_ack *extack) 285 { 286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 287 } 288 289 static unsigned int ip6mr_rules_seq_read(const struct net *net) 290 { 291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 292 } 293 294 bool ip6mr_rule_default(const struct fib_rule *rule) 295 { 296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 298 } 299 EXPORT_SYMBOL(ip6mr_rule_default); 300 #else 301 #define ip6mr_for_each_table(mrt, net) \ 302 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 303 304 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 305 struct mr_table *mrt) 306 { 307 if (!mrt) 308 return net->ipv6.mrt6; 309 return NULL; 310 } 311 312 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 313 { 314 return net->ipv6.mrt6; 315 } 316 317 #define __ip6mr_get_table ip6mr_get_table 318 319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 320 struct mr_table **mrt) 321 { 322 *mrt = net->ipv6.mrt6; 323 return 0; 324 } 325 326 static int __net_init ip6mr_rules_init(struct net *net) 327 { 328 struct mr_table *mrt; 329 330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 331 if (IS_ERR(mrt)) 332 return PTR_ERR(mrt); 333 net->ipv6.mrt6 = mrt; 334 return 0; 335 } 336 337 static void __net_exit ip6mr_rules_exit(struct net *net) 338 { 339 ASSERT_RTNL(); 340 ip6mr_free_table(net->ipv6.mrt6); 341 net->ipv6.mrt6 = NULL; 342 } 343 344 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 345 struct netlink_ext_ack *extack) 346 { 347 return 0; 348 } 349 350 static unsigned int ip6mr_rules_seq_read(const struct net *net) 351 { 352 return 0; 353 } 354 #endif 355 356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 357 const void *ptr) 358 { 359 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 360 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 361 362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || 363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 364 } 365 366 static const struct rhashtable_params ip6mr_rht_params = { 367 .head_offset = offsetof(struct mr_mfc, mnode), 368 .key_offset = offsetof(struct mfc6_cache, cmparg), 369 .key_len = sizeof(struct mfc6_cache_cmp_arg), 370 .nelem_hint = 3, 371 .obj_cmpfn = ip6mr_hash_cmp, 372 .automatic_shrinking = true, 373 }; 374 375 static void ip6mr_new_table_set(struct mr_table *mrt, 376 struct net *net) 377 { 378 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 379 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 380 #endif 381 } 382 383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 384 .mf6c_origin = IN6ADDR_ANY_INIT, 385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 386 }; 387 388 static struct mr_table_ops ip6mr_mr_table_ops = { 389 .rht_params = &ip6mr_rht_params, 390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 391 }; 392 393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 394 { 395 struct mr_table *mrt; 396 397 mrt = __ip6mr_get_table(net, id); 398 if (mrt) 399 return mrt; 400 401 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 402 ipmr_expire_process, ip6mr_new_table_set); 403 } 404 405 static void ip6mr_free_table(struct mr_table *mrt) 406 { 407 struct net *net = read_pnet(&mrt->net); 408 409 WARN_ON_ONCE(!mr_can_free_table(net)); 410 411 timer_shutdown_sync(&mrt->ipmr_expire_timer); 412 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 413 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); 414 rhltable_destroy(&mrt->mfc_hash); 415 kfree(mrt); 416 } 417 418 #ifdef CONFIG_PROC_FS 419 /* The /proc interfaces to multicast routing 420 * /proc/ip6_mr_cache /proc/ip6_mr_vif 421 */ 422 423 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 424 __acquires(RCU) 425 { 426 struct mr_vif_iter *iter = seq->private; 427 struct net *net = seq_file_net(seq); 428 struct mr_table *mrt; 429 430 rcu_read_lock(); 431 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 432 if (!mrt) { 433 rcu_read_unlock(); 434 return ERR_PTR(-ENOENT); 435 } 436 437 iter->mrt = mrt; 438 439 return mr_vif_seq_start(seq, pos); 440 } 441 442 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 443 __releases(RCU) 444 { 445 rcu_read_unlock(); 446 } 447 448 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 449 { 450 struct mr_vif_iter *iter = seq->private; 451 struct mr_table *mrt = iter->mrt; 452 453 if (v == SEQ_START_TOKEN) { 454 seq_puts(seq, 455 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 456 } else { 457 const struct vif_device *vif = v; 458 const struct net_device *vif_dev; 459 const char *name; 460 461 vif_dev = vif_dev_read(vif); 462 name = vif_dev ? vif_dev->name : "none"; 463 464 seq_printf(seq, 465 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 466 vif - mrt->vif_table, 467 name, vif->bytes_in, vif->pkt_in, 468 vif->bytes_out, vif->pkt_out, 469 vif->flags); 470 } 471 return 0; 472 } 473 474 static const struct seq_operations ip6mr_vif_seq_ops = { 475 .start = ip6mr_vif_seq_start, 476 .next = mr_vif_seq_next, 477 .stop = ip6mr_vif_seq_stop, 478 .show = ip6mr_vif_seq_show, 479 }; 480 481 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 482 { 483 struct net *net = seq_file_net(seq); 484 struct mr_table *mrt; 485 486 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 487 if (!mrt) 488 return ERR_PTR(-ENOENT); 489 490 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 491 } 492 493 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 494 { 495 int n; 496 497 if (v == SEQ_START_TOKEN) { 498 seq_puts(seq, 499 "Group " 500 "Origin " 501 "Iif Pkts Bytes Wrong Oifs\n"); 502 } else { 503 const struct mfc6_cache *mfc = v; 504 const struct mr_mfc_iter *it = seq->private; 505 struct mr_table *mrt = it->mrt; 506 507 seq_printf(seq, "%pI6 %pI6 %-3hd", 508 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 509 mfc->_c.mfc_parent); 510 511 if (it->cache != &mrt->mfc_unres_queue) { 512 seq_printf(seq, " %8lu %8lu %8lu", 513 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 514 atomic_long_read(&mfc->_c.mfc_un.res.bytes), 515 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 516 for (n = mfc->_c.mfc_un.res.minvif; 517 n < mfc->_c.mfc_un.res.maxvif; n++) { 518 if (VIF_EXISTS(mrt, n) && 519 mfc->_c.mfc_un.res.ttls[n] < 255) 520 seq_printf(seq, 521 " %2d:%-3d", n, 522 mfc->_c.mfc_un.res.ttls[n]); 523 } 524 } else { 525 /* unresolved mfc_caches don't contain 526 * pkt, bytes and wrong_if values 527 */ 528 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 529 } 530 seq_putc(seq, '\n'); 531 } 532 return 0; 533 } 534 535 static const struct seq_operations ipmr_mfc_seq_ops = { 536 .start = ipmr_mfc_seq_start, 537 .next = mr_mfc_seq_next, 538 .stop = mr_mfc_seq_stop, 539 .show = ipmr_mfc_seq_show, 540 }; 541 #endif 542 543 #ifdef CONFIG_IPV6_PIMSM_V2 544 545 static int pim6_rcv(struct sk_buff *skb) 546 { 547 struct pimreghdr *pim; 548 struct ipv6hdr *encap; 549 struct net_device *reg_dev = NULL; 550 struct net *net = dev_net(skb->dev); 551 struct mr_table *mrt; 552 struct flowi6 fl6 = { 553 .flowi6_iif = skb->dev->ifindex, 554 .flowi6_mark = skb->mark, 555 }; 556 int reg_vif_num; 557 558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 559 goto drop; 560 561 pim = (struct pimreghdr *)skb_transport_header(skb); 562 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 563 (pim->flags & PIM_NULL_REGISTER) || 564 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 565 sizeof(*pim), IPPROTO_PIM, 566 csum_partial((void *)pim, sizeof(*pim), 0)) && 567 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 568 goto drop; 569 570 /* check if the inner packet is destined to mcast group */ 571 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 572 sizeof(*pim)); 573 574 if (!ipv6_addr_is_multicast(&encap->daddr) || 575 encap->payload_len == 0 || 576 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 577 goto drop; 578 579 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 580 goto drop; 581 582 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ 583 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 584 if (reg_vif_num >= 0) 585 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); 586 587 if (!reg_dev) 588 goto drop; 589 590 skb->mac_header = skb->network_header; 591 skb_pull(skb, (u8 *)encap - skb->data); 592 skb_reset_network_header(skb); 593 skb->protocol = htons(ETH_P_IPV6); 594 skb->ip_summed = CHECKSUM_NONE; 595 596 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 597 598 netif_rx(skb); 599 600 return 0; 601 drop: 602 kfree_skb(skb); 603 return 0; 604 } 605 606 static const struct inet6_protocol pim6_protocol = { 607 .handler = pim6_rcv, 608 }; 609 610 /* Service routines creating virtual interfaces: PIMREG */ 611 612 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 613 struct net_device *dev) 614 { 615 struct net *net = dev_net(dev); 616 struct mr_table *mrt; 617 struct flowi6 fl6 = { 618 .flowi6_oif = dev->ifindex, 619 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 620 .flowi6_mark = skb->mark, 621 }; 622 623 if (!pskb_inet_may_pull(skb)) 624 goto tx_err; 625 626 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 627 goto tx_err; 628 629 DEV_STATS_ADD(dev, tx_bytes, skb->len); 630 DEV_STATS_INC(dev, tx_packets); 631 rcu_read_lock(); 632 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 633 MRT6MSG_WHOLEPKT); 634 rcu_read_unlock(); 635 kfree_skb(skb); 636 return NETDEV_TX_OK; 637 638 tx_err: 639 DEV_STATS_INC(dev, tx_errors); 640 kfree_skb(skb); 641 return NETDEV_TX_OK; 642 } 643 644 static int reg_vif_get_iflink(const struct net_device *dev) 645 { 646 return 0; 647 } 648 649 static const struct net_device_ops reg_vif_netdev_ops = { 650 .ndo_start_xmit = reg_vif_xmit, 651 .ndo_get_iflink = reg_vif_get_iflink, 652 }; 653 654 static void reg_vif_setup(struct net_device *dev) 655 { 656 dev->type = ARPHRD_PIMREG; 657 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 658 dev->flags = IFF_NOARP; 659 dev->netdev_ops = ®_vif_netdev_ops; 660 dev->needs_free_netdev = true; 661 dev->netns_immutable = true; 662 } 663 664 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 665 { 666 struct net_device *dev; 667 char name[IFNAMSIZ]; 668 669 if (mrt->id == RT6_TABLE_DFLT) 670 sprintf(name, "pim6reg"); 671 else 672 sprintf(name, "pim6reg%u", mrt->id); 673 674 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 675 if (!dev) 676 return NULL; 677 678 dev_net_set(dev, net); 679 680 if (register_netdevice(dev)) { 681 free_netdev(dev); 682 return NULL; 683 } 684 685 if (dev_open(dev, NULL)) 686 goto failure; 687 688 dev_hold(dev); 689 return dev; 690 691 failure: 692 unregister_netdevice(dev); 693 return NULL; 694 } 695 #endif 696 697 static int call_ip6mr_vif_entry_notifiers(struct net *net, 698 enum fib_event_type event_type, 699 struct vif_device *vif, 700 struct net_device *vif_dev, 701 mifi_t vif_index, u32 tb_id) 702 { 703 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 704 vif, vif_dev, vif_index, tb_id, 705 &net->ipv6.ipmr_seq); 706 } 707 708 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 709 enum fib_event_type event_type, 710 struct mfc6_cache *mfc, u32 tb_id) 711 { 712 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 713 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 714 } 715 716 /* Delete a VIF entry */ 717 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 718 struct list_head *head) 719 { 720 struct vif_device *v; 721 struct net_device *dev; 722 struct inet6_dev *in6_dev; 723 724 if (vifi < 0 || vifi >= mrt->maxvif) 725 return -EADDRNOTAVAIL; 726 727 v = &mrt->vif_table[vifi]; 728 729 dev = rtnl_dereference(v->dev); 730 if (!dev) 731 return -EADDRNOTAVAIL; 732 733 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 734 FIB_EVENT_VIF_DEL, v, dev, 735 vifi, mrt->id); 736 spin_lock(&mrt_lock); 737 RCU_INIT_POINTER(v->dev, NULL); 738 739 #ifdef CONFIG_IPV6_PIMSM_V2 740 if (vifi == mrt->mroute_reg_vif_num) { 741 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 742 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 743 } 744 #endif 745 746 if (vifi + 1 == mrt->maxvif) { 747 int tmp; 748 for (tmp = vifi - 1; tmp >= 0; tmp--) { 749 if (VIF_EXISTS(mrt, tmp)) 750 break; 751 } 752 WRITE_ONCE(mrt->maxvif, tmp + 1); 753 } 754 755 spin_unlock(&mrt_lock); 756 757 dev_set_allmulti(dev, -1); 758 759 in6_dev = __in6_dev_get(dev); 760 if (in6_dev) { 761 atomic_dec(&in6_dev->cnf.mc_forwarding); 762 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 763 NETCONFA_MC_FORWARDING, 764 dev->ifindex, &in6_dev->cnf); 765 } 766 767 if ((v->flags & MIFF_REGISTER) && !notify) 768 unregister_netdevice_queue(dev, head); 769 770 netdev_put(dev, &v->dev_tracker); 771 return 0; 772 } 773 774 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 775 { 776 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 777 778 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 779 } 780 781 static inline void ip6mr_cache_free(struct mfc6_cache *c) 782 { 783 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 784 } 785 786 /* Destroy an unresolved cache entry, killing queued skbs 787 and reporting error to netlink readers. 788 */ 789 790 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 791 { 792 struct net *net = read_pnet(&mrt->net); 793 struct sk_buff *skb; 794 795 atomic_dec(&mrt->cache_resolve_queue_len); 796 797 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 798 if (ipv6_hdr(skb)->version == 0) { 799 struct nlmsghdr *nlh = skb_pull(skb, 800 sizeof(struct ipv6hdr)); 801 nlh->nlmsg_type = NLMSG_ERROR; 802 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 803 skb_trim(skb, nlh->nlmsg_len); 804 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 805 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 806 } else 807 kfree_skb(skb); 808 } 809 810 ip6mr_cache_free(c); 811 } 812 813 814 /* Timer process for all the unresolved queue. */ 815 816 static void ipmr_do_expire_process(struct mr_table *mrt) 817 { 818 unsigned long now = jiffies; 819 unsigned long expires = 10 * HZ; 820 struct mr_mfc *c, *next; 821 822 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 823 if (time_after(c->mfc_un.unres.expires, now)) { 824 /* not yet... */ 825 unsigned long interval = c->mfc_un.unres.expires - now; 826 if (interval < expires) 827 expires = interval; 828 continue; 829 } 830 831 list_del(&c->list); 832 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 833 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 834 } 835 836 if (!list_empty(&mrt->mfc_unres_queue)) 837 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 838 } 839 840 static void ipmr_expire_process(struct timer_list *t) 841 { 842 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 843 844 if (!spin_trylock(&mfc_unres_lock)) { 845 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 846 return; 847 } 848 849 if (!list_empty(&mrt->mfc_unres_queue)) 850 ipmr_do_expire_process(mrt); 851 852 spin_unlock(&mfc_unres_lock); 853 } 854 855 /* Fill oifs list. It is called under locked mrt_lock. */ 856 857 static void ip6mr_update_thresholds(struct mr_table *mrt, 858 struct mr_mfc *cache, 859 unsigned char *ttls) 860 { 861 int vifi; 862 863 cache->mfc_un.res.minvif = MAXMIFS; 864 cache->mfc_un.res.maxvif = 0; 865 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 866 867 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 868 if (VIF_EXISTS(mrt, vifi) && 869 ttls[vifi] && ttls[vifi] < 255) { 870 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 871 if (cache->mfc_un.res.minvif > vifi) 872 cache->mfc_un.res.minvif = vifi; 873 if (cache->mfc_un.res.maxvif <= vifi) 874 cache->mfc_un.res.maxvif = vifi + 1; 875 } 876 } 877 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 878 } 879 880 static int mif6_add(struct net *net, struct mr_table *mrt, 881 struct mif6ctl *vifc, int mrtsock) 882 { 883 int vifi = vifc->mif6c_mifi; 884 struct vif_device *v = &mrt->vif_table[vifi]; 885 struct net_device *dev; 886 struct inet6_dev *in6_dev; 887 int err; 888 889 /* Is vif busy ? */ 890 if (VIF_EXISTS(mrt, vifi)) 891 return -EADDRINUSE; 892 893 switch (vifc->mif6c_flags) { 894 #ifdef CONFIG_IPV6_PIMSM_V2 895 case MIFF_REGISTER: 896 /* 897 * Special Purpose VIF in PIM 898 * All the packets will be sent to the daemon 899 */ 900 if (mrt->mroute_reg_vif_num >= 0) 901 return -EADDRINUSE; 902 dev = ip6mr_reg_vif(net, mrt); 903 if (!dev) 904 return -ENOBUFS; 905 err = dev_set_allmulti(dev, 1); 906 if (err) { 907 unregister_netdevice(dev); 908 dev_put(dev); 909 return err; 910 } 911 break; 912 #endif 913 case 0: 914 dev = dev_get_by_index(net, vifc->mif6c_pifi); 915 if (!dev) 916 return -EADDRNOTAVAIL; 917 err = dev_set_allmulti(dev, 1); 918 if (err) { 919 dev_put(dev); 920 return err; 921 } 922 break; 923 default: 924 return -EINVAL; 925 } 926 927 in6_dev = __in6_dev_get(dev); 928 if (in6_dev) { 929 atomic_inc(&in6_dev->cnf.mc_forwarding); 930 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 931 NETCONFA_MC_FORWARDING, 932 dev->ifindex, &in6_dev->cnf); 933 } 934 935 /* Fill in the VIF structures */ 936 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 937 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 938 MIFF_REGISTER); 939 940 /* And finish update writing critical data */ 941 spin_lock(&mrt_lock); 942 rcu_assign_pointer(v->dev, dev); 943 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 944 #ifdef CONFIG_IPV6_PIMSM_V2 945 if (v->flags & MIFF_REGISTER) 946 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 947 #endif 948 if (vifi + 1 > mrt->maxvif) 949 WRITE_ONCE(mrt->maxvif, vifi + 1); 950 spin_unlock(&mrt_lock); 951 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 952 v, dev, vifi, mrt->id); 953 return 0; 954 } 955 956 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 957 const struct in6_addr *origin, 958 const struct in6_addr *mcastgrp) 959 { 960 struct mfc6_cache_cmp_arg arg = { 961 .mf6c_origin = *origin, 962 .mf6c_mcastgrp = *mcastgrp, 963 }; 964 965 return mr_mfc_find(mrt, &arg); 966 } 967 968 /* Look for a (*,G) entry */ 969 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 970 struct in6_addr *mcastgrp, 971 mifi_t mifi) 972 { 973 struct mfc6_cache_cmp_arg arg = { 974 .mf6c_origin = in6addr_any, 975 .mf6c_mcastgrp = *mcastgrp, 976 }; 977 978 if (ipv6_addr_any(mcastgrp)) 979 return mr_mfc_find_any_parent(mrt, mifi); 980 return mr_mfc_find_any(mrt, mifi, &arg); 981 } 982 983 /* Look for a (S,G,iif) entry if parent != -1 */ 984 static struct mfc6_cache * 985 ip6mr_cache_find_parent(struct mr_table *mrt, 986 const struct in6_addr *origin, 987 const struct in6_addr *mcastgrp, 988 int parent) 989 { 990 struct mfc6_cache_cmp_arg arg = { 991 .mf6c_origin = *origin, 992 .mf6c_mcastgrp = *mcastgrp, 993 }; 994 995 return mr_mfc_find_parent(mrt, &arg, parent); 996 } 997 998 /* Allocate a multicast cache entry */ 999 static struct mfc6_cache *ip6mr_cache_alloc(void) 1000 { 1001 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1002 if (!c) 1003 return NULL; 1004 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1005 c->_c.mfc_un.res.minvif = MAXMIFS; 1006 c->_c.free = ip6mr_cache_free_rcu; 1007 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1008 return c; 1009 } 1010 1011 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1012 { 1013 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1014 if (!c) 1015 return NULL; 1016 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1017 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1018 return c; 1019 } 1020 1021 /* 1022 * A cache entry has gone into a resolved state from queued 1023 */ 1024 1025 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1026 struct mfc6_cache *uc, struct mfc6_cache *c) 1027 { 1028 struct sk_buff *skb; 1029 1030 /* 1031 * Play the pending entries through our router 1032 */ 1033 1034 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1035 if (ipv6_hdr(skb)->version == 0) { 1036 struct nlmsghdr *nlh = skb_pull(skb, 1037 sizeof(struct ipv6hdr)); 1038 1039 if (mr_fill_mroute(mrt, skb, &c->_c, 1040 nlmsg_data(nlh)) > 0) { 1041 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1042 } else { 1043 nlh->nlmsg_type = NLMSG_ERROR; 1044 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1045 skb_trim(skb, nlh->nlmsg_len); 1046 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1047 } 1048 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1049 } else { 1050 rcu_read_lock(); 1051 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1052 rcu_read_unlock(); 1053 } 1054 } 1055 } 1056 1057 /* 1058 * Bounce a cache query up to pim6sd and netlink. 1059 * 1060 * Called under rcu_read_lock() 1061 */ 1062 1063 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 1064 mifi_t mifi, int assert) 1065 { 1066 struct sock *mroute6_sk; 1067 struct sk_buff *skb; 1068 struct mrt6msg *msg; 1069 int ret; 1070 1071 #ifdef CONFIG_IPV6_PIMSM_V2 1072 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) 1073 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1074 +sizeof(*msg)); 1075 else 1076 #endif 1077 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1078 1079 if (!skb) 1080 return -ENOBUFS; 1081 1082 /* I suppose that internal messages 1083 * do not require checksums */ 1084 1085 skb->ip_summed = CHECKSUM_UNNECESSARY; 1086 1087 #ifdef CONFIG_IPV6_PIMSM_V2 1088 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { 1089 /* Ugly, but we have no choice with this interface. 1090 Duplicate old header, fix length etc. 1091 And all this only to mangle msg->im6_msgtype and 1092 to set msg->im6_mbz to "mbz" :-) 1093 */ 1094 __skb_pull(skb, skb_network_offset(pkt)); 1095 1096 skb_push(skb, sizeof(*msg)); 1097 skb_reset_transport_header(skb); 1098 msg = (struct mrt6msg *)skb_transport_header(skb); 1099 msg->im6_mbz = 0; 1100 msg->im6_msgtype = assert; 1101 if (assert == MRT6MSG_WRMIFWHOLE) 1102 msg->im6_mif = mifi; 1103 else 1104 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); 1105 msg->im6_pad = 0; 1106 msg->im6_src = ipv6_hdr(pkt)->saddr; 1107 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1108 1109 skb->ip_summed = CHECKSUM_UNNECESSARY; 1110 } else 1111 #endif 1112 { 1113 /* 1114 * Copy the IP header 1115 */ 1116 1117 skb_put(skb, sizeof(struct ipv6hdr)); 1118 skb_reset_network_header(skb); 1119 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1120 1121 /* 1122 * Add our header 1123 */ 1124 skb_put(skb, sizeof(*msg)); 1125 skb_reset_transport_header(skb); 1126 msg = (struct mrt6msg *)skb_transport_header(skb); 1127 1128 msg->im6_mbz = 0; 1129 msg->im6_msgtype = assert; 1130 msg->im6_mif = mifi; 1131 msg->im6_pad = 0; 1132 msg->im6_src = ipv6_hdr(pkt)->saddr; 1133 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1134 1135 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1136 skb->ip_summed = CHECKSUM_UNNECESSARY; 1137 } 1138 1139 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1140 if (!mroute6_sk) { 1141 kfree_skb(skb); 1142 return -EINVAL; 1143 } 1144 1145 mrt6msg_netlink_event(mrt, skb); 1146 1147 /* Deliver to user space multicast routing algorithms */ 1148 ret = sock_queue_rcv_skb(mroute6_sk, skb); 1149 1150 if (ret < 0) { 1151 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); 1152 kfree_skb(skb); 1153 } 1154 1155 return ret; 1156 } 1157 1158 /* Queue a packet for resolution. It gets locked cache entry! */ 1159 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, 1160 struct sk_buff *skb, struct net_device *dev) 1161 { 1162 struct mfc6_cache *c; 1163 bool found = false; 1164 int err; 1165 1166 spin_lock_bh(&mfc_unres_lock); 1167 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1168 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1169 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1170 found = true; 1171 break; 1172 } 1173 } 1174 1175 if (!found) { 1176 /* 1177 * Create a new entry if allowable 1178 */ 1179 1180 c = ip6mr_cache_alloc_unres(); 1181 if (!c) { 1182 spin_unlock_bh(&mfc_unres_lock); 1183 1184 kfree_skb(skb); 1185 return -ENOBUFS; 1186 } 1187 1188 /* Fill in the new cache entry */ 1189 c->_c.mfc_parent = -1; 1190 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1191 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1192 1193 /* 1194 * Reflect first query at pim6sd 1195 */ 1196 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1197 if (err < 0) { 1198 /* If the report failed throw the cache entry 1199 out - Brad Parker 1200 */ 1201 spin_unlock_bh(&mfc_unres_lock); 1202 1203 ip6mr_cache_free(c); 1204 kfree_skb(skb); 1205 return err; 1206 } 1207 1208 atomic_inc(&mrt->cache_resolve_queue_len); 1209 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1210 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1211 1212 ipmr_do_expire_process(mrt); 1213 } 1214 1215 /* See if we can append the packet */ 1216 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1217 kfree_skb(skb); 1218 err = -ENOBUFS; 1219 } else { 1220 if (dev) { 1221 skb->dev = dev; 1222 skb->skb_iif = dev->ifindex; 1223 } 1224 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1225 err = 0; 1226 } 1227 1228 spin_unlock_bh(&mfc_unres_lock); 1229 return err; 1230 } 1231 1232 /* 1233 * MFC6 cache manipulation by user space 1234 */ 1235 1236 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1237 int parent) 1238 { 1239 struct mfc6_cache *c; 1240 1241 /* The entries are added/deleted only under RTNL */ 1242 rcu_read_lock(); 1243 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1244 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1245 rcu_read_unlock(); 1246 if (!c) 1247 return -ENOENT; 1248 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1249 list_del_rcu(&c->_c.list); 1250 1251 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1252 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1253 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1254 mr_cache_put(&c->_c); 1255 return 0; 1256 } 1257 1258 static int ip6mr_device_event(struct notifier_block *this, 1259 unsigned long event, void *ptr) 1260 { 1261 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1262 struct net *net = dev_net(dev); 1263 struct mr_table *mrt; 1264 struct vif_device *v; 1265 int ct; 1266 1267 if (event != NETDEV_UNREGISTER) 1268 return NOTIFY_DONE; 1269 1270 ip6mr_for_each_table(mrt, net) { 1271 v = &mrt->vif_table[0]; 1272 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1273 if (rcu_access_pointer(v->dev) == dev) 1274 mif6_delete(mrt, ct, 1, NULL); 1275 } 1276 } 1277 1278 return NOTIFY_DONE; 1279 } 1280 1281 static unsigned int ip6mr_seq_read(const struct net *net) 1282 { 1283 return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); 1284 } 1285 1286 static int ip6mr_dump(struct net *net, struct notifier_block *nb, 1287 struct netlink_ext_ack *extack) 1288 { 1289 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, 1290 ip6mr_mr_table_iter, extack); 1291 } 1292 1293 static struct notifier_block ip6_mr_notifier = { 1294 .notifier_call = ip6mr_device_event 1295 }; 1296 1297 static const struct fib_notifier_ops ip6mr_notifier_ops_template = { 1298 .family = RTNL_FAMILY_IP6MR, 1299 .fib_seq_read = ip6mr_seq_read, 1300 .fib_dump = ip6mr_dump, 1301 .owner = THIS_MODULE, 1302 }; 1303 1304 static int __net_init ip6mr_notifier_init(struct net *net) 1305 { 1306 struct fib_notifier_ops *ops; 1307 1308 net->ipv6.ipmr_seq = 0; 1309 1310 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); 1311 if (IS_ERR(ops)) 1312 return PTR_ERR(ops); 1313 1314 net->ipv6.ip6mr_notifier_ops = ops; 1315 1316 return 0; 1317 } 1318 1319 static void __net_exit ip6mr_notifier_exit(struct net *net) 1320 { 1321 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); 1322 net->ipv6.ip6mr_notifier_ops = NULL; 1323 } 1324 1325 /* Setup for IP multicast routing */ 1326 static int __net_init ip6mr_net_init(struct net *net) 1327 { 1328 int err; 1329 1330 err = ip6mr_notifier_init(net); 1331 if (err) 1332 return err; 1333 1334 err = ip6mr_rules_init(net); 1335 if (err < 0) 1336 goto ip6mr_rules_fail; 1337 1338 #ifdef CONFIG_PROC_FS 1339 err = -ENOMEM; 1340 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, 1341 sizeof(struct mr_vif_iter))) 1342 goto proc_vif_fail; 1343 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 1344 sizeof(struct mr_mfc_iter))) 1345 goto proc_cache_fail; 1346 #endif 1347 1348 return 0; 1349 1350 #ifdef CONFIG_PROC_FS 1351 proc_cache_fail: 1352 remove_proc_entry("ip6_mr_vif", net->proc_net); 1353 proc_vif_fail: 1354 rtnl_lock(); 1355 ip6mr_rules_exit(net); 1356 rtnl_unlock(); 1357 #endif 1358 ip6mr_rules_fail: 1359 ip6mr_notifier_exit(net); 1360 return err; 1361 } 1362 1363 static void __net_exit ip6mr_net_exit(struct net *net) 1364 { 1365 #ifdef CONFIG_PROC_FS 1366 remove_proc_entry("ip6_mr_cache", net->proc_net); 1367 remove_proc_entry("ip6_mr_vif", net->proc_net); 1368 #endif 1369 ip6mr_notifier_exit(net); 1370 } 1371 1372 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) 1373 { 1374 struct net *net; 1375 1376 rtnl_lock(); 1377 list_for_each_entry(net, net_list, exit_list) 1378 ip6mr_rules_exit(net); 1379 rtnl_unlock(); 1380 } 1381 1382 static struct pernet_operations ip6mr_net_ops = { 1383 .init = ip6mr_net_init, 1384 .exit = ip6mr_net_exit, 1385 .exit_batch = ip6mr_net_exit_batch, 1386 }; 1387 1388 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { 1389 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, 1390 .msgtype = RTM_GETROUTE, 1391 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, 1392 }; 1393 1394 int __init ip6_mr_init(void) 1395 { 1396 int err; 1397 1398 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); 1399 if (!mrt_cachep) 1400 return -ENOMEM; 1401 1402 err = register_pernet_subsys(&ip6mr_net_ops); 1403 if (err) 1404 goto reg_pernet_fail; 1405 1406 err = register_netdevice_notifier(&ip6_mr_notifier); 1407 if (err) 1408 goto reg_notif_fail; 1409 #ifdef CONFIG_IPV6_PIMSM_V2 1410 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1411 pr_err("%s: can't add PIM protocol\n", __func__); 1412 err = -EAGAIN; 1413 goto add_proto_fail; 1414 } 1415 #endif 1416 err = rtnl_register_many(ip6mr_rtnl_msg_handlers); 1417 if (!err) 1418 return 0; 1419 1420 #ifdef CONFIG_IPV6_PIMSM_V2 1421 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1422 add_proto_fail: 1423 unregister_netdevice_notifier(&ip6_mr_notifier); 1424 #endif 1425 reg_notif_fail: 1426 unregister_pernet_subsys(&ip6mr_net_ops); 1427 reg_pernet_fail: 1428 kmem_cache_destroy(mrt_cachep); 1429 return err; 1430 } 1431 1432 void __init ip6_mr_cleanup(void) 1433 { 1434 rtnl_unregister_many(ip6mr_rtnl_msg_handlers); 1435 #ifdef CONFIG_IPV6_PIMSM_V2 1436 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1437 #endif 1438 unregister_netdevice_notifier(&ip6_mr_notifier); 1439 unregister_pernet_subsys(&ip6mr_net_ops); 1440 kmem_cache_destroy(mrt_cachep); 1441 } 1442 1443 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1444 struct mf6cctl *mfc, int mrtsock, int parent) 1445 { 1446 unsigned char ttls[MAXMIFS]; 1447 struct mfc6_cache *uc, *c; 1448 struct mr_mfc *_uc; 1449 bool found; 1450 int i, err; 1451 1452 if (mfc->mf6cc_parent >= MAXMIFS) 1453 return -ENFILE; 1454 1455 memset(ttls, 255, MAXMIFS); 1456 for (i = 0; i < MAXMIFS; i++) { 1457 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1458 ttls[i] = 1; 1459 } 1460 1461 /* The entries are added/deleted only under RTNL */ 1462 rcu_read_lock(); 1463 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1464 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1465 rcu_read_unlock(); 1466 if (c) { 1467 spin_lock(&mrt_lock); 1468 c->_c.mfc_parent = mfc->mf6cc_parent; 1469 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1470 if (!mrtsock) 1471 c->_c.mfc_flags |= MFC_STATIC; 1472 spin_unlock(&mrt_lock); 1473 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1474 c, mrt->id); 1475 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1476 return 0; 1477 } 1478 1479 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1480 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1481 return -EINVAL; 1482 1483 c = ip6mr_cache_alloc(); 1484 if (!c) 1485 return -ENOMEM; 1486 1487 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1488 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1489 c->_c.mfc_parent = mfc->mf6cc_parent; 1490 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1491 if (!mrtsock) 1492 c->_c.mfc_flags |= MFC_STATIC; 1493 1494 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1495 ip6mr_rht_params); 1496 if (err) { 1497 pr_err("ip6mr: rhtable insert error %d\n", err); 1498 ip6mr_cache_free(c); 1499 return err; 1500 } 1501 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1502 1503 /* Check to see if we resolved a queued list. If so we 1504 * need to send on the frames and tidy up. 1505 */ 1506 found = false; 1507 spin_lock_bh(&mfc_unres_lock); 1508 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1509 uc = (struct mfc6_cache *)_uc; 1510 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1511 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1512 list_del(&_uc->list); 1513 atomic_dec(&mrt->cache_resolve_queue_len); 1514 found = true; 1515 break; 1516 } 1517 } 1518 if (list_empty(&mrt->mfc_unres_queue)) 1519 timer_delete(&mrt->ipmr_expire_timer); 1520 spin_unlock_bh(&mfc_unres_lock); 1521 1522 if (found) { 1523 ip6mr_cache_resolve(net, mrt, uc, c); 1524 ip6mr_cache_free(uc); 1525 } 1526 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1527 c, mrt->id); 1528 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1529 return 0; 1530 } 1531 1532 /* 1533 * Close the multicast socket, and clear the vif tables etc 1534 */ 1535 1536 static void mroute_clean_tables(struct mr_table *mrt, int flags) 1537 { 1538 struct mr_mfc *c, *tmp; 1539 LIST_HEAD(list); 1540 int i; 1541 1542 /* Shut down all active vif entries */ 1543 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1544 for (i = 0; i < mrt->maxvif; i++) { 1545 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1546 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1547 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1548 continue; 1549 mif6_delete(mrt, i, 0, &list); 1550 } 1551 unregister_netdevice_many(&list); 1552 } 1553 1554 /* Wipe the cache */ 1555 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1556 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1557 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || 1558 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1559 continue; 1560 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1561 list_del_rcu(&c->list); 1562 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1563 FIB_EVENT_ENTRY_DEL, 1564 (struct mfc6_cache *)c, mrt->id); 1565 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1566 mr_cache_put(c); 1567 } 1568 } 1569 1570 if (flags & MRT6_FLUSH_MFC) { 1571 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1572 spin_lock_bh(&mfc_unres_lock); 1573 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1574 list_del(&c->list); 1575 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1576 RTM_DELROUTE); 1577 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1578 } 1579 spin_unlock_bh(&mfc_unres_lock); 1580 } 1581 } 1582 } 1583 1584 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1585 { 1586 int err = 0; 1587 struct net *net = sock_net(sk); 1588 1589 rtnl_lock(); 1590 spin_lock(&mrt_lock); 1591 if (rtnl_dereference(mrt->mroute_sk)) { 1592 err = -EADDRINUSE; 1593 } else { 1594 rcu_assign_pointer(mrt->mroute_sk, sk); 1595 sock_set_flag(sk, SOCK_RCU_FREE); 1596 atomic_inc(&net->ipv6.devconf_all->mc_forwarding); 1597 } 1598 spin_unlock(&mrt_lock); 1599 1600 if (!err) 1601 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1602 NETCONFA_MC_FORWARDING, 1603 NETCONFA_IFINDEX_ALL, 1604 net->ipv6.devconf_all); 1605 rtnl_unlock(); 1606 1607 return err; 1608 } 1609 1610 int ip6mr_sk_done(struct sock *sk) 1611 { 1612 struct net *net = sock_net(sk); 1613 struct ipv6_devconf *devconf; 1614 struct mr_table *mrt; 1615 int err = -EACCES; 1616 1617 if (sk->sk_type != SOCK_RAW || 1618 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1619 return err; 1620 1621 devconf = net->ipv6.devconf_all; 1622 if (!devconf || !atomic_read(&devconf->mc_forwarding)) 1623 return err; 1624 1625 rtnl_lock(); 1626 ip6mr_for_each_table(mrt, net) { 1627 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1628 spin_lock(&mrt_lock); 1629 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1630 /* Note that mroute_sk had SOCK_RCU_FREE set, 1631 * so the RCU grace period before sk freeing 1632 * is guaranteed by sk_destruct() 1633 */ 1634 atomic_dec(&devconf->mc_forwarding); 1635 spin_unlock(&mrt_lock); 1636 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1637 NETCONFA_MC_FORWARDING, 1638 NETCONFA_IFINDEX_ALL, 1639 net->ipv6.devconf_all); 1640 1641 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); 1642 err = 0; 1643 break; 1644 } 1645 } 1646 rtnl_unlock(); 1647 1648 return err; 1649 } 1650 1651 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1652 { 1653 struct mr_table *mrt; 1654 struct flowi6 fl6 = { 1655 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1656 .flowi6_oif = skb->dev->ifindex, 1657 .flowi6_mark = skb->mark, 1658 }; 1659 1660 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1661 return NULL; 1662 1663 return rcu_access_pointer(mrt->mroute_sk); 1664 } 1665 EXPORT_SYMBOL(mroute6_is_socket); 1666 1667 /* 1668 * Socket options and virtual interface manipulation. The whole 1669 * virtual interface system is a complete heap, but unfortunately 1670 * that's how BSD mrouted happens to think. Maybe one day with a proper 1671 * MOSPF/PIM router set up we can clean this up. 1672 */ 1673 1674 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1675 unsigned int optlen) 1676 { 1677 int ret, parent = 0; 1678 struct mif6ctl vif; 1679 struct mf6cctl mfc; 1680 mifi_t mifi; 1681 struct net *net = sock_net(sk); 1682 struct mr_table *mrt; 1683 1684 if (sk->sk_type != SOCK_RAW || 1685 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1686 return -EOPNOTSUPP; 1687 1688 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1689 if (!mrt) 1690 return -ENOENT; 1691 1692 if (optname != MRT6_INIT) { 1693 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1694 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1695 return -EACCES; 1696 } 1697 1698 switch (optname) { 1699 case MRT6_INIT: 1700 if (optlen < sizeof(int)) 1701 return -EINVAL; 1702 1703 return ip6mr_sk_init(mrt, sk); 1704 1705 case MRT6_DONE: 1706 return ip6mr_sk_done(sk); 1707 1708 case MRT6_ADD_MIF: 1709 if (optlen < sizeof(vif)) 1710 return -EINVAL; 1711 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1712 return -EFAULT; 1713 if (vif.mif6c_mifi >= MAXMIFS) 1714 return -ENFILE; 1715 rtnl_lock(); 1716 ret = mif6_add(net, mrt, &vif, 1717 sk == rtnl_dereference(mrt->mroute_sk)); 1718 rtnl_unlock(); 1719 return ret; 1720 1721 case MRT6_DEL_MIF: 1722 if (optlen < sizeof(mifi_t)) 1723 return -EINVAL; 1724 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1725 return -EFAULT; 1726 rtnl_lock(); 1727 ret = mif6_delete(mrt, mifi, 0, NULL); 1728 rtnl_unlock(); 1729 return ret; 1730 1731 /* 1732 * Manipulate the forwarding caches. These live 1733 * in a sort of kernel/user symbiosis. 1734 */ 1735 case MRT6_ADD_MFC: 1736 case MRT6_DEL_MFC: 1737 parent = -1; 1738 fallthrough; 1739 case MRT6_ADD_MFC_PROXY: 1740 case MRT6_DEL_MFC_PROXY: 1741 if (optlen < sizeof(mfc)) 1742 return -EINVAL; 1743 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1744 return -EFAULT; 1745 if (parent == 0) 1746 parent = mfc.mf6cc_parent; 1747 rtnl_lock(); 1748 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1749 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1750 else 1751 ret = ip6mr_mfc_add(net, mrt, &mfc, 1752 sk == 1753 rtnl_dereference(mrt->mroute_sk), 1754 parent); 1755 rtnl_unlock(); 1756 return ret; 1757 1758 case MRT6_FLUSH: 1759 { 1760 int flags; 1761 1762 if (optlen != sizeof(flags)) 1763 return -EINVAL; 1764 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1765 return -EFAULT; 1766 rtnl_lock(); 1767 mroute_clean_tables(mrt, flags); 1768 rtnl_unlock(); 1769 return 0; 1770 } 1771 1772 /* 1773 * Control PIM assert (to activate pim will activate assert) 1774 */ 1775 case MRT6_ASSERT: 1776 { 1777 int v; 1778 1779 if (optlen != sizeof(v)) 1780 return -EINVAL; 1781 if (copy_from_sockptr(&v, optval, sizeof(v))) 1782 return -EFAULT; 1783 mrt->mroute_do_assert = v; 1784 return 0; 1785 } 1786 1787 #ifdef CONFIG_IPV6_PIMSM_V2 1788 case MRT6_PIM: 1789 { 1790 bool do_wrmifwhole; 1791 int v; 1792 1793 if (optlen != sizeof(v)) 1794 return -EINVAL; 1795 if (copy_from_sockptr(&v, optval, sizeof(v))) 1796 return -EFAULT; 1797 1798 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); 1799 v = !!v; 1800 rtnl_lock(); 1801 ret = 0; 1802 if (v != mrt->mroute_do_pim) { 1803 mrt->mroute_do_pim = v; 1804 mrt->mroute_do_assert = v; 1805 mrt->mroute_do_wrvifwhole = do_wrmifwhole; 1806 } 1807 rtnl_unlock(); 1808 return ret; 1809 } 1810 1811 #endif 1812 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1813 case MRT6_TABLE: 1814 { 1815 u32 v; 1816 1817 if (optlen != sizeof(u32)) 1818 return -EINVAL; 1819 if (copy_from_sockptr(&v, optval, sizeof(v))) 1820 return -EFAULT; 1821 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1822 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1823 return -EINVAL; 1824 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1825 return -EBUSY; 1826 1827 rtnl_lock(); 1828 ret = 0; 1829 mrt = ip6mr_new_table(net, v); 1830 if (IS_ERR(mrt)) 1831 ret = PTR_ERR(mrt); 1832 else 1833 raw6_sk(sk)->ip6mr_table = v; 1834 rtnl_unlock(); 1835 return ret; 1836 } 1837 #endif 1838 /* 1839 * Spurious command, or MRT6_VERSION which you cannot 1840 * set. 1841 */ 1842 default: 1843 return -ENOPROTOOPT; 1844 } 1845 } 1846 1847 /* 1848 * Getsock opt support for the multicast routing system. 1849 */ 1850 1851 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1852 sockptr_t optlen) 1853 { 1854 int olr; 1855 int val; 1856 struct net *net = sock_net(sk); 1857 struct mr_table *mrt; 1858 1859 if (sk->sk_type != SOCK_RAW || 1860 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1861 return -EOPNOTSUPP; 1862 1863 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1864 if (!mrt) 1865 return -ENOENT; 1866 1867 switch (optname) { 1868 case MRT6_VERSION: 1869 val = 0x0305; 1870 break; 1871 #ifdef CONFIG_IPV6_PIMSM_V2 1872 case MRT6_PIM: 1873 val = mrt->mroute_do_pim; 1874 break; 1875 #endif 1876 case MRT6_ASSERT: 1877 val = mrt->mroute_do_assert; 1878 break; 1879 default: 1880 return -ENOPROTOOPT; 1881 } 1882 1883 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1884 return -EFAULT; 1885 1886 olr = min_t(int, olr, sizeof(int)); 1887 if (olr < 0) 1888 return -EINVAL; 1889 1890 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1891 return -EFAULT; 1892 if (copy_to_sockptr(optval, &val, olr)) 1893 return -EFAULT; 1894 return 0; 1895 } 1896 1897 /* 1898 * The IP multicast ioctl support routines. 1899 */ 1900 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) 1901 { 1902 struct sioc_sg_req6 *sr; 1903 struct sioc_mif_req6 *vr; 1904 struct vif_device *vif; 1905 struct mfc6_cache *c; 1906 struct net *net = sock_net(sk); 1907 struct mr_table *mrt; 1908 1909 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1910 if (!mrt) 1911 return -ENOENT; 1912 1913 switch (cmd) { 1914 case SIOCGETMIFCNT_IN6: 1915 vr = (struct sioc_mif_req6 *)arg; 1916 if (vr->mifi >= mrt->maxvif) 1917 return -EINVAL; 1918 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); 1919 rcu_read_lock(); 1920 vif = &mrt->vif_table[vr->mifi]; 1921 if (VIF_EXISTS(mrt, vr->mifi)) { 1922 vr->icount = READ_ONCE(vif->pkt_in); 1923 vr->ocount = READ_ONCE(vif->pkt_out); 1924 vr->ibytes = READ_ONCE(vif->bytes_in); 1925 vr->obytes = READ_ONCE(vif->bytes_out); 1926 rcu_read_unlock(); 1927 return 0; 1928 } 1929 rcu_read_unlock(); 1930 return -EADDRNOTAVAIL; 1931 case SIOCGETSGCNT_IN6: 1932 sr = (struct sioc_sg_req6 *)arg; 1933 1934 rcu_read_lock(); 1935 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, 1936 &sr->grp.sin6_addr); 1937 if (c) { 1938 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1939 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1940 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1941 rcu_read_unlock(); 1942 return 0; 1943 } 1944 rcu_read_unlock(); 1945 return -EADDRNOTAVAIL; 1946 default: 1947 return -ENOIOCTLCMD; 1948 } 1949 } 1950 1951 #ifdef CONFIG_COMPAT 1952 struct compat_sioc_sg_req6 { 1953 struct sockaddr_in6 src; 1954 struct sockaddr_in6 grp; 1955 compat_ulong_t pktcnt; 1956 compat_ulong_t bytecnt; 1957 compat_ulong_t wrong_if; 1958 }; 1959 1960 struct compat_sioc_mif_req6 { 1961 mifi_t mifi; 1962 compat_ulong_t icount; 1963 compat_ulong_t ocount; 1964 compat_ulong_t ibytes; 1965 compat_ulong_t obytes; 1966 }; 1967 1968 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1969 { 1970 struct compat_sioc_sg_req6 sr; 1971 struct compat_sioc_mif_req6 vr; 1972 struct vif_device *vif; 1973 struct mfc6_cache *c; 1974 struct net *net = sock_net(sk); 1975 struct mr_table *mrt; 1976 1977 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1978 if (!mrt) 1979 return -ENOENT; 1980 1981 switch (cmd) { 1982 case SIOCGETMIFCNT_IN6: 1983 if (copy_from_user(&vr, arg, sizeof(vr))) 1984 return -EFAULT; 1985 if (vr.mifi >= mrt->maxvif) 1986 return -EINVAL; 1987 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1988 rcu_read_lock(); 1989 vif = &mrt->vif_table[vr.mifi]; 1990 if (VIF_EXISTS(mrt, vr.mifi)) { 1991 vr.icount = READ_ONCE(vif->pkt_in); 1992 vr.ocount = READ_ONCE(vif->pkt_out); 1993 vr.ibytes = READ_ONCE(vif->bytes_in); 1994 vr.obytes = READ_ONCE(vif->bytes_out); 1995 rcu_read_unlock(); 1996 1997 if (copy_to_user(arg, &vr, sizeof(vr))) 1998 return -EFAULT; 1999 return 0; 2000 } 2001 rcu_read_unlock(); 2002 return -EADDRNOTAVAIL; 2003 case SIOCGETSGCNT_IN6: 2004 if (copy_from_user(&sr, arg, sizeof(sr))) 2005 return -EFAULT; 2006 2007 rcu_read_lock(); 2008 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 2009 if (c) { 2010 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 2011 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 2012 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 2013 rcu_read_unlock(); 2014 2015 if (copy_to_user(arg, &sr, sizeof(sr))) 2016 return -EFAULT; 2017 return 0; 2018 } 2019 rcu_read_unlock(); 2020 return -EADDRNOTAVAIL; 2021 default: 2022 return -ENOIOCTLCMD; 2023 } 2024 } 2025 #endif 2026 2027 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2028 { 2029 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 2030 IPSTATS_MIB_OUTFORWDATAGRAMS); 2031 return dst_output(net, sk, skb); 2032 } 2033 2034 /* 2035 * Processing handlers for ip6mr_forward 2036 */ 2037 2038 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt, 2039 struct sk_buff *skb, int vifi) 2040 { 2041 struct vif_device *vif = &mrt->vif_table[vifi]; 2042 struct net_device *vif_dev; 2043 struct ipv6hdr *ipv6h; 2044 struct dst_entry *dst; 2045 struct flowi6 fl6; 2046 2047 vif_dev = vif_dev_read(vif); 2048 if (!vif_dev) 2049 return -1; 2050 2051 #ifdef CONFIG_IPV6_PIMSM_V2 2052 if (vif->flags & MIFF_REGISTER) { 2053 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2054 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2055 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 2056 DEV_STATS_INC(vif_dev, tx_packets); 2057 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2058 return -1; 2059 } 2060 #endif 2061 2062 ipv6h = ipv6_hdr(skb); 2063 2064 fl6 = (struct flowi6) { 2065 .flowi6_oif = vif->link, 2066 .daddr = ipv6h->daddr, 2067 }; 2068 2069 dst = ip6_route_output(net, NULL, &fl6); 2070 if (dst->error) { 2071 dst_release(dst); 2072 return -1; 2073 } 2074 2075 skb_dst_drop(skb); 2076 skb_dst_set(skb, dst); 2077 2078 /* 2079 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2080 * not only before forwarding, but after forwarding on all output 2081 * interfaces. It is clear, if mrouter runs a multicasting 2082 * program, it should receive packets not depending to what interface 2083 * program is joined. 2084 * If we will not make it, the program will have to join on all 2085 * interfaces. On the other hand, multihoming host (or router, but 2086 * not mrouter) cannot join to more than one interface - it will 2087 * result in receiving multiple packets. 2088 */ 2089 skb->dev = vif_dev; 2090 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2091 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2092 2093 /* We are about to write */ 2094 /* XXX: extension headers? */ 2095 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) 2096 return -1; 2097 2098 ipv6h = ipv6_hdr(skb); 2099 ipv6h->hop_limit--; 2100 return 0; 2101 } 2102 2103 static void ip6mr_forward2(struct net *net, struct mr_table *mrt, 2104 struct sk_buff *skb, int vifi) 2105 { 2106 struct net_device *indev = skb->dev; 2107 2108 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2109 goto out_free; 2110 2111 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2112 2113 NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2114 net, NULL, skb, indev, skb->dev, 2115 ip6mr_forward2_finish); 2116 return; 2117 2118 out_free: 2119 kfree_skb(skb); 2120 } 2121 2122 static void ip6mr_output2(struct net *net, struct mr_table *mrt, 2123 struct sk_buff *skb, int vifi) 2124 { 2125 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2126 goto out_free; 2127 2128 ip6_output(net, NULL, skb); 2129 return; 2130 2131 out_free: 2132 kfree_skb(skb); 2133 } 2134 2135 /* Called with rcu_read_lock() */ 2136 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2137 { 2138 int ct; 2139 2140 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ 2141 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2142 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2143 break; 2144 } 2145 return ct; 2146 } 2147 2148 /* Called under rcu_read_lock() */ 2149 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2150 struct net_device *dev, struct sk_buff *skb, 2151 struct mfc6_cache *c) 2152 { 2153 int psend = -1; 2154 int vif, ct; 2155 int true_vifi = ip6mr_find_vif(mrt, dev); 2156 2157 vif = c->_c.mfc_parent; 2158 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2159 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2160 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2161 2162 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2163 struct mfc6_cache *cache_proxy; 2164 2165 /* For an (*,G) entry, we only check that the incoming 2166 * interface is part of the static tree. 2167 */ 2168 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2169 if (cache_proxy && 2170 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2171 goto forward; 2172 } 2173 2174 /* 2175 * Wrong interface: drop packet and (maybe) send PIM assert. 2176 */ 2177 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2178 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2179 2180 if (true_vifi >= 0 && mrt->mroute_do_assert && 2181 /* pimsm uses asserts, when switching from RPT to SPT, 2182 so that we cannot check that packet arrived on an oif. 2183 It is bad, but otherwise we would need to move pretty 2184 large chunk of pimd to kernel. Ough... --ANK 2185 */ 2186 (mrt->mroute_do_pim || 2187 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2188 time_after(jiffies, 2189 c->_c.mfc_un.res.last_assert + 2190 MFC_ASSERT_THRESH)) { 2191 c->_c.mfc_un.res.last_assert = jiffies; 2192 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2193 if (mrt->mroute_do_wrvifwhole) 2194 ip6mr_cache_report(mrt, skb, true_vifi, 2195 MRT6MSG_WRMIFWHOLE); 2196 } 2197 goto dont_forward; 2198 } 2199 2200 forward: 2201 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2202 mrt->vif_table[vif].pkt_in + 1); 2203 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2204 mrt->vif_table[vif].bytes_in + skb->len); 2205 2206 /* 2207 * Forward the frame 2208 */ 2209 if (ipv6_addr_any(&c->mf6c_origin) && 2210 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2211 if (true_vifi >= 0 && 2212 true_vifi != c->_c.mfc_parent && 2213 ipv6_hdr(skb)->hop_limit > 2214 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2215 /* It's an (*,*) entry and the packet is not coming from 2216 * the upstream: forward the packet to the upstream 2217 * only. 2218 */ 2219 psend = c->_c.mfc_parent; 2220 goto last_forward; 2221 } 2222 goto dont_forward; 2223 } 2224 for (ct = c->_c.mfc_un.res.maxvif - 1; 2225 ct >= c->_c.mfc_un.res.minvif; ct--) { 2226 /* For (*,G) entry, don't forward to the incoming interface */ 2227 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2228 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2229 if (psend != -1) { 2230 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2231 if (skb2) 2232 ip6mr_forward2(net, mrt, skb2, psend); 2233 } 2234 psend = ct; 2235 } 2236 } 2237 last_forward: 2238 if (psend != -1) { 2239 ip6mr_forward2(net, mrt, skb, psend); 2240 return; 2241 } 2242 2243 dont_forward: 2244 kfree_skb(skb); 2245 } 2246 2247 /* Called under rcu_read_lock() */ 2248 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, 2249 struct net_device *dev, struct sk_buff *skb, 2250 struct mfc6_cache *c) 2251 { 2252 int psend = -1; 2253 int ct; 2254 2255 WARN_ON_ONCE(!rcu_read_lock_held()); 2256 2257 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2258 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2259 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2260 2261 /* Forward the frame */ 2262 if (ipv6_addr_any(&c->mf6c_origin) && 2263 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2264 if (ipv6_hdr(skb)->hop_limit > 2265 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2266 /* It's an (*,*) entry and the packet is not coming from 2267 * the upstream: forward the packet to the upstream 2268 * only. 2269 */ 2270 psend = c->_c.mfc_parent; 2271 goto last_forward; 2272 } 2273 goto dont_forward; 2274 } 2275 for (ct = c->_c.mfc_un.res.maxvif - 1; 2276 ct >= c->_c.mfc_un.res.minvif; ct--) { 2277 if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2278 if (psend != -1) { 2279 struct sk_buff *skb2; 2280 2281 skb2 = skb_clone(skb, GFP_ATOMIC); 2282 if (skb2) 2283 ip6mr_output2(net, mrt, skb2, psend); 2284 } 2285 psend = ct; 2286 } 2287 } 2288 last_forward: 2289 if (psend != -1) { 2290 ip6mr_output2(net, mrt, skb, psend); 2291 return; 2292 } 2293 2294 dont_forward: 2295 kfree_skb(skb); 2296 } 2297 2298 /* 2299 * Multicast packets for forwarding arrive here 2300 */ 2301 2302 int ip6_mr_input(struct sk_buff *skb) 2303 { 2304 struct net_device *dev = skb->dev; 2305 struct net *net = dev_net_rcu(dev); 2306 struct mfc6_cache *cache; 2307 struct mr_table *mrt; 2308 struct flowi6 fl6 = { 2309 .flowi6_iif = dev->ifindex, 2310 .flowi6_mark = skb->mark, 2311 }; 2312 int err; 2313 2314 /* skb->dev passed in is the master dev for vrfs. 2315 * Get the proper interface that does have a vif associated with it. 2316 */ 2317 if (netif_is_l3_master(dev)) { 2318 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2319 if (!dev) { 2320 kfree_skb(skb); 2321 return -ENODEV; 2322 } 2323 } 2324 2325 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2326 if (err < 0) { 2327 kfree_skb(skb); 2328 return err; 2329 } 2330 2331 cache = ip6mr_cache_find(mrt, 2332 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2333 if (!cache) { 2334 int vif = ip6mr_find_vif(mrt, dev); 2335 2336 if (vif >= 0) 2337 cache = ip6mr_cache_find_any(mrt, 2338 &ipv6_hdr(skb)->daddr, 2339 vif); 2340 } 2341 2342 /* 2343 * No usable cache entry 2344 */ 2345 if (!cache) { 2346 int vif; 2347 2348 vif = ip6mr_find_vif(mrt, dev); 2349 if (vif >= 0) { 2350 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2351 2352 return err; 2353 } 2354 kfree_skb(skb); 2355 return -ENODEV; 2356 } 2357 2358 ip6_mr_forward(net, mrt, dev, skb, cache); 2359 2360 return 0; 2361 } 2362 2363 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2364 { 2365 struct net_device *dev = skb_dst(skb)->dev; 2366 struct flowi6 fl6 = (struct flowi6) { 2367 .flowi6_iif = LOOPBACK_IFINDEX, 2368 .flowi6_mark = skb->mark, 2369 }; 2370 struct mfc6_cache *cache; 2371 struct mr_table *mrt; 2372 int err; 2373 int vif; 2374 2375 guard(rcu)(); 2376 2377 if (IP6CB(skb)->flags & IP6SKB_FORWARDED) 2378 goto ip6_output; 2379 if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) 2380 goto ip6_output; 2381 2382 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2383 if (err < 0) { 2384 kfree_skb(skb); 2385 return err; 2386 } 2387 2388 cache = ip6mr_cache_find(mrt, 2389 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2390 if (!cache) { 2391 vif = ip6mr_find_vif(mrt, dev); 2392 if (vif >= 0) 2393 cache = ip6mr_cache_find_any(mrt, 2394 &ipv6_hdr(skb)->daddr, 2395 vif); 2396 } 2397 2398 /* No usable cache entry */ 2399 if (!cache) { 2400 vif = ip6mr_find_vif(mrt, dev); 2401 if (vif >= 0) 2402 return ip6mr_cache_unresolved(mrt, vif, skb, dev); 2403 goto ip6_output; 2404 } 2405 2406 /* Wrong interface */ 2407 vif = cache->_c.mfc_parent; 2408 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2409 goto ip6_output; 2410 2411 ip6_mr_output_finish(net, mrt, dev, skb, cache); 2412 return 0; 2413 2414 ip6_output: 2415 return ip6_output(net, sk, skb); 2416 } 2417 2418 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2419 u32 portid) 2420 { 2421 int err; 2422 struct mr_table *mrt; 2423 struct mfc6_cache *cache; 2424 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 2425 2426 rcu_read_lock(); 2427 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 2428 if (!mrt) { 2429 rcu_read_unlock(); 2430 return -ENOENT; 2431 } 2432 2433 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2434 if (!cache && skb->dev) { 2435 int vif = ip6mr_find_vif(mrt, skb->dev); 2436 2437 if (vif >= 0) 2438 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2439 vif); 2440 } 2441 2442 if (!cache) { 2443 struct sk_buff *skb2; 2444 struct ipv6hdr *iph; 2445 struct net_device *dev; 2446 int vif; 2447 2448 dev = skb->dev; 2449 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2450 rcu_read_unlock(); 2451 return -ENODEV; 2452 } 2453 2454 /* really correct? */ 2455 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2456 if (!skb2) { 2457 rcu_read_unlock(); 2458 return -ENOMEM; 2459 } 2460 2461 NETLINK_CB(skb2).portid = portid; 2462 skb_reset_transport_header(skb2); 2463 2464 skb_put(skb2, sizeof(struct ipv6hdr)); 2465 skb_reset_network_header(skb2); 2466 2467 iph = ipv6_hdr(skb2); 2468 iph->version = 0; 2469 iph->priority = 0; 2470 iph->flow_lbl[0] = 0; 2471 iph->flow_lbl[1] = 0; 2472 iph->flow_lbl[2] = 0; 2473 iph->payload_len = 0; 2474 iph->nexthdr = IPPROTO_NONE; 2475 iph->hop_limit = 0; 2476 iph->saddr = rt->rt6i_src.addr; 2477 iph->daddr = rt->rt6i_dst.addr; 2478 2479 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2480 rcu_read_unlock(); 2481 2482 return err; 2483 } 2484 2485 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2486 rcu_read_unlock(); 2487 return err; 2488 } 2489 2490 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2491 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2492 int flags) 2493 { 2494 struct nlmsghdr *nlh; 2495 struct rtmsg *rtm; 2496 int err; 2497 2498 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2499 if (!nlh) 2500 return -EMSGSIZE; 2501 2502 rtm = nlmsg_data(nlh); 2503 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2504 rtm->rtm_dst_len = 128; 2505 rtm->rtm_src_len = 128; 2506 rtm->rtm_tos = 0; 2507 rtm->rtm_table = mrt->id; 2508 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2509 goto nla_put_failure; 2510 rtm->rtm_type = RTN_MULTICAST; 2511 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2512 if (c->_c.mfc_flags & MFC_STATIC) 2513 rtm->rtm_protocol = RTPROT_STATIC; 2514 else 2515 rtm->rtm_protocol = RTPROT_MROUTED; 2516 rtm->rtm_flags = 0; 2517 2518 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2519 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2520 goto nla_put_failure; 2521 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2522 /* do not break the dump if cache is unresolved */ 2523 if (err < 0 && err != -ENOENT) 2524 goto nla_put_failure; 2525 2526 nlmsg_end(skb, nlh); 2527 return 0; 2528 2529 nla_put_failure: 2530 nlmsg_cancel(skb, nlh); 2531 return -EMSGSIZE; 2532 } 2533 2534 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2535 u32 portid, u32 seq, struct mr_mfc *c, 2536 int cmd, int flags) 2537 { 2538 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2539 cmd, flags); 2540 } 2541 2542 static int mr6_msgsize(bool unresolved, int maxvif) 2543 { 2544 size_t len = 2545 NLMSG_ALIGN(sizeof(struct rtmsg)) 2546 + nla_total_size(4) /* RTA_TABLE */ 2547 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2548 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2549 ; 2550 2551 if (!unresolved) 2552 len = len 2553 + nla_total_size(4) /* RTA_IIF */ 2554 + nla_total_size(0) /* RTA_MULTIPATH */ 2555 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2556 /* RTA_MFC_STATS */ 2557 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2558 ; 2559 2560 return len; 2561 } 2562 2563 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2564 int cmd) 2565 { 2566 struct net *net = read_pnet(&mrt->net); 2567 struct sk_buff *skb; 2568 int err = -ENOBUFS; 2569 2570 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2571 GFP_ATOMIC); 2572 if (!skb) 2573 goto errout; 2574 2575 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2576 if (err < 0) 2577 goto errout; 2578 2579 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2580 return; 2581 2582 errout: 2583 kfree_skb(skb); 2584 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2585 } 2586 2587 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2588 { 2589 size_t len = 2590 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2591 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2592 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2593 /* IP6MRA_CREPORT_SRC_ADDR */ 2594 + nla_total_size(sizeof(struct in6_addr)) 2595 /* IP6MRA_CREPORT_DST_ADDR */ 2596 + nla_total_size(sizeof(struct in6_addr)) 2597 /* IP6MRA_CREPORT_PKT */ 2598 + nla_total_size(payloadlen) 2599 ; 2600 2601 return len; 2602 } 2603 2604 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2605 { 2606 struct net *net = read_pnet(&mrt->net); 2607 struct nlmsghdr *nlh; 2608 struct rtgenmsg *rtgenm; 2609 struct mrt6msg *msg; 2610 struct sk_buff *skb; 2611 struct nlattr *nla; 2612 int payloadlen; 2613 2614 payloadlen = pkt->len - sizeof(struct mrt6msg); 2615 msg = (struct mrt6msg *)skb_transport_header(pkt); 2616 2617 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2618 if (!skb) 2619 goto errout; 2620 2621 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2622 sizeof(struct rtgenmsg), 0); 2623 if (!nlh) 2624 goto errout; 2625 rtgenm = nlmsg_data(nlh); 2626 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2627 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2628 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2629 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2630 &msg->im6_src) || 2631 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2632 &msg->im6_dst)) 2633 goto nla_put_failure; 2634 2635 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2636 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2637 nla_data(nla), payloadlen)) 2638 goto nla_put_failure; 2639 2640 nlmsg_end(skb, nlh); 2641 2642 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2643 return; 2644 2645 nla_put_failure: 2646 nlmsg_cancel(skb, nlh); 2647 errout: 2648 kfree_skb(skb); 2649 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2650 } 2651 2652 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { 2653 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2654 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2655 [RTA_TABLE] = { .type = NLA_U32 }, 2656 }; 2657 2658 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, 2659 const struct nlmsghdr *nlh, 2660 struct nlattr **tb, 2661 struct netlink_ext_ack *extack) 2662 { 2663 struct rtmsg *rtm; 2664 int err; 2665 2666 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, 2667 extack); 2668 if (err) 2669 return err; 2670 2671 rtm = nlmsg_data(nlh); 2672 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 2673 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 2674 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2675 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2676 NL_SET_ERR_MSG_MOD(extack, 2677 "Invalid values in header for multicast route get request"); 2678 return -EINVAL; 2679 } 2680 2681 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2682 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2683 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 2684 return -EINVAL; 2685 } 2686 2687 return 0; 2688 } 2689 2690 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2691 struct netlink_ext_ack *extack) 2692 { 2693 struct net *net = sock_net(in_skb->sk); 2694 struct in6_addr src = {}, grp = {}; 2695 struct nlattr *tb[RTA_MAX + 1]; 2696 struct mfc6_cache *cache; 2697 struct mr_table *mrt; 2698 struct sk_buff *skb; 2699 u32 tableid; 2700 int err; 2701 2702 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2703 if (err < 0) 2704 return err; 2705 2706 if (tb[RTA_SRC]) 2707 src = nla_get_in6_addr(tb[RTA_SRC]); 2708 if (tb[RTA_DST]) 2709 grp = nla_get_in6_addr(tb[RTA_DST]); 2710 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2711 2712 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); 2713 if (!mrt) { 2714 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); 2715 return -ENOENT; 2716 } 2717 2718 /* entries are added/deleted only under RTNL */ 2719 rcu_read_lock(); 2720 cache = ip6mr_cache_find(mrt, &src, &grp); 2721 rcu_read_unlock(); 2722 if (!cache) { 2723 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); 2724 return -ENOENT; 2725 } 2726 2727 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL); 2728 if (!skb) 2729 return -ENOBUFS; 2730 2731 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2732 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); 2733 if (err < 0) { 2734 kfree_skb(skb); 2735 return err; 2736 } 2737 2738 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2739 } 2740 2741 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2742 { 2743 const struct nlmsghdr *nlh = cb->nlh; 2744 struct fib_dump_filter filter = { 2745 .rtnl_held = true, 2746 }; 2747 int err; 2748 2749 if (cb->strict_check) { 2750 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2751 &filter, cb); 2752 if (err < 0) 2753 return err; 2754 } 2755 2756 if (filter.table_id) { 2757 struct mr_table *mrt; 2758 2759 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2760 if (!mrt) { 2761 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) 2762 return skb->len; 2763 2764 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2765 return -ENOENT; 2766 } 2767 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2768 &mfc_unres_lock, &filter); 2769 return skb->len ? : err; 2770 } 2771 2772 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2773 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2774 } 2775