1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 63 */ 64 65 static DEFINE_SPINLOCK(mrt_lock); 66 67 static struct net_device *vif_dev_read(const struct vif_device *vif) 68 { 69 return rcu_dereference(vif->dev); 70 } 71 72 /* Multicast router control variables */ 73 74 /* Special spinlock for queue of unresolved entries */ 75 static DEFINE_SPINLOCK(mfc_unres_lock); 76 77 /* We return to original Alan's scheme. Hash table of resolved 78 entries is changed only in process context and protected 79 with weak lock mrt_lock. Queue of unresolved entries is protected 80 with strong spinlock mfc_unres_lock. 81 82 In this case data path is free of exclusive locks at all. 83 */ 84 85 static struct kmem_cache *mrt_cachep __read_mostly; 86 87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 88 static void ip6mr_free_table(struct mr_table *mrt, 89 struct list_head *dev_kill_list); 90 91 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 92 struct net_device *dev, struct sk_buff *skb, 93 struct mfc6_cache *cache); 94 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 95 mifi_t mifi, int assert); 96 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 97 int cmd); 98 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 99 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 100 struct netlink_ext_ack *extack); 101 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 102 struct netlink_callback *cb); 103 static void mroute_clean_tables(struct mr_table *mrt, int flags, 104 struct list_head *dev_kill_list); 105 static void ipmr_expire_process(struct timer_list *t); 106 107 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 108 #define ip6mr_for_each_table(mrt, net) \ 109 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 110 lockdep_rtnl_is_held() || \ 111 list_empty(&net->ipv6.mr6_tables)) 112 113 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 114 struct mr_table *mrt) 115 { 116 struct mr_table *ret; 117 118 if (!mrt) 119 ret = list_entry_rcu(net->ipv6.mr6_tables.next, 120 struct mr_table, list); 121 else 122 ret = list_entry_rcu(mrt->list.next, 123 struct mr_table, list); 124 125 if (&ret->list == &net->ipv6.mr6_tables) 126 return NULL; 127 return ret; 128 } 129 130 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) 131 { 132 struct mr_table *mrt; 133 134 ip6mr_for_each_table(mrt, net) { 135 if (mrt->id == id) 136 return mrt; 137 } 138 return NULL; 139 } 140 141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 142 struct mr_table **mrt) 143 { 144 int err; 145 struct ip6mr_result res; 146 struct fib_lookup_arg arg = { 147 .result = &res, 148 .flags = FIB_LOOKUP_NOREF, 149 }; 150 151 /* update flow if oif or iif point to device enslaved to l3mdev */ 152 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 153 154 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 155 flowi6_to_flowi(flp6), 0, &arg); 156 if (err < 0) 157 return err; 158 *mrt = res.mrt; 159 return 0; 160 } 161 162 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 163 int flags, struct fib_lookup_arg *arg) 164 { 165 struct ip6mr_result *res = arg->result; 166 struct mr_table *mrt; 167 168 switch (rule->action) { 169 case FR_ACT_TO_TBL: 170 break; 171 case FR_ACT_UNREACHABLE: 172 return -ENETUNREACH; 173 case FR_ACT_PROHIBIT: 174 return -EACCES; 175 case FR_ACT_BLACKHOLE: 176 default: 177 return -EINVAL; 178 } 179 180 arg->table = fib_rule_get_table(rule, arg); 181 182 mrt = __ip6mr_get_table(rule->fr_net, arg->table); 183 if (!mrt) 184 return -EAGAIN; 185 res->mrt = mrt; 186 return 0; 187 } 188 189 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 190 { 191 return 1; 192 } 193 194 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 195 struct fib_rule_hdr *frh, struct nlattr **tb, 196 struct netlink_ext_ack *extack) 197 { 198 return 0; 199 } 200 201 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 202 struct nlattr **tb) 203 { 204 return 1; 205 } 206 207 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 208 struct fib_rule_hdr *frh) 209 { 210 frh->dst_len = 0; 211 frh->src_len = 0; 212 frh->tos = 0; 213 return 0; 214 } 215 216 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 217 .family = RTNL_FAMILY_IP6MR, 218 .rule_size = sizeof(struct ip6mr_rule), 219 .addr_size = sizeof(struct in6_addr), 220 .action = ip6mr_rule_action, 221 .match = ip6mr_rule_match, 222 .configure = ip6mr_rule_configure, 223 .compare = ip6mr_rule_compare, 224 .fill = ip6mr_rule_fill, 225 .nlgroup = RTNLGRP_IPV6_RULE, 226 .owner = THIS_MODULE, 227 }; 228 229 static int __net_init ip6mr_rules_init(struct net *net) 230 { 231 struct fib_rules_ops *ops; 232 LIST_HEAD(dev_kill_list); 233 struct mr_table *mrt; 234 int err; 235 236 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 237 if (IS_ERR(ops)) 238 return PTR_ERR(ops); 239 240 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 241 242 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 243 if (IS_ERR(mrt)) { 244 err = PTR_ERR(mrt); 245 goto err1; 246 } 247 248 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); 249 if (err < 0) 250 goto err2; 251 252 net->ipv6.mr6_rules_ops = ops; 253 return 0; 254 255 err2: 256 ip6mr_free_table(mrt, &dev_kill_list); 257 err1: 258 fib_rules_unregister(ops); 259 return err; 260 } 261 262 static void __net_exit ip6mr_rules_exit(struct net *net) 263 { 264 fib_rules_unregister(net->ipv6.mr6_rules_ops); 265 } 266 267 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net, 268 struct list_head *dev_kill_list) 269 { 270 struct mr_table *mrt, *next; 271 272 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 273 list_del_rcu(&mrt->list); 274 ip6mr_free_table(mrt, dev_kill_list); 275 } 276 } 277 278 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 279 struct netlink_ext_ack *extack) 280 { 281 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 282 } 283 284 static unsigned int ip6mr_rules_seq_read(const struct net *net) 285 { 286 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 287 } 288 289 bool ip6mr_rule_default(const struct fib_rule *rule) 290 { 291 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 292 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 293 } 294 EXPORT_SYMBOL(ip6mr_rule_default); 295 #else 296 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 297 struct mr_table *mrt) 298 { 299 if (!mrt) 300 return rcu_dereference(net->ipv6.mrt6); 301 return NULL; 302 } 303 304 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) 305 { 306 return rcu_dereference_check(net->ipv6.mrt6, 307 lockdep_rtnl_is_held() || 308 !rcu_access_pointer(net->ipv6.mrt6)); 309 } 310 311 #define ip6mr_for_each_table(mrt, net) \ 312 for (mrt = __ip6mr_get_table(net, 0); mrt; mrt = NULL) 313 314 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 315 struct mr_table **mrt) 316 { 317 *mrt = rcu_dereference(net->ipv6.mrt6); 318 if (!*mrt) 319 return -EAGAIN; 320 return 0; 321 } 322 323 static int __net_init ip6mr_rules_init(struct net *net) 324 { 325 struct mr_table *mrt; 326 327 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 328 if (IS_ERR(mrt)) 329 return PTR_ERR(mrt); 330 331 rcu_assign_pointer(net->ipv6.mrt6, mrt); 332 return 0; 333 } 334 335 static void __net_exit ip6mr_rules_exit(struct net *net) 336 { 337 } 338 339 static void __net_exit ip6mr_rules_exit_rtnl(struct net *net, 340 struct list_head *dev_kill_list) 341 { 342 struct mr_table *mrt = rcu_dereference_protected(net->ipv6.mrt6, 1); 343 344 RCU_INIT_POINTER(net->ipv6.mrt6, NULL); 345 ip6mr_free_table(mrt, dev_kill_list); 346 } 347 348 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 349 struct netlink_ext_ack *extack) 350 { 351 return 0; 352 } 353 354 static unsigned int ip6mr_rules_seq_read(const struct net *net) 355 { 356 return 0; 357 } 358 #endif 359 360 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 361 { 362 struct mr_table *mrt; 363 364 rcu_read_lock(); 365 mrt = __ip6mr_get_table(net, id); 366 rcu_read_unlock(); 367 368 return mrt; 369 } 370 371 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 372 const void *ptr) 373 { 374 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 375 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 376 377 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || 378 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 379 } 380 381 static const struct rhashtable_params ip6mr_rht_params = { 382 .head_offset = offsetof(struct mr_mfc, mnode), 383 .key_offset = offsetof(struct mfc6_cache, cmparg), 384 .key_len = sizeof(struct mfc6_cache_cmp_arg), 385 .nelem_hint = 3, 386 .obj_cmpfn = ip6mr_hash_cmp, 387 .automatic_shrinking = true, 388 }; 389 390 static void ip6mr_new_table_set(struct mr_table *mrt, 391 struct net *net) 392 { 393 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 394 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 395 #endif 396 } 397 398 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 399 .mf6c_origin = IN6ADDR_ANY_INIT, 400 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 401 }; 402 403 static struct mr_table_ops ip6mr_mr_table_ops = { 404 .rht_params = &ip6mr_rht_params, 405 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 406 }; 407 408 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 409 { 410 struct mr_table *mrt; 411 412 mrt = __ip6mr_get_table(net, id); 413 if (mrt) 414 return mrt; 415 416 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 417 ipmr_expire_process, ip6mr_new_table_set); 418 } 419 420 static void ip6mr_free_table(struct mr_table *mrt, 421 struct list_head *dev_kill_list) 422 { 423 struct net *net = read_pnet(&mrt->net); 424 LIST_HEAD(ip6mr_dev_kill_list); 425 426 WARN_ON_ONCE(!mr_can_free_table(net)); 427 428 timer_shutdown_sync(&mrt->ipmr_expire_timer); 429 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 430 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC, 431 &ip6mr_dev_kill_list); 432 433 mr_table_free(mrt); 434 435 WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ip6mr_dev_kill_list)); 436 list_splice(&ip6mr_dev_kill_list, dev_kill_list); 437 } 438 439 #ifdef CONFIG_PROC_FS 440 /* The /proc interfaces to multicast routing 441 * /proc/ip6_mr_cache /proc/ip6_mr_vif 442 */ 443 444 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 445 __acquires(RCU) 446 { 447 struct mr_vif_iter *iter = seq->private; 448 struct net *net = seq_file_net(seq); 449 struct mr_table *mrt; 450 451 rcu_read_lock(); 452 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 453 if (!mrt) { 454 rcu_read_unlock(); 455 return ERR_PTR(-ENOENT); 456 } 457 458 iter->mrt = mrt; 459 460 return mr_vif_seq_start(seq, pos); 461 } 462 463 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 464 __releases(RCU) 465 { 466 rcu_read_unlock(); 467 } 468 469 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 470 { 471 struct mr_vif_iter *iter = seq->private; 472 struct mr_table *mrt = iter->mrt; 473 474 if (v == SEQ_START_TOKEN) { 475 seq_puts(seq, 476 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 477 } else { 478 const struct vif_device *vif = v; 479 const struct net_device *vif_dev; 480 const char *name; 481 482 vif_dev = vif_dev_read(vif); 483 name = vif_dev ? vif_dev->name : "none"; 484 485 seq_printf(seq, 486 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 487 vif - mrt->vif_table, 488 name, vif->bytes_in, vif->pkt_in, 489 vif->bytes_out, vif->pkt_out, 490 vif->flags); 491 } 492 return 0; 493 } 494 495 static const struct seq_operations ip6mr_vif_seq_ops = { 496 .start = ip6mr_vif_seq_start, 497 .next = mr_vif_seq_next, 498 .stop = ip6mr_vif_seq_stop, 499 .show = ip6mr_vif_seq_show, 500 }; 501 502 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 503 { 504 struct net *net = seq_file_net(seq); 505 struct mr_table *mrt; 506 507 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 508 if (!mrt) 509 return ERR_PTR(-ENOENT); 510 511 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 512 } 513 514 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 515 { 516 int n; 517 518 if (v == SEQ_START_TOKEN) { 519 seq_puts(seq, 520 "Group " 521 "Origin " 522 "Iif Pkts Bytes Wrong Oifs\n"); 523 } else { 524 const struct mfc6_cache *mfc = v; 525 const struct mr_mfc_iter *it = seq->private; 526 struct mr_table *mrt = it->mrt; 527 528 seq_printf(seq, "%pI6 %pI6 %-3hd", 529 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 530 mfc->_c.mfc_parent); 531 532 if (it->cache != &mrt->mfc_unres_queue) { 533 seq_printf(seq, " %8lu %8lu %8lu", 534 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 535 atomic_long_read(&mfc->_c.mfc_un.res.bytes), 536 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 537 for (n = mfc->_c.mfc_un.res.minvif; 538 n < mfc->_c.mfc_un.res.maxvif; n++) { 539 if (VIF_EXISTS(mrt, n) && 540 mfc->_c.mfc_un.res.ttls[n] < 255) 541 seq_printf(seq, 542 " %2d:%-3d", n, 543 mfc->_c.mfc_un.res.ttls[n]); 544 } 545 } else { 546 /* unresolved mfc_caches don't contain 547 * pkt, bytes and wrong_if values 548 */ 549 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 550 } 551 seq_putc(seq, '\n'); 552 } 553 return 0; 554 } 555 556 static const struct seq_operations ipmr_mfc_seq_ops = { 557 .start = ipmr_mfc_seq_start, 558 .next = mr_mfc_seq_next, 559 .stop = mr_mfc_seq_stop, 560 .show = ipmr_mfc_seq_show, 561 }; 562 #endif 563 564 #ifdef CONFIG_IPV6_PIMSM_V2 565 566 static int pim6_rcv(struct sk_buff *skb) 567 { 568 struct pimreghdr *pim; 569 struct ipv6hdr *encap; 570 struct net_device *reg_dev = NULL; 571 struct net *net = dev_net(skb->dev); 572 struct mr_table *mrt; 573 struct flowi6 fl6 = { 574 .flowi6_iif = skb->dev->ifindex, 575 .flowi6_mark = skb->mark, 576 }; 577 int reg_vif_num; 578 579 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 580 goto drop; 581 582 pim = (struct pimreghdr *)skb_transport_header(skb); 583 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 584 (pim->flags & PIM_NULL_REGISTER) || 585 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 586 sizeof(*pim), IPPROTO_PIM, 587 csum_partial((void *)pim, sizeof(*pim), 0)) && 588 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 589 goto drop; 590 591 /* check if the inner packet is destined to mcast group */ 592 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 593 sizeof(*pim)); 594 595 if (!ipv6_addr_is_multicast(&encap->daddr) || 596 encap->payload_len == 0 || 597 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 598 goto drop; 599 600 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 601 goto drop; 602 603 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ 604 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 605 if (reg_vif_num >= 0) 606 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); 607 608 if (!reg_dev) 609 goto drop; 610 611 skb->mac_header = skb->network_header; 612 skb_pull(skb, (u8 *)encap - skb->data); 613 skb_reset_network_header(skb); 614 skb->protocol = htons(ETH_P_IPV6); 615 skb->ip_summed = CHECKSUM_NONE; 616 617 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 618 619 netif_rx(skb); 620 621 return 0; 622 drop: 623 kfree_skb(skb); 624 return 0; 625 } 626 627 static const struct inet6_protocol pim6_protocol = { 628 .handler = pim6_rcv, 629 }; 630 631 /* Service routines creating virtual interfaces: PIMREG */ 632 633 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 634 struct net_device *dev) 635 { 636 struct net *net = dev_net(dev); 637 struct mr_table *mrt; 638 struct flowi6 fl6 = { 639 .flowi6_oif = dev->ifindex, 640 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 641 .flowi6_mark = skb->mark, 642 }; 643 644 if (!pskb_inet_may_pull(skb)) 645 goto tx_err; 646 647 rcu_read_lock(); 648 649 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 650 goto tx_lookup_err; 651 652 DEV_STATS_ADD(dev, tx_bytes, skb->len); 653 DEV_STATS_INC(dev, tx_packets); 654 655 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 656 MRT6MSG_WHOLEPKT); 657 rcu_read_unlock(); 658 kfree_skb(skb); 659 return NETDEV_TX_OK; 660 661 tx_lookup_err: 662 rcu_read_unlock(); 663 tx_err: 664 DEV_STATS_INC(dev, tx_errors); 665 kfree_skb(skb); 666 return NETDEV_TX_OK; 667 } 668 669 static int reg_vif_get_iflink(const struct net_device *dev) 670 { 671 return 0; 672 } 673 674 static const struct net_device_ops reg_vif_netdev_ops = { 675 .ndo_start_xmit = reg_vif_xmit, 676 .ndo_get_iflink = reg_vif_get_iflink, 677 }; 678 679 static void reg_vif_setup(struct net_device *dev) 680 { 681 dev->type = ARPHRD_PIMREG; 682 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 683 dev->flags = IFF_NOARP; 684 dev->netdev_ops = ®_vif_netdev_ops; 685 dev->needs_free_netdev = true; 686 dev->netns_immutable = true; 687 } 688 689 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 690 { 691 struct net_device *dev; 692 char name[IFNAMSIZ]; 693 694 if (mrt->id == RT6_TABLE_DFLT) 695 sprintf(name, "pim6reg"); 696 else 697 sprintf(name, "pim6reg%u", mrt->id); 698 699 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 700 if (!dev) 701 return NULL; 702 703 dev_net_set(dev, net); 704 705 if (register_netdevice(dev)) { 706 free_netdev(dev); 707 return NULL; 708 } 709 710 if (dev_open(dev, NULL)) 711 goto failure; 712 713 dev_hold(dev); 714 return dev; 715 716 failure: 717 unregister_netdevice(dev); 718 return NULL; 719 } 720 #endif 721 722 static int call_ip6mr_vif_entry_notifiers(struct net *net, 723 enum fib_event_type event_type, 724 struct vif_device *vif, 725 struct net_device *vif_dev, 726 mifi_t vif_index, u32 tb_id) 727 { 728 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 729 vif, vif_dev, vif_index, tb_id, 730 &net->ipv6.ipmr_seq); 731 } 732 733 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 734 enum fib_event_type event_type, 735 struct mfc6_cache *mfc, u32 tb_id) 736 { 737 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 738 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 739 } 740 741 /* Delete a VIF entry */ 742 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 743 struct list_head *head) 744 { 745 struct vif_device *v; 746 struct net_device *dev; 747 struct inet6_dev *in6_dev; 748 749 if (vifi < 0 || vifi >= mrt->maxvif) 750 return -EADDRNOTAVAIL; 751 752 v = &mrt->vif_table[vifi]; 753 754 dev = rtnl_dereference(v->dev); 755 if (!dev) 756 return -EADDRNOTAVAIL; 757 758 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 759 FIB_EVENT_VIF_DEL, v, dev, 760 vifi, mrt->id); 761 spin_lock(&mrt_lock); 762 RCU_INIT_POINTER(v->dev, NULL); 763 764 #ifdef CONFIG_IPV6_PIMSM_V2 765 if (vifi == mrt->mroute_reg_vif_num) { 766 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 767 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 768 } 769 #endif 770 771 if (vifi + 1 == mrt->maxvif) { 772 int tmp; 773 for (tmp = vifi - 1; tmp >= 0; tmp--) { 774 if (VIF_EXISTS(mrt, tmp)) 775 break; 776 } 777 WRITE_ONCE(mrt->maxvif, tmp + 1); 778 } 779 780 spin_unlock(&mrt_lock); 781 782 dev_set_allmulti(dev, -1); 783 784 in6_dev = __in6_dev_get(dev); 785 if (in6_dev) { 786 atomic_dec(&in6_dev->cnf.mc_forwarding); 787 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 788 NETCONFA_MC_FORWARDING, 789 dev->ifindex, &in6_dev->cnf); 790 } 791 792 if ((v->flags & MIFF_REGISTER) && !notify) 793 unregister_netdevice_queue(dev, head); 794 795 netdev_put(dev, &v->dev_tracker); 796 return 0; 797 } 798 799 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 800 { 801 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 802 803 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 804 } 805 806 static inline void ip6mr_cache_free(struct mfc6_cache *c) 807 { 808 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 809 } 810 811 /* Destroy an unresolved cache entry, killing queued skbs 812 and reporting error to netlink readers. 813 */ 814 815 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 816 { 817 struct net *net = read_pnet(&mrt->net); 818 struct sk_buff *skb; 819 820 WRITE_ONCE(mrt->cache_resolve_queue_len, 821 mrt->cache_resolve_queue_len - 1); 822 823 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 824 if (ipv6_hdr(skb)->version == 0) { 825 struct nlmsghdr *nlh = skb_pull(skb, 826 sizeof(struct ipv6hdr)); 827 nlh->nlmsg_type = NLMSG_ERROR; 828 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 829 skb_trim(skb, nlh->nlmsg_len); 830 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 831 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 832 } else 833 kfree_skb(skb); 834 } 835 836 ip6mr_cache_free(c); 837 } 838 839 840 /* Timer process for all the unresolved queue. */ 841 842 static void ipmr_do_expire_process(struct mr_table *mrt) 843 { 844 unsigned long now = jiffies; 845 unsigned long expires = 10 * HZ; 846 struct mr_mfc *c, *next; 847 848 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 849 if (time_after(c->mfc_un.unres.expires, now)) { 850 /* not yet... */ 851 unsigned long interval = c->mfc_un.unres.expires - now; 852 if (interval < expires) 853 expires = interval; 854 continue; 855 } 856 857 list_del(&c->list); 858 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 859 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 860 } 861 862 if (!list_empty(&mrt->mfc_unres_queue)) 863 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 864 } 865 866 static void ipmr_expire_process(struct timer_list *t) 867 { 868 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 869 870 if (!spin_trylock(&mfc_unres_lock)) { 871 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 872 return; 873 } 874 875 if (!list_empty(&mrt->mfc_unres_queue)) 876 ipmr_do_expire_process(mrt); 877 878 spin_unlock(&mfc_unres_lock); 879 } 880 881 /* Fill oifs list. It is called under locked mrt_lock. */ 882 883 static void ip6mr_update_thresholds(struct mr_table *mrt, 884 struct mr_mfc *cache, 885 unsigned char *ttls) 886 { 887 int vifi; 888 889 cache->mfc_un.res.minvif = MAXMIFS; 890 cache->mfc_un.res.maxvif = 0; 891 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 892 893 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 894 if (VIF_EXISTS(mrt, vifi) && 895 ttls[vifi] && ttls[vifi] < 255) { 896 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 897 if (cache->mfc_un.res.minvif > vifi) 898 cache->mfc_un.res.minvif = vifi; 899 if (cache->mfc_un.res.maxvif <= vifi) 900 cache->mfc_un.res.maxvif = vifi + 1; 901 } 902 } 903 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 904 } 905 906 static int mif6_add(struct net *net, struct mr_table *mrt, 907 struct mif6ctl *vifc, int mrtsock) 908 { 909 int vifi = vifc->mif6c_mifi; 910 struct vif_device *v = &mrt->vif_table[vifi]; 911 struct net_device *dev; 912 struct inet6_dev *in6_dev; 913 int err; 914 915 /* Is vif busy ? */ 916 if (VIF_EXISTS(mrt, vifi)) 917 return -EADDRINUSE; 918 919 switch (vifc->mif6c_flags) { 920 #ifdef CONFIG_IPV6_PIMSM_V2 921 case MIFF_REGISTER: 922 /* 923 * Special Purpose VIF in PIM 924 * All the packets will be sent to the daemon 925 */ 926 if (mrt->mroute_reg_vif_num >= 0) 927 return -EADDRINUSE; 928 dev = ip6mr_reg_vif(net, mrt); 929 if (!dev) 930 return -ENOBUFS; 931 err = dev_set_allmulti(dev, 1); 932 if (err) { 933 unregister_netdevice(dev); 934 dev_put(dev); 935 return err; 936 } 937 break; 938 #endif 939 case 0: 940 dev = dev_get_by_index(net, vifc->mif6c_pifi); 941 if (!dev) 942 return -EADDRNOTAVAIL; 943 err = dev_set_allmulti(dev, 1); 944 if (err) { 945 dev_put(dev); 946 return err; 947 } 948 break; 949 default: 950 return -EINVAL; 951 } 952 953 in6_dev = __in6_dev_get(dev); 954 if (in6_dev) { 955 atomic_inc(&in6_dev->cnf.mc_forwarding); 956 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 957 NETCONFA_MC_FORWARDING, 958 dev->ifindex, &in6_dev->cnf); 959 } 960 961 /* Fill in the VIF structures */ 962 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 963 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 964 MIFF_REGISTER); 965 966 /* And finish update writing critical data */ 967 spin_lock(&mrt_lock); 968 rcu_assign_pointer(v->dev, dev); 969 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 970 #ifdef CONFIG_IPV6_PIMSM_V2 971 if (v->flags & MIFF_REGISTER) 972 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 973 #endif 974 if (vifi + 1 > mrt->maxvif) 975 WRITE_ONCE(mrt->maxvif, vifi + 1); 976 spin_unlock(&mrt_lock); 977 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 978 v, dev, vifi, mrt->id); 979 return 0; 980 } 981 982 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 983 const struct in6_addr *origin, 984 const struct in6_addr *mcastgrp) 985 { 986 struct mfc6_cache_cmp_arg arg = { 987 .mf6c_origin = *origin, 988 .mf6c_mcastgrp = *mcastgrp, 989 }; 990 991 return mr_mfc_find(mrt, &arg); 992 } 993 994 /* Look for a (*,G) entry */ 995 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 996 struct in6_addr *mcastgrp, 997 mifi_t mifi) 998 { 999 struct mfc6_cache_cmp_arg arg = { 1000 .mf6c_origin = in6addr_any, 1001 .mf6c_mcastgrp = *mcastgrp, 1002 }; 1003 1004 if (ipv6_addr_any(mcastgrp)) 1005 return mr_mfc_find_any_parent(mrt, mifi); 1006 return mr_mfc_find_any(mrt, mifi, &arg); 1007 } 1008 1009 /* Look for a (S,G,iif) entry if parent != -1 */ 1010 static struct mfc6_cache * 1011 ip6mr_cache_find_parent(struct mr_table *mrt, 1012 const struct in6_addr *origin, 1013 const struct in6_addr *mcastgrp, 1014 int parent) 1015 { 1016 struct mfc6_cache_cmp_arg arg = { 1017 .mf6c_origin = *origin, 1018 .mf6c_mcastgrp = *mcastgrp, 1019 }; 1020 1021 return mr_mfc_find_parent(mrt, &arg, parent); 1022 } 1023 1024 /* Allocate a multicast cache entry */ 1025 static struct mfc6_cache *ip6mr_cache_alloc(void) 1026 { 1027 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1028 if (!c) 1029 return NULL; 1030 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1031 c->_c.mfc_un.res.minvif = MAXMIFS; 1032 c->_c.free = ip6mr_cache_free_rcu; 1033 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1034 return c; 1035 } 1036 1037 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1038 { 1039 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1040 if (!c) 1041 return NULL; 1042 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1043 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1044 return c; 1045 } 1046 1047 /* 1048 * A cache entry has gone into a resolved state from queued 1049 */ 1050 1051 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1052 struct mfc6_cache *uc, struct mfc6_cache *c) 1053 { 1054 struct sk_buff *skb; 1055 1056 /* 1057 * Play the pending entries through our router 1058 */ 1059 1060 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1061 if (ipv6_hdr(skb)->version == 0) { 1062 struct nlmsghdr *nlh = skb_pull(skb, 1063 sizeof(struct ipv6hdr)); 1064 1065 if (mr_fill_mroute(mrt, skb, &c->_c, 1066 nlmsg_data(nlh)) > 0) { 1067 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1068 } else { 1069 nlh->nlmsg_type = NLMSG_ERROR; 1070 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1071 skb_trim(skb, nlh->nlmsg_len); 1072 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1073 } 1074 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1075 } else { 1076 rcu_read_lock(); 1077 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1078 rcu_read_unlock(); 1079 } 1080 } 1081 } 1082 1083 /* 1084 * Bounce a cache query up to pim6sd and netlink. 1085 * 1086 * Called under rcu_read_lock() 1087 */ 1088 1089 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 1090 mifi_t mifi, int assert) 1091 { 1092 enum skb_drop_reason reason; 1093 struct sock *mroute6_sk; 1094 struct sk_buff *skb; 1095 struct mrt6msg *msg; 1096 1097 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1098 if (!mroute6_sk) 1099 return -EINVAL; 1100 1101 #ifdef CONFIG_IPV6_PIMSM_V2 1102 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) 1103 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1104 +sizeof(*msg)); 1105 else 1106 #endif 1107 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1108 1109 if (!skb) 1110 return -ENOBUFS; 1111 1112 /* I suppose that internal messages 1113 * do not require checksums */ 1114 1115 skb->ip_summed = CHECKSUM_UNNECESSARY; 1116 1117 #ifdef CONFIG_IPV6_PIMSM_V2 1118 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { 1119 /* Ugly, but we have no choice with this interface. 1120 Duplicate old header, fix length etc. 1121 And all this only to mangle msg->im6_msgtype and 1122 to set msg->im6_mbz to "mbz" :-) 1123 */ 1124 __skb_pull(skb, skb_network_offset(pkt)); 1125 1126 skb_push(skb, sizeof(*msg)); 1127 skb_reset_transport_header(skb); 1128 msg = (struct mrt6msg *)skb_transport_header(skb); 1129 msg->im6_mbz = 0; 1130 msg->im6_msgtype = assert; 1131 if (assert == MRT6MSG_WRMIFWHOLE) 1132 msg->im6_mif = mifi; 1133 else 1134 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); 1135 msg->im6_pad = 0; 1136 msg->im6_src = ipv6_hdr(pkt)->saddr; 1137 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1138 1139 skb->ip_summed = CHECKSUM_UNNECESSARY; 1140 } else 1141 #endif 1142 { 1143 /* 1144 * Copy the IP header 1145 */ 1146 1147 skb_put(skb, sizeof(struct ipv6hdr)); 1148 skb_reset_network_header(skb); 1149 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1150 1151 /* 1152 * Add our header 1153 */ 1154 skb_put(skb, sizeof(*msg)); 1155 skb_reset_transport_header(skb); 1156 msg = (struct mrt6msg *)skb_transport_header(skb); 1157 1158 msg->im6_mbz = 0; 1159 msg->im6_msgtype = assert; 1160 msg->im6_mif = mifi; 1161 msg->im6_pad = 0; 1162 msg->im6_src = ipv6_hdr(pkt)->saddr; 1163 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1164 1165 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1166 skb->ip_summed = CHECKSUM_UNNECESSARY; 1167 } 1168 1169 mrt6msg_netlink_event(mrt, skb); 1170 1171 /* Deliver to user space multicast routing algorithms */ 1172 reason = sock_queue_rcv_skb_reason(mroute6_sk, skb); 1173 1174 if (reason) { 1175 sk_skb_reason_drop(mroute6_sk, skb, reason); 1176 return -ENOMEM; 1177 } 1178 1179 return 0; 1180 } 1181 1182 /* Queue a packet for resolution. It gets locked cache entry! */ 1183 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, 1184 struct sk_buff *skb, struct net_device *dev) 1185 { 1186 struct net *net = read_pnet(&mrt->net); 1187 struct mfc6_cache *c = NULL; 1188 bool found = false; 1189 int err; 1190 1191 spin_lock_bh(&mfc_unres_lock); 1192 1193 if (!check_net(net)) { 1194 err = -EINVAL; 1195 goto err; 1196 } 1197 1198 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1199 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1200 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1201 found = true; 1202 break; 1203 } 1204 } 1205 1206 if (!found) { 1207 /* 1208 * Create a new entry if allowable 1209 */ 1210 1211 c = ip6mr_cache_alloc_unres(); 1212 if (!c) { 1213 err = -ENOBUFS; 1214 goto err; 1215 } 1216 1217 /* Fill in the new cache entry */ 1218 c->_c.mfc_parent = -1; 1219 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1220 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1221 1222 /* 1223 * Reflect first query at pim6sd 1224 */ 1225 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1226 if (err < 0) 1227 goto err; 1228 1229 WRITE_ONCE(mrt->cache_resolve_queue_len, 1230 mrt->cache_resolve_queue_len + 1); 1231 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1232 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1233 1234 ipmr_do_expire_process(mrt); 1235 } 1236 1237 /* See if we can append the packet */ 1238 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1239 c = NULL; 1240 err = -ENOBUFS; 1241 goto err; 1242 } 1243 1244 if (dev) { 1245 skb->dev = dev; 1246 skb->skb_iif = dev->ifindex; 1247 } 1248 1249 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1250 1251 spin_unlock_bh(&mfc_unres_lock); 1252 return 0; 1253 1254 err: 1255 spin_unlock_bh(&mfc_unres_lock); 1256 if (c) 1257 ip6mr_cache_free(c); 1258 kfree_skb(skb); 1259 return err; 1260 } 1261 1262 /* 1263 * MFC6 cache manipulation by user space 1264 */ 1265 1266 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1267 int parent) 1268 { 1269 struct mfc6_cache *c; 1270 1271 rcu_read_lock(); 1272 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1273 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1274 rcu_read_unlock(); 1275 if (!c) 1276 return -ENOENT; 1277 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1278 list_del_rcu(&c->_c.list); 1279 1280 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1281 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1282 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1283 mr_cache_put(&c->_c); 1284 return 0; 1285 } 1286 1287 static int ip6mr_device_event(struct notifier_block *this, 1288 unsigned long event, void *ptr) 1289 { 1290 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1291 struct net *net = dev_net(dev); 1292 struct mr_table *mrt; 1293 struct vif_device *v; 1294 int ct; 1295 1296 if (event != NETDEV_UNREGISTER) 1297 return NOTIFY_DONE; 1298 1299 ip6mr_for_each_table(mrt, net) { 1300 v = &mrt->vif_table[0]; 1301 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1302 if (rcu_access_pointer(v->dev) == dev) 1303 mif6_delete(mrt, ct, 1, NULL); 1304 } 1305 } 1306 1307 return NOTIFY_DONE; 1308 } 1309 1310 static unsigned int ip6mr_seq_read(const struct net *net) 1311 { 1312 return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); 1313 } 1314 1315 static int ip6mr_dump(struct net *net, struct notifier_block *nb, 1316 struct netlink_ext_ack *extack) 1317 { 1318 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, 1319 ip6mr_mr_table_iter, extack); 1320 } 1321 1322 static struct notifier_block ip6_mr_notifier = { 1323 .notifier_call = ip6mr_device_event 1324 }; 1325 1326 static const struct fib_notifier_ops ip6mr_notifier_ops_template = { 1327 .family = RTNL_FAMILY_IP6MR, 1328 .fib_seq_read = ip6mr_seq_read, 1329 .fib_dump = ip6mr_dump, 1330 .owner = THIS_MODULE, 1331 }; 1332 1333 static int __net_init ip6mr_notifier_init(struct net *net) 1334 { 1335 struct fib_notifier_ops *ops; 1336 1337 atomic_set(&net->ipv6.ipmr_seq, 0); 1338 1339 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); 1340 if (IS_ERR(ops)) 1341 return PTR_ERR(ops); 1342 1343 net->ipv6.ip6mr_notifier_ops = ops; 1344 1345 return 0; 1346 } 1347 1348 static void __net_exit ip6mr_notifier_exit(struct net *net) 1349 { 1350 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); 1351 net->ipv6.ip6mr_notifier_ops = NULL; 1352 } 1353 1354 /* Setup for IP multicast routing */ 1355 static int __net_init ip6mr_net_init(struct net *net) 1356 { 1357 #ifdef CONFIG_PROC_FS 1358 LIST_HEAD(dev_kill_list); 1359 #endif 1360 int err; 1361 1362 mutex_init(&net->ipv6.mfc_mutex); 1363 1364 err = ip6mr_notifier_init(net); 1365 if (err) 1366 return err; 1367 1368 err = ip6mr_rules_init(net); 1369 if (err < 0) 1370 goto ip6mr_rules_fail; 1371 1372 #ifdef CONFIG_PROC_FS 1373 err = -ENOMEM; 1374 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, 1375 sizeof(struct mr_vif_iter))) 1376 goto proc_vif_fail; 1377 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 1378 sizeof(struct mr_mfc_iter))) 1379 goto proc_cache_fail; 1380 #endif 1381 1382 return 0; 1383 1384 #ifdef CONFIG_PROC_FS 1385 proc_cache_fail: 1386 remove_proc_entry("ip6_mr_vif", net->proc_net); 1387 proc_vif_fail: 1388 ip6mr_rules_exit_rtnl(net, &dev_kill_list); 1389 ip6mr_rules_exit(net); 1390 #endif 1391 ip6mr_rules_fail: 1392 ip6mr_notifier_exit(net); 1393 return err; 1394 } 1395 1396 static void __net_exit ip6mr_net_exit(struct net *net) 1397 { 1398 #ifdef CONFIG_PROC_FS 1399 remove_proc_entry("ip6_mr_cache", net->proc_net); 1400 remove_proc_entry("ip6_mr_vif", net->proc_net); 1401 #endif 1402 ip6mr_rules_exit(net); 1403 ip6mr_notifier_exit(net); 1404 } 1405 1406 static void __net_exit ip6mr_net_exit_rtnl(struct net *net, 1407 struct list_head *dev_kill_list) 1408 { 1409 ip6mr_rules_exit_rtnl(net, dev_kill_list); 1410 } 1411 1412 static struct pernet_operations ip6mr_net_ops = { 1413 .init = ip6mr_net_init, 1414 .exit = ip6mr_net_exit, 1415 .exit_rtnl = ip6mr_net_exit_rtnl, 1416 }; 1417 1418 static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { 1419 {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, 1420 .msgtype = RTM_GETROUTE, 1421 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute, 1422 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, 1423 }; 1424 1425 int __init ip6_mr_init(void) 1426 { 1427 int err; 1428 1429 mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); 1430 if (!mrt_cachep) 1431 return -ENOMEM; 1432 1433 err = register_pernet_subsys(&ip6mr_net_ops); 1434 if (err) 1435 goto reg_pernet_fail; 1436 1437 err = register_netdevice_notifier(&ip6_mr_notifier); 1438 if (err) 1439 goto reg_notif_fail; 1440 #ifdef CONFIG_IPV6_PIMSM_V2 1441 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1442 pr_err("%s: can't add PIM protocol\n", __func__); 1443 err = -EAGAIN; 1444 goto add_proto_fail; 1445 } 1446 #endif 1447 err = rtnl_register_many(ip6mr_rtnl_msg_handlers); 1448 if (!err) 1449 return 0; 1450 1451 #ifdef CONFIG_IPV6_PIMSM_V2 1452 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1453 add_proto_fail: 1454 unregister_netdevice_notifier(&ip6_mr_notifier); 1455 #endif 1456 reg_notif_fail: 1457 unregister_pernet_subsys(&ip6mr_net_ops); 1458 reg_pernet_fail: 1459 kmem_cache_destroy(mrt_cachep); 1460 return err; 1461 } 1462 1463 void __init ip6_mr_cleanup(void) 1464 { 1465 rtnl_unregister_many(ip6mr_rtnl_msg_handlers); 1466 #ifdef CONFIG_IPV6_PIMSM_V2 1467 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1468 #endif 1469 unregister_netdevice_notifier(&ip6_mr_notifier); 1470 unregister_pernet_subsys(&ip6mr_net_ops); 1471 kmem_cache_destroy(mrt_cachep); 1472 } 1473 1474 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1475 struct mf6cctl *mfc, int mrtsock, int parent) 1476 { 1477 unsigned char ttls[MAXMIFS]; 1478 struct mfc6_cache *uc, *c; 1479 struct mr_mfc *_uc; 1480 bool found; 1481 int i, err; 1482 1483 if (mfc->mf6cc_parent >= MAXMIFS) 1484 return -ENFILE; 1485 1486 memset(ttls, 255, MAXMIFS); 1487 for (i = 0; i < MAXMIFS; i++) { 1488 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1489 ttls[i] = 1; 1490 } 1491 1492 rcu_read_lock(); 1493 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1494 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1495 rcu_read_unlock(); 1496 if (c) { 1497 spin_lock(&mrt_lock); 1498 c->_c.mfc_parent = mfc->mf6cc_parent; 1499 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1500 if (!mrtsock) 1501 c->_c.mfc_flags |= MFC_STATIC; 1502 spin_unlock(&mrt_lock); 1503 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1504 c, mrt->id); 1505 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1506 return 0; 1507 } 1508 1509 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1510 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1511 return -EINVAL; 1512 1513 c = ip6mr_cache_alloc(); 1514 if (!c) 1515 return -ENOMEM; 1516 1517 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1518 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1519 c->_c.mfc_parent = mfc->mf6cc_parent; 1520 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1521 if (!mrtsock) 1522 c->_c.mfc_flags |= MFC_STATIC; 1523 1524 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1525 ip6mr_rht_params); 1526 if (err) { 1527 pr_err("ip6mr: rhtable insert error %d\n", err); 1528 ip6mr_cache_free(c); 1529 return err; 1530 } 1531 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1532 1533 /* Check to see if we resolved a queued list. If so we 1534 * need to send on the frames and tidy up. 1535 */ 1536 found = false; 1537 spin_lock_bh(&mfc_unres_lock); 1538 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1539 uc = (struct mfc6_cache *)_uc; 1540 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1541 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1542 list_del(&_uc->list); 1543 WRITE_ONCE(mrt->cache_resolve_queue_len, 1544 mrt->cache_resolve_queue_len - 1); 1545 found = true; 1546 break; 1547 } 1548 } 1549 if (list_empty(&mrt->mfc_unres_queue)) 1550 timer_delete(&mrt->ipmr_expire_timer); 1551 spin_unlock_bh(&mfc_unres_lock); 1552 1553 if (found) { 1554 ip6mr_cache_resolve(net, mrt, uc, c); 1555 ip6mr_cache_free(uc); 1556 } 1557 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1558 c, mrt->id); 1559 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1560 return 0; 1561 } 1562 1563 /* 1564 * Close the multicast socket, and clear the vif tables etc 1565 */ 1566 1567 static void mroute_clean_tables(struct mr_table *mrt, int flags, 1568 struct list_head *dev_kill_list) 1569 { 1570 struct net *net = read_pnet(&mrt->net); 1571 struct mr_mfc *c, *tmp; 1572 int i; 1573 1574 /* Shut down all active vif entries */ 1575 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1576 for (i = 0; i < mrt->maxvif; i++) { 1577 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1578 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1579 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1580 continue; 1581 mif6_delete(mrt, i, 0, dev_kill_list); 1582 } 1583 } 1584 1585 /* Wipe the cache */ 1586 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1587 mutex_lock(&net->ipv6.mfc_mutex); 1588 1589 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1590 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || 1591 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1592 continue; 1593 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1594 list_del_rcu(&c->list); 1595 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1596 (struct mfc6_cache *)c, mrt->id); 1597 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1598 mr_cache_put(c); 1599 } 1600 1601 mutex_unlock(&net->ipv6.mfc_mutex); 1602 } 1603 1604 if (flags & MRT6_FLUSH_MFC) { 1605 if (READ_ONCE(mrt->cache_resolve_queue_len) || !check_net(net)) { 1606 spin_lock_bh(&mfc_unres_lock); 1607 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1608 list_del(&c->list); 1609 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1610 RTM_DELROUTE); 1611 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1612 } 1613 spin_unlock_bh(&mfc_unres_lock); 1614 } 1615 } 1616 } 1617 1618 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1619 { 1620 int err = 0; 1621 struct net *net = sock_net(sk); 1622 1623 rtnl_lock(); 1624 spin_lock(&mrt_lock); 1625 if (rtnl_dereference(mrt->mroute_sk)) { 1626 err = -EADDRINUSE; 1627 } else { 1628 rcu_assign_pointer(mrt->mroute_sk, sk); 1629 sock_set_flag(sk, SOCK_RCU_FREE); 1630 atomic_inc(&net->ipv6.devconf_all->mc_forwarding); 1631 } 1632 spin_unlock(&mrt_lock); 1633 1634 if (!err) 1635 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1636 NETCONFA_MC_FORWARDING, 1637 NETCONFA_IFINDEX_ALL, 1638 net->ipv6.devconf_all); 1639 rtnl_unlock(); 1640 1641 return err; 1642 } 1643 1644 int ip6mr_sk_done(struct sock *sk) 1645 { 1646 struct net *net = sock_net(sk); 1647 struct ipv6_devconf *devconf; 1648 LIST_HEAD(dev_kill_list); 1649 struct mr_table *mrt; 1650 int err = -EACCES; 1651 1652 if (sk->sk_type != SOCK_RAW || 1653 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1654 return err; 1655 1656 devconf = net->ipv6.devconf_all; 1657 if (!devconf || !atomic_read(&devconf->mc_forwarding)) 1658 return err; 1659 1660 rtnl_lock(); 1661 ip6mr_for_each_table(mrt, net) { 1662 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1663 spin_lock(&mrt_lock); 1664 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1665 /* Note that mroute_sk had SOCK_RCU_FREE set, 1666 * so the RCU grace period before sk freeing 1667 * is guaranteed by sk_destruct() 1668 */ 1669 atomic_dec(&devconf->mc_forwarding); 1670 spin_unlock(&mrt_lock); 1671 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1672 NETCONFA_MC_FORWARDING, 1673 NETCONFA_IFINDEX_ALL, 1674 net->ipv6.devconf_all); 1675 1676 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC, 1677 &dev_kill_list); 1678 err = 0; 1679 break; 1680 } 1681 } 1682 unregister_netdevice_many(&dev_kill_list); 1683 rtnl_unlock(); 1684 1685 return err; 1686 } 1687 1688 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1689 { 1690 struct mr_table *mrt; 1691 struct flowi6 fl6 = { 1692 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1693 .flowi6_oif = skb->dev->ifindex, 1694 .flowi6_mark = skb->mark, 1695 }; 1696 1697 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1698 return NULL; 1699 1700 return rcu_access_pointer(mrt->mroute_sk); 1701 } 1702 EXPORT_SYMBOL(mroute6_is_socket); 1703 1704 /* 1705 * Socket options and virtual interface manipulation. The whole 1706 * virtual interface system is a complete heap, but unfortunately 1707 * that's how BSD mrouted happens to think. Maybe one day with a proper 1708 * MOSPF/PIM router set up we can clean this up. 1709 */ 1710 1711 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1712 unsigned int optlen) 1713 { 1714 int ret, parent = 0; 1715 struct mif6ctl vif; 1716 struct mf6cctl mfc; 1717 mifi_t mifi; 1718 struct net *net = sock_net(sk); 1719 struct mr_table *mrt; 1720 1721 if (sk->sk_type != SOCK_RAW || 1722 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1723 return -EOPNOTSUPP; 1724 1725 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1726 if (!mrt) 1727 return -ENOENT; 1728 1729 if (optname != MRT6_INIT) { 1730 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1731 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1732 return -EACCES; 1733 } 1734 1735 switch (optname) { 1736 case MRT6_INIT: 1737 if (optlen < sizeof(int)) 1738 return -EINVAL; 1739 1740 return ip6mr_sk_init(mrt, sk); 1741 1742 case MRT6_DONE: 1743 return ip6mr_sk_done(sk); 1744 1745 case MRT6_ADD_MIF: 1746 if (optlen < sizeof(vif)) 1747 return -EINVAL; 1748 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1749 return -EFAULT; 1750 if (vif.mif6c_mifi >= MAXMIFS) 1751 return -ENFILE; 1752 rtnl_lock(); 1753 ret = mif6_add(net, mrt, &vif, 1754 sk == rtnl_dereference(mrt->mroute_sk)); 1755 rtnl_unlock(); 1756 return ret; 1757 1758 case MRT6_DEL_MIF: 1759 if (optlen < sizeof(mifi_t)) 1760 return -EINVAL; 1761 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1762 return -EFAULT; 1763 rtnl_lock(); 1764 ret = mif6_delete(mrt, mifi, 0, NULL); 1765 rtnl_unlock(); 1766 return ret; 1767 1768 /* 1769 * Manipulate the forwarding caches. These live 1770 * in a sort of kernel/user symbiosis. 1771 */ 1772 case MRT6_ADD_MFC: 1773 case MRT6_DEL_MFC: 1774 parent = -1; 1775 fallthrough; 1776 case MRT6_ADD_MFC_PROXY: 1777 case MRT6_DEL_MFC_PROXY: 1778 if (optlen < sizeof(mfc)) 1779 return -EINVAL; 1780 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1781 return -EFAULT; 1782 if (parent == 0) 1783 parent = mfc.mf6cc_parent; 1784 1785 mutex_lock(&net->ipv6.mfc_mutex); 1786 1787 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1788 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1789 else 1790 ret = ip6mr_mfc_add(net, mrt, &mfc, 1791 sk == 1792 rcu_access_pointer(mrt->mroute_sk), 1793 parent); 1794 1795 mutex_unlock(&net->ipv6.mfc_mutex); 1796 return ret; 1797 1798 case MRT6_FLUSH: 1799 { 1800 LIST_HEAD(dev_kill_list); 1801 int flags; 1802 1803 if (optlen != sizeof(flags)) 1804 return -EINVAL; 1805 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1806 return -EFAULT; 1807 1808 rtnl_lock(); 1809 mroute_clean_tables(mrt, flags, &dev_kill_list); 1810 unregister_netdevice_many(&dev_kill_list); 1811 rtnl_unlock(); 1812 return 0; 1813 } 1814 1815 /* 1816 * Control PIM assert (to activate pim will activate assert) 1817 */ 1818 case MRT6_ASSERT: 1819 { 1820 int v; 1821 1822 if (optlen != sizeof(v)) 1823 return -EINVAL; 1824 if (copy_from_sockptr(&v, optval, sizeof(v))) 1825 return -EFAULT; 1826 WRITE_ONCE(mrt->mroute_do_assert, v); 1827 return 0; 1828 } 1829 1830 #ifdef CONFIG_IPV6_PIMSM_V2 1831 case MRT6_PIM: 1832 { 1833 bool do_wrmifwhole; 1834 int v; 1835 1836 if (optlen != sizeof(v)) 1837 return -EINVAL; 1838 if (copy_from_sockptr(&v, optval, sizeof(v))) 1839 return -EFAULT; 1840 1841 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); 1842 v = !!v; 1843 rtnl_lock(); 1844 ret = 0; 1845 if (v != mrt->mroute_do_pim) { 1846 WRITE_ONCE(mrt->mroute_do_pim, v); 1847 WRITE_ONCE(mrt->mroute_do_assert, v); 1848 WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrmifwhole); 1849 } 1850 rtnl_unlock(); 1851 return ret; 1852 } 1853 1854 #endif 1855 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1856 case MRT6_TABLE: 1857 { 1858 u32 v; 1859 1860 if (optlen != sizeof(u32)) 1861 return -EINVAL; 1862 if (copy_from_sockptr(&v, optval, sizeof(v))) 1863 return -EFAULT; 1864 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1865 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1866 return -EINVAL; 1867 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1868 return -EBUSY; 1869 1870 rtnl_lock(); 1871 ret = 0; 1872 mrt = ip6mr_new_table(net, v); 1873 if (IS_ERR(mrt)) 1874 ret = PTR_ERR(mrt); 1875 else 1876 raw6_sk(sk)->ip6mr_table = v; 1877 rtnl_unlock(); 1878 return ret; 1879 } 1880 #endif 1881 /* 1882 * Spurious command, or MRT6_VERSION which you cannot 1883 * set. 1884 */ 1885 default: 1886 return -ENOPROTOOPT; 1887 } 1888 } 1889 1890 /* 1891 * Getsock opt support for the multicast routing system. 1892 */ 1893 1894 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1895 sockptr_t optlen) 1896 { 1897 int olr; 1898 int val; 1899 struct net *net = sock_net(sk); 1900 struct mr_table *mrt; 1901 1902 if (sk->sk_type != SOCK_RAW || 1903 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1904 return -EOPNOTSUPP; 1905 1906 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1907 if (!mrt) 1908 return -ENOENT; 1909 1910 switch (optname) { 1911 case MRT6_VERSION: 1912 val = 0x0305; 1913 break; 1914 #ifdef CONFIG_IPV6_PIMSM_V2 1915 case MRT6_PIM: 1916 val = READ_ONCE(mrt->mroute_do_pim); 1917 break; 1918 #endif 1919 case MRT6_ASSERT: 1920 val = READ_ONCE(mrt->mroute_do_assert); 1921 break; 1922 default: 1923 return -ENOPROTOOPT; 1924 } 1925 1926 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1927 return -EFAULT; 1928 1929 olr = min_t(int, olr, sizeof(int)); 1930 if (olr < 0) 1931 return -EINVAL; 1932 1933 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1934 return -EFAULT; 1935 if (copy_to_sockptr(optval, &val, olr)) 1936 return -EFAULT; 1937 return 0; 1938 } 1939 1940 /* 1941 * The IP multicast ioctl support routines. 1942 */ 1943 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) 1944 { 1945 struct sioc_sg_req6 *sr; 1946 struct sioc_mif_req6 *vr; 1947 struct vif_device *vif; 1948 struct mfc6_cache *c; 1949 struct net *net = sock_net(sk); 1950 struct mr_table *mrt; 1951 1952 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1953 if (!mrt) 1954 return -ENOENT; 1955 1956 switch (cmd) { 1957 case SIOCGETMIFCNT_IN6: 1958 vr = (struct sioc_mif_req6 *)arg; 1959 if (vr->mifi >= mrt->maxvif) 1960 return -EINVAL; 1961 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); 1962 rcu_read_lock(); 1963 vif = &mrt->vif_table[vr->mifi]; 1964 if (VIF_EXISTS(mrt, vr->mifi)) { 1965 vr->icount = READ_ONCE(vif->pkt_in); 1966 vr->ocount = READ_ONCE(vif->pkt_out); 1967 vr->ibytes = READ_ONCE(vif->bytes_in); 1968 vr->obytes = READ_ONCE(vif->bytes_out); 1969 rcu_read_unlock(); 1970 return 0; 1971 } 1972 rcu_read_unlock(); 1973 return -EADDRNOTAVAIL; 1974 case SIOCGETSGCNT_IN6: 1975 sr = (struct sioc_sg_req6 *)arg; 1976 1977 rcu_read_lock(); 1978 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, 1979 &sr->grp.sin6_addr); 1980 if (c) { 1981 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1982 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1983 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1984 rcu_read_unlock(); 1985 return 0; 1986 } 1987 rcu_read_unlock(); 1988 return -EADDRNOTAVAIL; 1989 default: 1990 return -ENOIOCTLCMD; 1991 } 1992 } 1993 1994 #ifdef CONFIG_COMPAT 1995 struct compat_sioc_sg_req6 { 1996 struct sockaddr_in6 src; 1997 struct sockaddr_in6 grp; 1998 compat_ulong_t pktcnt; 1999 compat_ulong_t bytecnt; 2000 compat_ulong_t wrong_if; 2001 }; 2002 2003 struct compat_sioc_mif_req6 { 2004 mifi_t mifi; 2005 compat_ulong_t icount; 2006 compat_ulong_t ocount; 2007 compat_ulong_t ibytes; 2008 compat_ulong_t obytes; 2009 }; 2010 2011 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 2012 { 2013 struct compat_sioc_sg_req6 sr; 2014 struct compat_sioc_mif_req6 vr; 2015 struct vif_device *vif; 2016 struct mfc6_cache *c; 2017 struct net *net = sock_net(sk); 2018 struct mr_table *mrt; 2019 2020 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 2021 if (!mrt) 2022 return -ENOENT; 2023 2024 switch (cmd) { 2025 case SIOCGETMIFCNT_IN6: 2026 if (copy_from_user(&vr, arg, sizeof(vr))) 2027 return -EFAULT; 2028 if (vr.mifi >= mrt->maxvif) 2029 return -EINVAL; 2030 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 2031 rcu_read_lock(); 2032 vif = &mrt->vif_table[vr.mifi]; 2033 if (VIF_EXISTS(mrt, vr.mifi)) { 2034 vr.icount = READ_ONCE(vif->pkt_in); 2035 vr.ocount = READ_ONCE(vif->pkt_out); 2036 vr.ibytes = READ_ONCE(vif->bytes_in); 2037 vr.obytes = READ_ONCE(vif->bytes_out); 2038 rcu_read_unlock(); 2039 2040 if (copy_to_user(arg, &vr, sizeof(vr))) 2041 return -EFAULT; 2042 return 0; 2043 } 2044 rcu_read_unlock(); 2045 return -EADDRNOTAVAIL; 2046 case SIOCGETSGCNT_IN6: 2047 if (copy_from_user(&sr, arg, sizeof(sr))) 2048 return -EFAULT; 2049 2050 rcu_read_lock(); 2051 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 2052 if (c) { 2053 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 2054 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 2055 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 2056 rcu_read_unlock(); 2057 2058 if (copy_to_user(arg, &sr, sizeof(sr))) 2059 return -EFAULT; 2060 return 0; 2061 } 2062 rcu_read_unlock(); 2063 return -EADDRNOTAVAIL; 2064 default: 2065 return -ENOIOCTLCMD; 2066 } 2067 } 2068 #endif 2069 2070 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2071 { 2072 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 2073 IPSTATS_MIB_OUTFORWDATAGRAMS); 2074 return dst_output(net, sk, skb); 2075 } 2076 2077 /* 2078 * Processing handlers for ip6mr_forward 2079 */ 2080 2081 static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt, 2082 struct sk_buff *skb, int vifi) 2083 { 2084 struct vif_device *vif = &mrt->vif_table[vifi]; 2085 struct net_device *vif_dev; 2086 struct ipv6hdr *ipv6h; 2087 struct dst_entry *dst; 2088 struct flowi6 fl6; 2089 2090 vif_dev = vif_dev_read(vif); 2091 if (!vif_dev) 2092 return -1; 2093 2094 #ifdef CONFIG_IPV6_PIMSM_V2 2095 if (vif->flags & MIFF_REGISTER) { 2096 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2097 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2098 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 2099 DEV_STATS_INC(vif_dev, tx_packets); 2100 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2101 return -1; 2102 } 2103 #endif 2104 2105 ipv6h = ipv6_hdr(skb); 2106 2107 fl6 = (struct flowi6) { 2108 .flowi6_oif = vif->link, 2109 .daddr = ipv6h->daddr, 2110 }; 2111 2112 dst = ip6_route_output(net, NULL, &fl6); 2113 if (dst->error) { 2114 dst_release(dst); 2115 return -1; 2116 } 2117 2118 skb_dst_drop(skb); 2119 skb_dst_set(skb, dst); 2120 2121 /* 2122 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2123 * not only before forwarding, but after forwarding on all output 2124 * interfaces. It is clear, if mrouter runs a multicasting 2125 * program, it should receive packets not depending to what interface 2126 * program is joined. 2127 * If we will not make it, the program will have to join on all 2128 * interfaces. On the other hand, multihoming host (or router, but 2129 * not mrouter) cannot join to more than one interface - it will 2130 * result in receiving multiple packets. 2131 */ 2132 skb->dev = vif_dev; 2133 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2134 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2135 2136 /* We are about to write */ 2137 /* XXX: extension headers? */ 2138 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) 2139 return -1; 2140 2141 ipv6h = ipv6_hdr(skb); 2142 ipv6h->hop_limit--; 2143 return 0; 2144 } 2145 2146 static void ip6mr_forward2(struct net *net, struct mr_table *mrt, 2147 struct sk_buff *skb, int vifi) 2148 { 2149 struct net_device *indev = skb->dev; 2150 2151 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2152 goto out_free; 2153 2154 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2155 2156 NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2157 net, NULL, skb, indev, skb->dev, 2158 ip6mr_forward2_finish); 2159 return; 2160 2161 out_free: 2162 kfree_skb(skb); 2163 } 2164 2165 static void ip6mr_output2(struct net *net, struct mr_table *mrt, 2166 struct sk_buff *skb, int vifi) 2167 { 2168 if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2169 goto out_free; 2170 2171 ip6_output(net, NULL, skb); 2172 return; 2173 2174 out_free: 2175 kfree_skb(skb); 2176 } 2177 2178 /* Called with rcu_read_lock() */ 2179 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2180 { 2181 int ct; 2182 2183 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ 2184 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2185 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2186 break; 2187 } 2188 return ct; 2189 } 2190 2191 /* Called under rcu_read_lock() */ 2192 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2193 struct net_device *dev, struct sk_buff *skb, 2194 struct mfc6_cache *c) 2195 { 2196 int psend = -1; 2197 int vif, ct; 2198 int true_vifi = ip6mr_find_vif(mrt, dev); 2199 2200 vif = c->_c.mfc_parent; 2201 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2202 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2203 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2204 2205 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2206 struct mfc6_cache *cache_proxy; 2207 2208 /* For an (*,G) entry, we only check that the incoming 2209 * interface is part of the static tree. 2210 */ 2211 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2212 if (cache_proxy && 2213 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2214 goto forward; 2215 } 2216 2217 /* 2218 * Wrong interface: drop packet and (maybe) send PIM assert. 2219 */ 2220 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2221 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2222 2223 if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) && 2224 /* pimsm uses asserts, when switching from RPT to SPT, 2225 so that we cannot check that packet arrived on an oif. 2226 It is bad, but otherwise we would need to move pretty 2227 large chunk of pimd to kernel. Ough... --ANK 2228 */ 2229 (READ_ONCE(mrt->mroute_do_pim) || 2230 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2231 time_after(jiffies, 2232 c->_c.mfc_un.res.last_assert + 2233 MFC_ASSERT_THRESH)) { 2234 c->_c.mfc_un.res.last_assert = jiffies; 2235 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2236 if (READ_ONCE(mrt->mroute_do_wrvifwhole)) 2237 ip6mr_cache_report(mrt, skb, true_vifi, 2238 MRT6MSG_WRMIFWHOLE); 2239 } 2240 goto dont_forward; 2241 } 2242 2243 forward: 2244 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2245 mrt->vif_table[vif].pkt_in + 1); 2246 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2247 mrt->vif_table[vif].bytes_in + skb->len); 2248 2249 /* 2250 * Forward the frame 2251 */ 2252 if (ipv6_addr_any(&c->mf6c_origin) && 2253 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2254 if (true_vifi >= 0 && 2255 true_vifi != c->_c.mfc_parent && 2256 ipv6_hdr(skb)->hop_limit > 2257 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2258 /* It's an (*,*) entry and the packet is not coming from 2259 * the upstream: forward the packet to the upstream 2260 * only. 2261 */ 2262 psend = c->_c.mfc_parent; 2263 goto last_forward; 2264 } 2265 goto dont_forward; 2266 } 2267 for (ct = c->_c.mfc_un.res.maxvif - 1; 2268 ct >= c->_c.mfc_un.res.minvif; ct--) { 2269 /* For (*,G) entry, don't forward to the incoming interface */ 2270 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2271 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2272 if (psend != -1) { 2273 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2274 if (skb2) 2275 ip6mr_forward2(net, mrt, skb2, psend); 2276 } 2277 psend = ct; 2278 } 2279 } 2280 last_forward: 2281 if (psend != -1) { 2282 ip6mr_forward2(net, mrt, skb, psend); 2283 return; 2284 } 2285 2286 dont_forward: 2287 kfree_skb(skb); 2288 } 2289 2290 /* Called under rcu_read_lock() */ 2291 static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, 2292 struct net_device *dev, struct sk_buff *skb, 2293 struct mfc6_cache *c) 2294 { 2295 int psend = -1; 2296 int ct; 2297 2298 WARN_ON_ONCE(!rcu_read_lock_held()); 2299 2300 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2301 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2302 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2303 2304 /* Forward the frame */ 2305 if (ipv6_addr_any(&c->mf6c_origin) && 2306 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2307 if (ipv6_hdr(skb)->hop_limit > 2308 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2309 /* It's an (*,*) entry and the packet is not coming from 2310 * the upstream: forward the packet to the upstream 2311 * only. 2312 */ 2313 psend = c->_c.mfc_parent; 2314 goto last_forward; 2315 } 2316 goto dont_forward; 2317 } 2318 for (ct = c->_c.mfc_un.res.maxvif - 1; 2319 ct >= c->_c.mfc_un.res.minvif; ct--) { 2320 if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2321 if (psend != -1) { 2322 struct sk_buff *skb2; 2323 2324 skb2 = skb_clone(skb, GFP_ATOMIC); 2325 if (skb2) 2326 ip6mr_output2(net, mrt, skb2, psend); 2327 } 2328 psend = ct; 2329 } 2330 } 2331 last_forward: 2332 if (psend != -1) { 2333 ip6mr_output2(net, mrt, skb, psend); 2334 return; 2335 } 2336 2337 dont_forward: 2338 kfree_skb(skb); 2339 } 2340 2341 /* 2342 * Multicast packets for forwarding arrive here 2343 */ 2344 2345 int ip6_mr_input(struct sk_buff *skb) 2346 { 2347 struct net_device *dev = skb->dev; 2348 struct net *net = dev_net_rcu(dev); 2349 struct mfc6_cache *cache; 2350 struct mr_table *mrt; 2351 struct flowi6 fl6 = { 2352 .flowi6_iif = dev->ifindex, 2353 .flowi6_mark = skb->mark, 2354 }; 2355 int err; 2356 2357 /* skb->dev passed in is the master dev for vrfs. 2358 * Get the proper interface that does have a vif associated with it. 2359 */ 2360 if (netif_is_l3_master(dev)) { 2361 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2362 if (!dev) { 2363 kfree_skb(skb); 2364 return -ENODEV; 2365 } 2366 } 2367 2368 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2369 if (err < 0) { 2370 kfree_skb(skb); 2371 return err; 2372 } 2373 2374 cache = ip6mr_cache_find(mrt, 2375 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2376 if (!cache) { 2377 int vif = ip6mr_find_vif(mrt, dev); 2378 2379 if (vif >= 0) 2380 cache = ip6mr_cache_find_any(mrt, 2381 &ipv6_hdr(skb)->daddr, 2382 vif); 2383 } 2384 2385 /* 2386 * No usable cache entry 2387 */ 2388 if (!cache) { 2389 int vif; 2390 2391 vif = ip6mr_find_vif(mrt, dev); 2392 if (vif >= 0) { 2393 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2394 2395 return err; 2396 } 2397 kfree_skb(skb); 2398 return -ENODEV; 2399 } 2400 2401 ip6_mr_forward(net, mrt, dev, skb, cache); 2402 2403 return 0; 2404 } 2405 2406 int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2407 { 2408 struct net_device *dev = skb_dst(skb)->dev; 2409 struct flowi6 fl6 = (struct flowi6) { 2410 .flowi6_iif = LOOPBACK_IFINDEX, 2411 .flowi6_mark = skb->mark, 2412 }; 2413 struct mfc6_cache *cache; 2414 struct mr_table *mrt; 2415 int err; 2416 int vif; 2417 2418 guard(rcu)(); 2419 2420 if (IP6CB(skb)->flags & IP6SKB_FORWARDED) 2421 goto ip6_output; 2422 if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) 2423 goto ip6_output; 2424 2425 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2426 if (err < 0) { 2427 kfree_skb(skb); 2428 return err; 2429 } 2430 2431 cache = ip6mr_cache_find(mrt, 2432 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2433 if (!cache) { 2434 vif = ip6mr_find_vif(mrt, dev); 2435 if (vif >= 0) 2436 cache = ip6mr_cache_find_any(mrt, 2437 &ipv6_hdr(skb)->daddr, 2438 vif); 2439 } 2440 2441 /* No usable cache entry */ 2442 if (!cache) { 2443 vif = ip6mr_find_vif(mrt, dev); 2444 if (vif >= 0) 2445 return ip6mr_cache_unresolved(mrt, vif, skb, dev); 2446 goto ip6_output; 2447 } 2448 2449 /* Wrong interface */ 2450 vif = cache->_c.mfc_parent; 2451 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2452 goto ip6_output; 2453 2454 ip6_mr_output_finish(net, mrt, dev, skb, cache); 2455 return 0; 2456 2457 ip6_output: 2458 return ip6_output(net, sk, skb); 2459 } 2460 2461 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2462 u32 portid) 2463 { 2464 int err; 2465 struct mr_table *mrt; 2466 struct mfc6_cache *cache; 2467 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 2468 2469 rcu_read_lock(); 2470 mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); 2471 if (!mrt) { 2472 rcu_read_unlock(); 2473 return -ENOENT; 2474 } 2475 2476 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2477 if (!cache && skb->dev) { 2478 int vif = ip6mr_find_vif(mrt, skb->dev); 2479 2480 if (vif >= 0) 2481 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2482 vif); 2483 } 2484 2485 if (!cache) { 2486 struct sk_buff *skb2; 2487 struct ipv6hdr *iph; 2488 struct net_device *dev; 2489 int vif; 2490 2491 dev = skb->dev; 2492 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2493 rcu_read_unlock(); 2494 return -ENODEV; 2495 } 2496 2497 /* really correct? */ 2498 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2499 if (!skb2) { 2500 rcu_read_unlock(); 2501 return -ENOMEM; 2502 } 2503 2504 NETLINK_CB(skb2).portid = portid; 2505 skb_reset_transport_header(skb2); 2506 2507 skb_put(skb2, sizeof(struct ipv6hdr)); 2508 skb_reset_network_header(skb2); 2509 2510 iph = ipv6_hdr(skb2); 2511 iph->version = 0; 2512 iph->priority = 0; 2513 iph->flow_lbl[0] = 0; 2514 iph->flow_lbl[1] = 0; 2515 iph->flow_lbl[2] = 0; 2516 iph->payload_len = 0; 2517 iph->nexthdr = IPPROTO_NONE; 2518 iph->hop_limit = 0; 2519 iph->saddr = rt->rt6i_src.addr; 2520 iph->daddr = rt->rt6i_dst.addr; 2521 2522 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2523 rcu_read_unlock(); 2524 2525 return err; 2526 } 2527 2528 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2529 rcu_read_unlock(); 2530 return err; 2531 } 2532 2533 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2534 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2535 int flags) 2536 { 2537 struct nlmsghdr *nlh; 2538 struct rtmsg *rtm; 2539 int err; 2540 2541 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2542 if (!nlh) 2543 return -EMSGSIZE; 2544 2545 rtm = nlmsg_data(nlh); 2546 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2547 rtm->rtm_dst_len = 128; 2548 rtm->rtm_src_len = 128; 2549 rtm->rtm_tos = 0; 2550 rtm->rtm_table = mrt->id; 2551 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2552 goto nla_put_failure; 2553 rtm->rtm_type = RTN_MULTICAST; 2554 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2555 if (c->_c.mfc_flags & MFC_STATIC) 2556 rtm->rtm_protocol = RTPROT_STATIC; 2557 else 2558 rtm->rtm_protocol = RTPROT_MROUTED; 2559 rtm->rtm_flags = 0; 2560 2561 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2562 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2563 goto nla_put_failure; 2564 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2565 /* do not break the dump if cache is unresolved */ 2566 if (err < 0 && err != -ENOENT) 2567 goto nla_put_failure; 2568 2569 nlmsg_end(skb, nlh); 2570 return 0; 2571 2572 nla_put_failure: 2573 nlmsg_cancel(skb, nlh); 2574 return -EMSGSIZE; 2575 } 2576 2577 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2578 u32 portid, u32 seq, struct mr_mfc *c, 2579 int cmd, int flags) 2580 { 2581 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2582 cmd, flags); 2583 } 2584 2585 static int mr6_msgsize(bool unresolved) 2586 { 2587 size_t len = 2588 NLMSG_ALIGN(sizeof(struct rtmsg)) 2589 + nla_total_size(4) /* RTA_TABLE */ 2590 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2591 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2592 ; 2593 2594 if (!unresolved) 2595 len = len 2596 + nla_total_size(4) /* RTA_IIF */ 2597 + nla_total_size(0) /* RTA_MULTIPATH */ 2598 + MAXMIFS * NLA_ALIGN(sizeof(struct rtnexthop)) 2599 /* RTA_MFC_STATS */ 2600 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2601 ; 2602 2603 return len; 2604 } 2605 2606 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2607 int cmd) 2608 { 2609 struct net *net = read_pnet(&mrt->net); 2610 struct sk_buff *skb; 2611 int err = -ENOBUFS; 2612 2613 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS), GFP_ATOMIC); 2614 if (!skb) 2615 goto errout; 2616 2617 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2618 if (err < 0) 2619 goto errout; 2620 2621 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2622 return; 2623 2624 errout: 2625 kfree_skb(skb); 2626 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2627 } 2628 2629 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2630 { 2631 size_t len = 2632 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2633 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2634 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2635 /* IP6MRA_CREPORT_SRC_ADDR */ 2636 + nla_total_size(sizeof(struct in6_addr)) 2637 /* IP6MRA_CREPORT_DST_ADDR */ 2638 + nla_total_size(sizeof(struct in6_addr)) 2639 /* IP6MRA_CREPORT_PKT */ 2640 + nla_total_size(payloadlen) 2641 ; 2642 2643 return len; 2644 } 2645 2646 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2647 { 2648 struct net *net = read_pnet(&mrt->net); 2649 struct nlmsghdr *nlh; 2650 struct rtgenmsg *rtgenm; 2651 struct mrt6msg *msg; 2652 struct sk_buff *skb; 2653 struct nlattr *nla; 2654 int payloadlen; 2655 2656 payloadlen = pkt->len - sizeof(struct mrt6msg); 2657 msg = (struct mrt6msg *)skb_transport_header(pkt); 2658 2659 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2660 if (!skb) 2661 goto errout; 2662 2663 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2664 sizeof(struct rtgenmsg), 0); 2665 if (!nlh) 2666 goto errout; 2667 rtgenm = nlmsg_data(nlh); 2668 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2669 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2670 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2671 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2672 &msg->im6_src) || 2673 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2674 &msg->im6_dst)) 2675 goto nla_put_failure; 2676 2677 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2678 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2679 nla_data(nla), payloadlen)) 2680 goto nla_put_failure; 2681 2682 nlmsg_end(skb, nlh); 2683 2684 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2685 return; 2686 2687 nla_put_failure: 2688 nlmsg_cancel(skb, nlh); 2689 errout: 2690 kfree_skb(skb); 2691 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2692 } 2693 2694 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { 2695 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2696 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 2697 [RTA_TABLE] = { .type = NLA_U32 }, 2698 }; 2699 2700 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, 2701 const struct nlmsghdr *nlh, 2702 struct nlattr **tb, 2703 struct netlink_ext_ack *extack) 2704 { 2705 struct rtmsg *rtm; 2706 int err; 2707 2708 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, 2709 extack); 2710 if (err) 2711 return err; 2712 2713 rtm = nlmsg_data(nlh); 2714 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 2715 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 2716 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2717 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2718 NL_SET_ERR_MSG_MOD(extack, 2719 "Invalid values in header for multicast route get request"); 2720 return -EINVAL; 2721 } 2722 2723 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2724 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2725 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 2726 return -EINVAL; 2727 } 2728 2729 return 0; 2730 } 2731 2732 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2733 struct netlink_ext_ack *extack) 2734 { 2735 struct net *net = sock_net(in_skb->sk); 2736 struct in6_addr src = {}, grp = {}; 2737 struct nlattr *tb[RTA_MAX + 1]; 2738 struct mfc6_cache *cache; 2739 struct mr_table *mrt; 2740 struct sk_buff *skb; 2741 u32 tableid; 2742 int err; 2743 2744 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2745 if (err < 0) 2746 return err; 2747 2748 skb = nlmsg_new(mr6_msgsize(false), GFP_KERNEL); 2749 if (!skb) 2750 return -ENOBUFS; 2751 2752 if (tb[RTA_SRC]) 2753 src = nla_get_in6_addr(tb[RTA_SRC]); 2754 if (tb[RTA_DST]) 2755 grp = nla_get_in6_addr(tb[RTA_DST]); 2756 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2757 2758 rcu_read_lock(); 2759 2760 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); 2761 if (!mrt) { 2762 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); 2763 err = -ENOENT; 2764 goto err; 2765 } 2766 2767 cache = ip6mr_cache_find(mrt, &src, &grp); 2768 if (!cache) { 2769 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); 2770 err = -ENOENT; 2771 goto err; 2772 } 2773 2774 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2775 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); 2776 if (err < 0) 2777 goto err; 2778 2779 rcu_read_unlock(); 2780 2781 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2782 2783 err: 2784 rcu_read_unlock(); 2785 kfree_skb(skb); 2786 return err; 2787 } 2788 2789 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2790 { 2791 const struct nlmsghdr *nlh = cb->nlh; 2792 struct fib_dump_filter filter = {}; 2793 int err; 2794 2795 rcu_read_lock(); 2796 2797 if (cb->strict_check) { 2798 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2799 &filter, cb); 2800 if (err < 0) 2801 goto unlock; 2802 } 2803 2804 if (filter.table_id) { 2805 struct mr_table *mrt; 2806 2807 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2808 if (!mrt) { 2809 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) { 2810 err = skb->len; 2811 goto unlock; 2812 } 2813 2814 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2815 err = -ENOENT; 2816 goto unlock; 2817 } 2818 2819 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2820 &mfc_unres_lock, &filter); 2821 err = skb->len ? : err; 2822 goto unlock; 2823 } 2824 2825 err = mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2826 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2827 unlock: 2828 rcu_read_unlock(); 2829 2830 return err; 2831 } 2832