1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IP multicast routing support for mrouted 3.6/3.8 4 * 5 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * Linux Consultancy and Custom Driver Development 7 * 8 * Fixes: 9 * Michael Chastain : Incorrect size of copying. 10 * Alan Cox : Added the cache manager code 11 * Alan Cox : Fixed the clone/copy bug and device race. 12 * Mike McLagan : Routing by source 13 * Malcolm Beattie : Buffer handling fixes. 14 * Alexey Kuznetsov : Double buffer free and other fixes. 15 * SVR Anand : Fixed several multicast bugs and problems. 16 * Alexey Kuznetsov : Status, optimisations and more. 17 * Brad Parker : Better behaviour on mrouted upcall 18 * overflow. 19 * Carlos Picoto : PIMv1 Support 20 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 21 * Relax this requirement to work with older peers. 22 */ 23 24 #include <linux/uaccess.h> 25 #include <linux/types.h> 26 #include <linux/cache.h> 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/mm.h> 30 #include <linux/kernel.h> 31 #include <linux/fcntl.h> 32 #include <linux/stat.h> 33 #include <linux/socket.h> 34 #include <linux/in.h> 35 #include <linux/inet.h> 36 #include <linux/netdevice.h> 37 #include <linux/inetdevice.h> 38 #include <linux/igmp.h> 39 #include <linux/proc_fs.h> 40 #include <linux/seq_file.h> 41 #include <linux/mroute.h> 42 #include <linux/init.h> 43 #include <linux/if_ether.h> 44 #include <linux/slab.h> 45 #include <net/flow.h> 46 #include <net/net_namespace.h> 47 #include <net/ip.h> 48 #include <net/protocol.h> 49 #include <linux/skbuff.h> 50 #include <net/route.h> 51 #include <net/icmp.h> 52 #include <net/udp.h> 53 #include <net/raw.h> 54 #include <linux/notifier.h> 55 #include <linux/if_arp.h> 56 #include <linux/netfilter_ipv4.h> 57 #include <linux/compat.h> 58 #include <linux/export.h> 59 #include <linux/rhashtable.h> 60 #include <net/ip_tunnels.h> 61 #include <net/checksum.h> 62 #include 
<net/netlink.h> 63 #include <net/fib_rules.h> 64 #include <linux/netconf.h> 65 #include <net/rtnh.h> 66 #include <net/inet_dscp.h> 67 68 #include <linux/nospec.h> 69 70 struct ipmr_rule { 71 struct fib_rule common; 72 }; 73 74 struct ipmr_result { 75 struct mr_table *mrt; 76 }; 77 78 /* Big lock, protecting vif table, mrt cache and mroute socket state. 79 * Note that the changes are semaphored via rtnl_lock. 80 */ 81 82 static DEFINE_SPINLOCK(mrt_lock); 83 84 static struct net_device *vif_dev_read(const struct vif_device *vif) 85 { 86 return rcu_dereference(vif->dev); 87 } 88 89 /* Multicast router control variables */ 90 91 /* Special spinlock for queue of unresolved entries */ 92 static DEFINE_SPINLOCK(mfc_unres_lock); 93 94 /* We return to original Alan's scheme. Hash table of resolved 95 * entries is changed only in process context and protected 96 * with weak lock mrt_lock. Queue of unresolved entries is protected 97 * with strong spinlock mfc_unres_lock. 98 * 99 * In this case data path is free of exclusive locks at all. 
100 */ 101 102 static struct kmem_cache *mrt_cachep __ro_after_init; 103 104 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 105 static void ipmr_free_table(struct mr_table *mrt, 106 struct list_head *dev_kill_list); 107 108 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 109 struct net_device *dev, struct sk_buff *skb, 110 struct mfc_cache *cache, int local); 111 static int ipmr_cache_report(const struct mr_table *mrt, 112 struct sk_buff *pkt, vifi_t vifi, int assert); 113 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 114 int cmd); 115 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 116 static void mroute_clean_tables(struct mr_table *mrt, int flags, 117 struct list_head *dev_kill_list); 118 static void ipmr_expire_process(struct timer_list *t); 119 120 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 121 #define ipmr_for_each_table(mrt, net) \ 122 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ 123 lockdep_rtnl_is_held() || \ 124 list_empty(&net->ipv4.mr_tables)) 125 126 static struct mr_table *ipmr_mr_table_iter(struct net *net, 127 struct mr_table *mrt) 128 { 129 struct mr_table *ret; 130 131 if (!mrt) 132 ret = list_entry_rcu(net->ipv4.mr_tables.next, 133 struct mr_table, list); 134 else 135 ret = list_entry_rcu(mrt->list.next, 136 struct mr_table, list); 137 138 if (&ret->list == &net->ipv4.mr_tables) 139 return NULL; 140 return ret; 141 } 142 143 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 144 { 145 struct mr_table *mrt; 146 147 ipmr_for_each_table(mrt, net) { 148 if (mrt->id == id) 149 return mrt; 150 } 151 return NULL; 152 } 153 154 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 155 struct mr_table **mrt) 156 { 157 int err; 158 struct ipmr_result res; 159 struct fib_lookup_arg arg = { 160 .result = &res, 161 .flags = FIB_LOOKUP_NOREF, 162 }; 163 164 /* update flow if oif or iif point to device enslaved to l3mdev */ 
165 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 166 167 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 168 flowi4_to_flowi(flp4), 0, &arg); 169 if (err < 0) 170 return err; 171 *mrt = res.mrt; 172 return 0; 173 } 174 175 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 176 int flags, struct fib_lookup_arg *arg) 177 { 178 struct ipmr_result *res = arg->result; 179 struct mr_table *mrt; 180 181 switch (rule->action) { 182 case FR_ACT_TO_TBL: 183 break; 184 case FR_ACT_UNREACHABLE: 185 return -ENETUNREACH; 186 case FR_ACT_PROHIBIT: 187 return -EACCES; 188 case FR_ACT_BLACKHOLE: 189 default: 190 return -EINVAL; 191 } 192 193 arg->table = fib_rule_get_table(rule, arg); 194 195 mrt = __ipmr_get_table(rule->fr_net, arg->table); 196 if (!mrt) 197 return -EAGAIN; 198 res->mrt = mrt; 199 return 0; 200 } 201 202 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 203 { 204 return 1; 205 } 206 207 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 208 struct fib_rule_hdr *frh, struct nlattr **tb, 209 struct netlink_ext_ack *extack) 210 { 211 return 0; 212 } 213 214 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 215 struct nlattr **tb) 216 { 217 return 1; 218 } 219 220 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 221 struct fib_rule_hdr *frh) 222 { 223 frh->dst_len = 0; 224 frh->src_len = 0; 225 frh->tos = 0; 226 return 0; 227 } 228 229 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 230 .family = RTNL_FAMILY_IPMR, 231 .rule_size = sizeof(struct ipmr_rule), 232 .addr_size = sizeof(u32), 233 .action = ipmr_rule_action, 234 .match = ipmr_rule_match, 235 .configure = ipmr_rule_configure, 236 .compare = ipmr_rule_compare, 237 .fill = ipmr_rule_fill, 238 .nlgroup = RTNLGRP_IPV4_RULE, 239 .owner = THIS_MODULE, 240 }; 241 242 static int __net_init ipmr_rules_init(struct net *net) 243 { 244 struct fib_rules_ops *ops; 245 
LIST_HEAD(dev_kill_list); 246 struct mr_table *mrt; 247 int err; 248 249 ops = fib_rules_register(&ipmr_rules_ops_template, net); 250 if (IS_ERR(ops)) 251 return PTR_ERR(ops); 252 253 INIT_LIST_HEAD(&net->ipv4.mr_tables); 254 255 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 256 if (IS_ERR(mrt)) { 257 err = PTR_ERR(mrt); 258 goto err1; 259 } 260 261 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); 262 if (err < 0) 263 goto err2; 264 265 net->ipv4.mr_rules_ops = ops; 266 return 0; 267 268 err2: 269 ipmr_free_table(mrt, &dev_kill_list); 270 err1: 271 fib_rules_unregister(ops); 272 return err; 273 } 274 275 static void __net_exit ipmr_rules_exit(struct net *net) 276 { 277 fib_rules_unregister(net->ipv4.mr_rules_ops); 278 } 279 280 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 281 struct list_head *dev_kill_list) 282 { 283 struct mr_table *mrt, *next; 284 285 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 286 list_del_rcu(&mrt->list); 287 ipmr_free_table(mrt, dev_kill_list); 288 } 289 } 290 291 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 292 struct netlink_ext_ack *extack) 293 { 294 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); 295 } 296 297 static unsigned int ipmr_rules_seq_read(const struct net *net) 298 { 299 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 300 } 301 302 bool ipmr_rule_default(const struct fib_rule *rule) 303 { 304 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 305 } 306 EXPORT_SYMBOL(ipmr_rule_default); 307 #else 308 static struct mr_table *ipmr_mr_table_iter(struct net *net, 309 struct mr_table *mrt) 310 { 311 if (!mrt) 312 return rcu_dereference(net->ipv4.mrt); 313 return NULL; 314 } 315 316 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 317 { 318 return rcu_dereference_check(net->ipv4.mrt, 319 lockdep_rtnl_is_held() || 320 !rcu_access_pointer(net->ipv4.mrt)); 321 } 322 323 #define ipmr_for_each_table(mrt, net) \ 324 for (mrt 
= __ipmr_get_table(net, 0); mrt; mrt = NULL) 325 326 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 327 struct mr_table **mrt) 328 { 329 *mrt = rcu_dereference(net->ipv4.mrt); 330 if (!*mrt) 331 return -EAGAIN; 332 return 0; 333 } 334 335 static int __net_init ipmr_rules_init(struct net *net) 336 { 337 struct mr_table *mrt; 338 339 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 340 if (IS_ERR(mrt)) 341 return PTR_ERR(mrt); 342 343 rcu_assign_pointer(net->ipv4.mrt, mrt); 344 return 0; 345 } 346 347 static void __net_exit ipmr_rules_exit(struct net *net) 348 { 349 } 350 351 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 352 struct list_head *dev_kill_list) 353 { 354 struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); 355 356 RCU_INIT_POINTER(net->ipv4.mrt, NULL); 357 ipmr_free_table(mrt, dev_kill_list); 358 } 359 360 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 361 struct netlink_ext_ack *extack) 362 { 363 return 0; 364 } 365 366 static unsigned int ipmr_rules_seq_read(const struct net *net) 367 { 368 return 0; 369 } 370 371 bool ipmr_rule_default(const struct fib_rule *rule) 372 { 373 return true; 374 } 375 EXPORT_SYMBOL(ipmr_rule_default); 376 #endif 377 378 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 379 { 380 struct mr_table *mrt; 381 382 rcu_read_lock(); 383 mrt = __ipmr_get_table(net, id); 384 rcu_read_unlock(); 385 386 return mrt; 387 } 388 389 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 390 const void *ptr) 391 { 392 const struct mfc_cache_cmp_arg *cmparg = arg->key; 393 const struct mfc_cache *c = ptr; 394 395 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 396 cmparg->mfc_origin != c->mfc_origin; 397 } 398 399 static const struct rhashtable_params ipmr_rht_params = { 400 .head_offset = offsetof(struct mr_mfc, mnode), 401 .key_offset = offsetof(struct mfc_cache, cmparg), 402 .key_len = sizeof(struct mfc_cache_cmp_arg), 403 .nelem_hint = 3, 
404 .obj_cmpfn = ipmr_hash_cmp, 405 .automatic_shrinking = true, 406 }; 407 408 static void ipmr_new_table_set(struct mr_table *mrt, 409 struct net *net) 410 { 411 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 412 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 413 #endif 414 } 415 416 static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { 417 .mfc_mcastgrp = htonl(INADDR_ANY), 418 .mfc_origin = htonl(INADDR_ANY), 419 }; 420 421 static struct mr_table_ops ipmr_mr_table_ops = { 422 .rht_params = &ipmr_rht_params, 423 .cmparg_any = &ipmr_mr_table_ops_cmparg_any, 424 }; 425 426 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 427 { 428 struct mr_table *mrt; 429 430 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 431 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 432 return ERR_PTR(-EINVAL); 433 434 mrt = __ipmr_get_table(net, id); 435 if (mrt) 436 return mrt; 437 438 return mr_table_alloc(net, id, &ipmr_mr_table_ops, 439 ipmr_expire_process, ipmr_new_table_set); 440 } 441 442 static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) 443 { 444 struct net *net = read_pnet(&mrt->net); 445 LIST_HEAD(ipmr_dev_kill_list); 446 447 WARN_ON_ONCE(!mr_can_free_table(net)); 448 449 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | 450 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, 451 &ipmr_dev_kill_list); 452 timer_shutdown_sync(&mrt->ipmr_expire_timer); 453 mr_table_free(mrt); 454 455 WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); 456 list_splice(&ipmr_dev_kill_list, dev_kill_list); 457 } 458 459 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 460 461 /* Initialize ipmr pimreg/tunnel in_device */ 462 static bool ipmr_init_vif_indev(const struct net_device *dev) 463 { 464 struct in_device *in_dev; 465 466 ASSERT_RTNL(); 467 468 in_dev = __in_dev_get_rtnl(dev); 469 if (!in_dev) 470 return false; 471 ipv4_devconf_setall(in_dev); 472 
/* Create the "dvmrp<vifi>" IPIP tunnel device that backs a VIFF_TUNNEL vif.
 *
 * The tunnel is created by asking tunl0's ndo_tunnel_ctl to perform
 * SIOCADDTUNNEL with the local/remote addresses taken from @v. The new
 * device is then looked up by name, flagged IFF_MULTICAST, has its
 * in_device prepared, is opened and finally switched to allmulti mode.
 *
 * Returns the new device with a reference taken by dev_hold(), or an
 * ERR_PTR(): the dev_set_allmulti() error if that step fails, and
 * -ENOBUFS for every other failure.
 */
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *tunnel_dev, *new_dev;
	struct ip_tunnel_parm_kern p = { };
	int err;

	/* Without the base tunl0 device there is nothing to clone from. */
	tunnel_dev = __dev_get_by_name(net, "tunl0");
	if (!tunnel_dev)
		goto out;

	p.iph.daddr = v->vifc_rmt_addr.s_addr;
	p.iph.saddr = v->vifc_lcl_addr.s_addr;
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	sprintf(p.name, "dvmrp%d", v->vifc_vifi);

	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
		goto out;
	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
			SIOCADDTUNNEL);
	if (err)
		goto out;

	/* The tunnel driver registered the device; find it by its name. */
	new_dev = __dev_get_by_name(net, p.name);
	if (!new_dev)
		goto out;

	new_dev->flags |= IFF_MULTICAST;
	if (!ipmr_init_vif_indev(new_dev))
		goto out_unregister;
	if (dev_open(new_dev, NULL))
		goto out_unregister;
	dev_hold(new_dev);
	err = dev_set_allmulti(new_dev, 1);
	if (err) {
		/* Undo open, tunnel creation and the reference, in that order. */
		dev_close(new_dev);
		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
				SIOCDELTUNNEL);
		dev_put(new_dev);
		new_dev = ERR_PTR(err);
	}
	return new_dev;

out_unregister:
	unregister_netdevice(new_dev);
out:
	return ERR_PTR(-ENOBUFS);
}
: LOOPBACK_IFINDEX, 536 .flowi4_mark = skb->mark, 537 }; 538 int err; 539 540 rcu_read_lock(); 541 err = ipmr_fib_lookup(net, &fl4, &mrt); 542 if (err < 0) { 543 rcu_read_unlock(); 544 kfree_skb(skb); 545 return err; 546 } 547 548 DEV_STATS_ADD(dev, tx_bytes, skb->len); 549 DEV_STATS_INC(dev, tx_packets); 550 551 /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ 552 ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 553 IGMPMSG_WHOLEPKT); 554 555 rcu_read_unlock(); 556 kfree_skb(skb); 557 return NETDEV_TX_OK; 558 } 559 560 static int reg_vif_get_iflink(const struct net_device *dev) 561 { 562 return 0; 563 } 564 565 static const struct net_device_ops reg_vif_netdev_ops = { 566 .ndo_start_xmit = reg_vif_xmit, 567 .ndo_get_iflink = reg_vif_get_iflink, 568 }; 569 570 static void reg_vif_setup(struct net_device *dev) 571 { 572 dev->type = ARPHRD_PIMREG; 573 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 574 dev->flags = IFF_NOARP; 575 dev->netdev_ops = ®_vif_netdev_ops; 576 dev->needs_free_netdev = true; 577 dev->netns_immutable = true; 578 } 579 580 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 581 { 582 struct net_device *dev; 583 char name[IFNAMSIZ]; 584 585 if (mrt->id == RT_TABLE_DEFAULT) 586 sprintf(name, "pimreg"); 587 else 588 sprintf(name, "pimreg%u", mrt->id); 589 590 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 591 592 if (!dev) 593 return NULL; 594 595 dev_net_set(dev, net); 596 597 if (register_netdevice(dev)) { 598 free_netdev(dev); 599 return NULL; 600 } 601 602 if (!ipmr_init_vif_indev(dev)) 603 goto failure; 604 if (dev_open(dev, NULL)) 605 goto failure; 606 607 dev_hold(dev); 608 609 return dev; 610 611 failure: 612 unregister_netdevice(dev); 613 return NULL; 614 } 615 616 /* called with rcu_read_lock() */ 617 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 618 unsigned int pimlen) 619 { 620 struct net_device *reg_dev = NULL; 621 struct iphdr *encap; 622 int 
/**
 * vif_delete - Delete a VIF entry
 * @mrt: Table to delete from
 * @vifi: VIF identifier to delete
 * @notify: Set to 1, if the caller is a notifier_call
 * @head: if unregistering the VIF, place it on this queue
 *
 * Under mrt_lock the VIF_DEL notifiers are called, the slot's device
 * pointer is cleared, and the register-vif index and the table's maxvif
 * are adjusted when they refer to this slot. Afterwards the device's
 * allmulti count and MC_FORWARDING devconf counter are dropped. Tunnel and
 * register devices are queued on @head for unregistration unless @notify
 * is set.
 *
 * NOTE(review): rtnl_dereference() and __in_dev_get_rtnl() imply the caller
 * holds RTNL -- confirm against callers.
 *
 * Return: 0 on success, -EADDRNOTAVAIL when @vifi is out of range or the
 * slot has no device attached.
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	spin_lock(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev,
				      vifi, mrt->id);
	RCU_INIT_POINTER(v->dev, NULL);

	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		/* Deleted the top slot: shrink maxvif to one past the
		 * highest slot that still exists (0 when none is left).
		 */
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	/* Drop the allmulti count taken when the vif was added. */
	dev_set_allmulti(dev, -1);

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
		ip_rt_multicast_event(in_dev);
	}

	/* Devices created for the vif go away with it, except when the
	 * caller is itself a notifier (@notify set).
	 */
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	/* Release the tracked device reference held by this vif slot. */
	netdev_put(dev, &v->dev_tracker);
	return 0;
}
757 */ 758 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 759 { 760 struct net *net = read_pnet(&mrt->net); 761 struct sk_buff *skb; 762 struct nlmsgerr *e; 763 764 WRITE_ONCE(mrt->cache_resolve_queue_len, 765 mrt->cache_resolve_queue_len - 1); 766 767 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { 768 if (ip_hdr(skb)->version == 0) { 769 struct nlmsghdr *nlh = skb_pull(skb, 770 sizeof(struct iphdr)); 771 nlh->nlmsg_type = NLMSG_ERROR; 772 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 773 skb_trim(skb, nlh->nlmsg_len); 774 e = nlmsg_data(nlh); 775 e->error = -ETIMEDOUT; 776 memset(&e->msg, 0, sizeof(e->msg)); 777 778 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 779 } else { 780 kfree_skb(skb); 781 } 782 } 783 784 ipmr_cache_free(c); 785 } 786 787 /* Timer process for the unresolved queue. */ 788 static void ipmr_expire_process(struct timer_list *t) 789 { 790 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 791 struct mr_mfc *c, *next; 792 unsigned long expires; 793 unsigned long now; 794 795 if (!spin_trylock(&mfc_unres_lock)) { 796 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 797 return; 798 } 799 800 if (list_empty(&mrt->mfc_unres_queue)) 801 goto out; 802 803 now = jiffies; 804 expires = 10*HZ; 805 806 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 807 if (time_after(c->mfc_un.unres.expires, now)) { 808 unsigned long interval = c->mfc_un.unres.expires - now; 809 if (interval < expires) 810 expires = interval; 811 continue; 812 } 813 814 list_del(&c->list); 815 mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); 816 ipmr_destroy_unres(mrt, (struct mfc_cache *)c); 817 } 818 819 if (!list_empty(&mrt->mfc_unres_queue)) 820 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 821 822 out: 823 spin_unlock(&mfc_unres_lock); 824 } 825 826 /* Fill oifs list. It is called under locked mrt_lock. 
*/ 827 static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, 828 unsigned char *ttls) 829 { 830 int vifi; 831 832 cache->mfc_un.res.minvif = MAXVIFS; 833 cache->mfc_un.res.maxvif = 0; 834 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 835 836 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 837 if (VIF_EXISTS(mrt, vifi) && 838 ttls[vifi] && ttls[vifi] < 255) { 839 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 840 if (cache->mfc_un.res.minvif > vifi) 841 cache->mfc_un.res.minvif = vifi; 842 if (cache->mfc_un.res.maxvif <= vifi) 843 cache->mfc_un.res.maxvif = vifi + 1; 844 } 845 } 846 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 847 } 848 849 static int vif_add(struct net *net, struct mr_table *mrt, 850 struct vifctl *vifc, int mrtsock) 851 { 852 struct netdev_phys_item_id ppid = { }; 853 int vifi = vifc->vifc_vifi; 854 struct vif_device *v = &mrt->vif_table[vifi]; 855 struct net_device *dev; 856 struct in_device *in_dev; 857 int err; 858 859 /* Is vif busy ? */ 860 if (VIF_EXISTS(mrt, vifi)) 861 return -EADDRINUSE; 862 863 switch (vifc->vifc_flags) { 864 case VIFF_REGISTER: 865 if (!ipmr_pimsm_enabled()) 866 return -EINVAL; 867 /* Special Purpose VIF in PIM 868 * All the packets will be sent to the daemon 869 */ 870 if (mrt->mroute_reg_vif_num >= 0) 871 return -EADDRINUSE; 872 dev = ipmr_reg_vif(net, mrt); 873 if (!dev) 874 return -ENOBUFS; 875 err = dev_set_allmulti(dev, 1); 876 if (err) { 877 unregister_netdevice(dev); 878 dev_put(dev); 879 return err; 880 } 881 break; 882 case VIFF_TUNNEL: 883 dev = ipmr_new_tunnel(net, vifc); 884 if (IS_ERR(dev)) 885 return PTR_ERR(dev); 886 break; 887 case VIFF_USE_IFINDEX: 888 case 0: 889 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 890 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 891 if (dev && !__in_dev_get_rtnl(dev)) { 892 dev_put(dev); 893 return -EADDRNOTAVAIL; 894 } 895 } else { 896 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 897 } 898 if (!dev) 899 return -EADDRNOTAVAIL; 900 err = 
dev_set_allmulti(dev, 1); 901 if (err) { 902 dev_put(dev); 903 return err; 904 } 905 break; 906 default: 907 return -EINVAL; 908 } 909 910 in_dev = __in_dev_get_rtnl(dev); 911 if (!in_dev) { 912 dev_put(dev); 913 return -EADDRNOTAVAIL; 914 } 915 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 916 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, 917 dev->ifindex, &in_dev->cnf); 918 ip_rt_multicast_event(in_dev); 919 920 /* Fill in the VIF structures */ 921 vif_device_init(v, dev, vifc->vifc_rate_limit, 922 vifc->vifc_threshold, 923 vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), 924 (VIFF_TUNNEL | VIFF_REGISTER)); 925 926 err = netif_get_port_parent_id(dev, &ppid, true); 927 if (err == 0) { 928 memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); 929 v->dev_parent_id.id_len = ppid.id_len; 930 } else { 931 v->dev_parent_id.id_len = 0; 932 } 933 934 v->local = vifc->vifc_lcl_addr.s_addr; 935 v->remote = vifc->vifc_rmt_addr.s_addr; 936 937 /* And finish update writing critical data */ 938 spin_lock(&mrt_lock); 939 rcu_assign_pointer(v->dev, dev); 940 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 941 if (v->flags & VIFF_REGISTER) { 942 /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ 943 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 944 } 945 if (vifi+1 > mrt->maxvif) 946 WRITE_ONCE(mrt->maxvif, vifi + 1); 947 spin_unlock(&mrt_lock); 948 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, 949 vifi, mrt->id); 950 return 0; 951 } 952 953 /* called with rcu_read_lock() */ 954 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 955 __be32 origin, 956 __be32 mcastgrp) 957 { 958 struct mfc_cache_cmp_arg arg = { 959 .mfc_mcastgrp = mcastgrp, 960 .mfc_origin = origin 961 }; 962 963 return mr_mfc_find(mrt, &arg); 964 } 965 966 /* Look for a (*,G) entry */ 967 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 968 __be32 mcastgrp, int vifi) 969 { 970 struct mfc_cache_cmp_arg arg = { 971 
.mfc_mcastgrp = mcastgrp, 972 .mfc_origin = htonl(INADDR_ANY) 973 }; 974 975 if (mcastgrp == htonl(INADDR_ANY)) 976 return mr_mfc_find_any_parent(mrt, vifi); 977 return mr_mfc_find_any(mrt, vifi, &arg); 978 } 979 980 /* Look for a (S,G,iif) entry if parent != -1 */ 981 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, 982 __be32 origin, __be32 mcastgrp, 983 int parent) 984 { 985 struct mfc_cache_cmp_arg arg = { 986 .mfc_mcastgrp = mcastgrp, 987 .mfc_origin = origin, 988 }; 989 990 return mr_mfc_find_parent(mrt, &arg, parent); 991 } 992 993 /* Allocate a multicast cache entry */ 994 static struct mfc_cache *ipmr_cache_alloc(void) 995 { 996 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 997 998 if (c) { 999 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1000 c->_c.mfc_un.res.minvif = MAXVIFS; 1001 c->_c.free = ipmr_cache_free_rcu; 1002 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1003 } 1004 return c; 1005 } 1006 1007 static struct mfc_cache *ipmr_cache_alloc_unres(void) 1008 { 1009 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1010 1011 if (c) { 1012 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1013 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1014 } 1015 return c; 1016 } 1017 1018 /* A cache entry has gone into a resolved state from queued */ 1019 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 1020 struct mfc_cache *uc, struct mfc_cache *c) 1021 { 1022 struct sk_buff *skb; 1023 struct nlmsgerr *e; 1024 1025 /* Play the pending entries through our router */ 1026 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1027 if (ip_hdr(skb)->version == 0) { 1028 struct nlmsghdr *nlh = skb_pull(skb, 1029 sizeof(struct iphdr)); 1030 1031 if (mr_fill_mroute(mrt, skb, &c->_c, 1032 nlmsg_data(nlh)) > 0) { 1033 nlh->nlmsg_len = skb_tail_pointer(skb) - 1034 (u8 *)nlh; 1035 } else { 1036 nlh->nlmsg_type = NLMSG_ERROR; 1037 nlh->nlmsg_len = 
/* Bounce a cache query up to mrouted and netlink.
 *
 * Builds an igmpmsg-style skb describing @pkt and queues it on the table's
 * mroute socket, after passing it to igmpmsg_netlink_event().
 *
 * For IGMPMSG_WHOLEPKT and IGMPMSG_WRVIFWHOLE the whole packet is copied
 * with an extra IPv4 header in front; for IGMPMSG_WHOLEPKT the reported vif
 * is the table's register vif and @vifi is not used. For every other
 * @assert value only the IP header of @pkt is copied, followed by a zeroed
 * IGMP header whose type is @assert.
 *
 * @pkt itself is never freed here; that is left to the caller.
 *
 * Returns -EINVAL when no mroute socket is attached, -ENOBUFS when the skb
 * cannot be allocated, otherwise the result of sock_queue_rcv_skb().
 *
 * Called under rcu_read_lock().
 */
static int ipmr_cache_report(const struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	/* Nobody to report to: bail out before allocating anything. */
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk)
		return -EINVAL;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE) {
			msg->im_vif = vifi;
			msg->im_vif_hi = vifi >> 8;
		} else {
			/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
			int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);

			msg->im_vif = vif_num;
			msg->im_vif_hi = vif_num >> 8;
		}
		/* The prepended header is a plain option-less IPv4 header. */
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		msg->im_vif_hi = vifi >> 8;
		ipv4_pktinfo_prepare(mroute_sk, pkt, false);
		memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
		/* Add our header.
		 * Note that code, csum and group fields are cleared.
		 */
		igmp = skb_put_zero(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
/* Called under rcu_read_lock().
 *
 * Park @skb on the unresolved entry matching its (source, destination)
 * pair, creating that entry and reporting IGMPMSG_NOCACHE to the daemon
 * when it does not exist yet. The whole operation runs under
 * mfc_unres_lock.
 *
 * On success (return 0) @skb is owned by the unresolved queue. On any
 * failure @skb is freed here and a negative errno is returned: -EINVAL when
 * check_net() fails, -ENOBUFS when the entry cannot be allocated or already
 * holds more than 3 packets, or the ipmr_cache_report() error.
 */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = read_pnet(&mrt->net);
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c = NULL;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);

	if (!check_net(net)) {
		err = -EINVAL;
		goto err;
	}

	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		c = ipmr_cache_alloc_unres();
		if (!c) {
			err = -ENOBUFS;
			goto err;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0)
			goto err;	/* c is not queued yet; err path frees it */

		WRITE_ONCE(mrt->cache_resolve_queue_len,
			   mrt->cache_resolve_queue_len + 1);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		/* First pending entry: arm the expiry timer for it. */
		if (mrt->cache_resolve_queue_len == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		/* The entry stays on the queue: keep the err path from
		 * freeing it.
		 */
		c = NULL;
		err = -ENOBUFS;
		goto err;
	}

	if (dev) {
		skb->dev = dev;
		skb->skb_iif = dev->ifindex;
	}

	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);

	spin_unlock_bh(&mfc_unres_lock);
	return 0;

err:
	spin_unlock_bh(&mfc_unres_lock);
	if (c)
		ipmr_cache_free(c);
	kfree_skb(skb);
	return err;
}
ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1222 { 1223 struct net *net = read_pnet(&mrt->net); 1224 struct mfc_cache *c; 1225 1226 rcu_read_lock(); 1227 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1228 mfc->mfcc_mcastgrp.s_addr, parent); 1229 rcu_read_unlock(); 1230 if (!c) 1231 return -ENOENT; 1232 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); 1233 list_del_rcu(&c->_c.list); 1234 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); 1235 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1236 mr_cache_put(&c->_c); 1237 1238 return 0; 1239 } 1240 1241 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1242 struct mfcctl *mfc, int mrtsock, int parent) 1243 { 1244 struct mfc_cache *uc, *c; 1245 struct mr_mfc *_uc; 1246 bool found; 1247 int ret; 1248 1249 if (mfc->mfcc_parent >= MAXVIFS) 1250 return -ENFILE; 1251 1252 rcu_read_lock(); 1253 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1254 mfc->mfcc_mcastgrp.s_addr, parent); 1255 rcu_read_unlock(); 1256 if (c) { 1257 spin_lock(&mrt_lock); 1258 c->_c.mfc_parent = mfc->mfcc_parent; 1259 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1260 if (!mrtsock) 1261 c->_c.mfc_flags |= MFC_STATIC; 1262 spin_unlock(&mrt_lock); 1263 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, 1264 mrt->id); 1265 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1266 return 0; 1267 } 1268 1269 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1270 !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1271 return -EINVAL; 1272 1273 c = ipmr_cache_alloc(); 1274 if (!c) 1275 return -ENOMEM; 1276 1277 c->mfc_origin = mfc->mfcc_origin.s_addr; 1278 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1279 c->_c.mfc_parent = mfc->mfcc_parent; 1280 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1281 if (!mrtsock) 1282 c->_c.mfc_flags |= MFC_STATIC; 1283 1284 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1285 ipmr_rht_params); 1286 if 
(ret) { 1287 pr_err("ipmr: rhtable insert error %d\n", ret); 1288 ipmr_cache_free(c); 1289 return ret; 1290 } 1291 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1292 /* Check to see if we resolved a queued list. If so we 1293 * need to send on the frames and tidy up. 1294 */ 1295 found = false; 1296 spin_lock_bh(&mfc_unres_lock); 1297 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1298 uc = (struct mfc_cache *)_uc; 1299 if (uc->mfc_origin == c->mfc_origin && 1300 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1301 list_del(&_uc->list); 1302 WRITE_ONCE(mrt->cache_resolve_queue_len, 1303 mrt->cache_resolve_queue_len - 1); 1304 found = true; 1305 break; 1306 } 1307 } 1308 if (list_empty(&mrt->mfc_unres_queue)) 1309 timer_delete(&mrt->ipmr_expire_timer); 1310 spin_unlock_bh(&mfc_unres_lock); 1311 1312 if (found) { 1313 ipmr_cache_resolve(net, mrt, uc, c); 1314 ipmr_cache_free(uc); 1315 } 1316 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); 1317 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1318 return 0; 1319 } 1320 1321 /* Close the multicast socket, and clear the vif tables etc */ 1322 static void mroute_clean_tables(struct mr_table *mrt, int flags, 1323 struct list_head *dev_kill_list) 1324 { 1325 struct net *net = read_pnet(&mrt->net); 1326 struct mfc_cache *cache; 1327 struct mr_mfc *c, *tmp; 1328 int i; 1329 1330 /* Shut down all active vif entries */ 1331 if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { 1332 for (i = 0; i < mrt->maxvif; i++) { 1333 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1334 !(flags & MRT_FLUSH_VIFS_STATIC)) || 1335 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) 1336 continue; 1337 vif_delete(mrt, i, 0, dev_kill_list); 1338 } 1339 } 1340 1341 /* Wipe the cache */ 1342 if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { 1343 mutex_lock(&net->ipv4.mfc_mutex); 1344 1345 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1346 if (((c->mfc_flags & MFC_STATIC) && 
!(flags & MRT_FLUSH_MFC_STATIC)) || 1347 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) 1348 continue; 1349 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1350 list_del_rcu(&c->list); 1351 cache = (struct mfc_cache *)c; 1352 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, 1353 mrt->id); 1354 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1355 mr_cache_put(c); 1356 } 1357 1358 mutex_unlock(&net->ipv4.mfc_mutex); 1359 } 1360 1361 if (flags & MRT_FLUSH_MFC) { 1362 if (READ_ONCE(mrt->cache_resolve_queue_len) || !check_net(net)) { 1363 spin_lock_bh(&mfc_unres_lock); 1364 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1365 list_del(&c->list); 1366 cache = (struct mfc_cache *)c; 1367 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1368 ipmr_destroy_unres(mrt, cache); 1369 } 1370 spin_unlock_bh(&mfc_unres_lock); 1371 } 1372 } 1373 } 1374 1375 /* called from ip_ra_control(), before an RCU grace period, 1376 * we don't need to call synchronize_rcu() here 1377 */ 1378 static void mrtsock_destruct(struct sock *sk) 1379 { 1380 struct net *net = sock_net(sk); 1381 LIST_HEAD(dev_kill_list); 1382 struct mr_table *mrt; 1383 1384 rtnl_lock(); 1385 1386 ipmr_for_each_table(mrt, net) { 1387 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1388 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1389 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1390 NETCONFA_MC_FORWARDING, 1391 NETCONFA_IFINDEX_ALL, 1392 net->ipv4.devconf_all); 1393 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1394 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC, 1395 &dev_kill_list); 1396 } 1397 } 1398 1399 unregister_netdevice_many(&dev_kill_list); 1400 1401 rtnl_unlock(); 1402 } 1403 1404 /* Socket options and virtual interface manipulation. The whole 1405 * virtual interface system is a complete heap, but unfortunately 1406 * that's how BSD mrouted happens to think. Maybe one day with a proper 1407 * MOSPF/PIM router set up we can clean this up. 
1408 */ 1409 1410 int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1411 unsigned int optlen) 1412 { 1413 struct net *net = sock_net(sk); 1414 int val, ret = 0, parent = 0; 1415 struct mr_table *mrt; 1416 struct vifctl vif; 1417 struct mfcctl mfc; 1418 bool do_wrvifwhole; 1419 u32 uval; 1420 1421 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1422 rtnl_lock(); 1423 if (sk->sk_type != SOCK_RAW || 1424 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1425 ret = -EOPNOTSUPP; 1426 goto out_unlock; 1427 } 1428 1429 mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1430 if (!mrt) { 1431 ret = -ENOENT; 1432 goto out_unlock; 1433 } 1434 if (optname != MRT_INIT) { 1435 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1436 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1437 ret = -EACCES; 1438 goto out_unlock; 1439 } 1440 } 1441 1442 switch (optname) { 1443 case MRT_INIT: 1444 if (optlen != sizeof(int)) { 1445 ret = -EINVAL; 1446 break; 1447 } 1448 if (rtnl_dereference(mrt->mroute_sk)) { 1449 ret = -EADDRINUSE; 1450 break; 1451 } 1452 1453 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1454 if (ret == 0) { 1455 rcu_assign_pointer(mrt->mroute_sk, sk); 1456 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1457 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1458 NETCONFA_MC_FORWARDING, 1459 NETCONFA_IFINDEX_ALL, 1460 net->ipv4.devconf_all); 1461 } 1462 break; 1463 case MRT_DONE: 1464 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1465 ret = -EACCES; 1466 } else { 1467 /* We need to unlock here because mrtsock_destruct takes 1468 * care of rtnl itself and we can't change that due to 1469 * the IP_ROUTER_ALERT setsockopt which runs without it. 
1470 */ 1471 rtnl_unlock(); 1472 ret = ip_ra_control(sk, 0, NULL); 1473 goto out; 1474 } 1475 break; 1476 case MRT_ADD_VIF: 1477 case MRT_DEL_VIF: 1478 if (optlen != sizeof(vif)) { 1479 ret = -EINVAL; 1480 break; 1481 } 1482 if (copy_from_sockptr(&vif, optval, sizeof(vif))) { 1483 ret = -EFAULT; 1484 break; 1485 } 1486 if (vif.vifc_vifi >= MAXVIFS) { 1487 ret = -ENFILE; 1488 break; 1489 } 1490 if (optname == MRT_ADD_VIF) { 1491 ret = vif_add(net, mrt, &vif, 1492 sk == rtnl_dereference(mrt->mroute_sk)); 1493 } else { 1494 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1495 } 1496 break; 1497 /* Manipulate the forwarding caches. These live 1498 * in a sort of kernel/user symbiosis. 1499 */ 1500 case MRT_ADD_MFC: 1501 case MRT_DEL_MFC: 1502 parent = -1; 1503 fallthrough; 1504 case MRT_ADD_MFC_PROXY: 1505 case MRT_DEL_MFC_PROXY: 1506 if (optlen != sizeof(mfc)) { 1507 ret = -EINVAL; 1508 break; 1509 } 1510 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) { 1511 ret = -EFAULT; 1512 break; 1513 } 1514 if (parent == 0) 1515 parent = mfc.mfcc_parent; 1516 1517 mutex_lock(&net->ipv4.mfc_mutex); 1518 1519 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1520 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1521 else 1522 ret = ipmr_mfc_add(net, mrt, &mfc, 1523 sk == rtnl_dereference(mrt->mroute_sk), 1524 parent); 1525 1526 mutex_unlock(&net->ipv4.mfc_mutex); 1527 break; 1528 case MRT_FLUSH: { 1529 LIST_HEAD(dev_kill_list); 1530 1531 if (optlen != sizeof(val)) { 1532 ret = -EINVAL; 1533 break; 1534 } 1535 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1536 ret = -EFAULT; 1537 break; 1538 } 1539 1540 mroute_clean_tables(mrt, val, &dev_kill_list); 1541 unregister_netdevice_many(&dev_kill_list); 1542 break; 1543 } 1544 /* Control PIM assert. 
*/ 1545 case MRT_ASSERT: 1546 if (optlen != sizeof(val)) { 1547 ret = -EINVAL; 1548 break; 1549 } 1550 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1551 ret = -EFAULT; 1552 break; 1553 } 1554 WRITE_ONCE(mrt->mroute_do_assert, val); 1555 break; 1556 case MRT_PIM: 1557 if (!ipmr_pimsm_enabled()) { 1558 ret = -ENOPROTOOPT; 1559 break; 1560 } 1561 if (optlen != sizeof(val)) { 1562 ret = -EINVAL; 1563 break; 1564 } 1565 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1566 ret = -EFAULT; 1567 break; 1568 } 1569 1570 do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); 1571 val = !!val; 1572 if (val != mrt->mroute_do_pim) { 1573 WRITE_ONCE(mrt->mroute_do_pim, val); 1574 WRITE_ONCE(mrt->mroute_do_assert, val); 1575 WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole); 1576 } 1577 break; 1578 case MRT_TABLE: 1579 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1580 ret = -ENOPROTOOPT; 1581 break; 1582 } 1583 if (optlen != sizeof(uval)) { 1584 ret = -EINVAL; 1585 break; 1586 } 1587 if (copy_from_sockptr(&uval, optval, sizeof(uval))) { 1588 ret = -EFAULT; 1589 break; 1590 } 1591 1592 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1593 ret = -EBUSY; 1594 } else { 1595 mrt = ipmr_new_table(net, uval); 1596 if (IS_ERR(mrt)) 1597 ret = PTR_ERR(mrt); 1598 else 1599 raw_sk(sk)->ipmr_table = uval; 1600 } 1601 break; 1602 /* Spurious command, or MRT_VERSION which you cannot set. 
*/ 1603 default: 1604 ret = -ENOPROTOOPT; 1605 } 1606 out_unlock: 1607 rtnl_unlock(); 1608 out: 1609 return ret; 1610 } 1611 1612 /* Execute if this ioctl is a special mroute ioctl */ 1613 int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1614 { 1615 switch (cmd) { 1616 /* These userspace buffers will be consumed by ipmr_ioctl() */ 1617 case SIOCGETVIFCNT: { 1618 struct sioc_vif_req buffer; 1619 1620 return sock_ioctl_inout(sk, cmd, arg, &buffer, 1621 sizeof(buffer)); 1622 } 1623 case SIOCGETSGCNT: { 1624 struct sioc_sg_req buffer; 1625 1626 return sock_ioctl_inout(sk, cmd, arg, &buffer, 1627 sizeof(buffer)); 1628 } 1629 } 1630 /* return code > 0 means that the ioctl was not executed */ 1631 return 1; 1632 } 1633 1634 /* Getsock opt support for the multicast routing system. */ 1635 int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1636 sockptr_t optlen) 1637 { 1638 int olr; 1639 int val; 1640 struct net *net = sock_net(sk); 1641 struct mr_table *mrt; 1642 1643 if (sk->sk_type != SOCK_RAW || 1644 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1645 return -EOPNOTSUPP; 1646 1647 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1648 if (!mrt) 1649 return -ENOENT; 1650 1651 switch (optname) { 1652 case MRT_VERSION: 1653 val = 0x0305; 1654 break; 1655 case MRT_PIM: 1656 if (!ipmr_pimsm_enabled()) 1657 return -ENOPROTOOPT; 1658 val = READ_ONCE(mrt->mroute_do_pim); 1659 break; 1660 case MRT_ASSERT: 1661 val = READ_ONCE(mrt->mroute_do_assert); 1662 break; 1663 default: 1664 return -ENOPROTOOPT; 1665 } 1666 1667 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1668 return -EFAULT; 1669 if (olr < 0) 1670 return -EINVAL; 1671 1672 olr = min_t(unsigned int, olr, sizeof(int)); 1673 1674 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1675 return -EFAULT; 1676 if (copy_to_sockptr(optval, &val, olr)) 1677 return -EFAULT; 1678 return 0; 1679 } 1680 1681 /* The IP multicast ioctl support routines. 
*/ 1682 int ipmr_ioctl(struct sock *sk, int cmd, void *arg) 1683 { 1684 struct vif_device *vif; 1685 struct mfc_cache *c; 1686 struct net *net = sock_net(sk); 1687 struct sioc_vif_req *vr; 1688 struct sioc_sg_req *sr; 1689 struct mr_table *mrt; 1690 1691 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1692 if (!mrt) 1693 return -ENOENT; 1694 1695 switch (cmd) { 1696 case SIOCGETVIFCNT: 1697 vr = (struct sioc_vif_req *)arg; 1698 if (vr->vifi >= mrt->maxvif) 1699 return -EINVAL; 1700 vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif); 1701 rcu_read_lock(); 1702 vif = &mrt->vif_table[vr->vifi]; 1703 if (VIF_EXISTS(mrt, vr->vifi)) { 1704 vr->icount = READ_ONCE(vif->pkt_in); 1705 vr->ocount = READ_ONCE(vif->pkt_out); 1706 vr->ibytes = READ_ONCE(vif->bytes_in); 1707 vr->obytes = READ_ONCE(vif->bytes_out); 1708 rcu_read_unlock(); 1709 1710 return 0; 1711 } 1712 rcu_read_unlock(); 1713 return -EADDRNOTAVAIL; 1714 case SIOCGETSGCNT: 1715 sr = (struct sioc_sg_req *)arg; 1716 1717 rcu_read_lock(); 1718 c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr); 1719 if (c) { 1720 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1721 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1722 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1723 rcu_read_unlock(); 1724 return 0; 1725 } 1726 rcu_read_unlock(); 1727 return -EADDRNOTAVAIL; 1728 default: 1729 return -ENOIOCTLCMD; 1730 } 1731 } 1732 1733 #ifdef CONFIG_COMPAT 1734 struct compat_sioc_sg_req { 1735 struct in_addr src; 1736 struct in_addr grp; 1737 compat_ulong_t pktcnt; 1738 compat_ulong_t bytecnt; 1739 compat_ulong_t wrong_if; 1740 }; 1741 1742 struct compat_sioc_vif_req { 1743 vifi_t vifi; /* Which iface */ 1744 compat_ulong_t icount; 1745 compat_ulong_t ocount; 1746 compat_ulong_t ibytes; 1747 compat_ulong_t obytes; 1748 }; 1749 1750 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1751 { 1752 struct compat_sioc_sg_req sr; 1753 struct 
compat_sioc_vif_req vr; 1754 struct vif_device *vif; 1755 struct mfc_cache *c; 1756 struct net *net = sock_net(sk); 1757 struct mr_table *mrt; 1758 1759 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1760 if (!mrt) 1761 return -ENOENT; 1762 1763 switch (cmd) { 1764 case SIOCGETVIFCNT: 1765 if (copy_from_user(&vr, arg, sizeof(vr))) 1766 return -EFAULT; 1767 if (vr.vifi >= mrt->maxvif) 1768 return -EINVAL; 1769 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1770 rcu_read_lock(); 1771 vif = &mrt->vif_table[vr.vifi]; 1772 if (VIF_EXISTS(mrt, vr.vifi)) { 1773 vr.icount = READ_ONCE(vif->pkt_in); 1774 vr.ocount = READ_ONCE(vif->pkt_out); 1775 vr.ibytes = READ_ONCE(vif->bytes_in); 1776 vr.obytes = READ_ONCE(vif->bytes_out); 1777 rcu_read_unlock(); 1778 1779 if (copy_to_user(arg, &vr, sizeof(vr))) 1780 return -EFAULT; 1781 return 0; 1782 } 1783 rcu_read_unlock(); 1784 return -EADDRNOTAVAIL; 1785 case SIOCGETSGCNT: 1786 if (copy_from_user(&sr, arg, sizeof(sr))) 1787 return -EFAULT; 1788 1789 rcu_read_lock(); 1790 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1791 if (c) { 1792 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1793 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1794 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1795 rcu_read_unlock(); 1796 1797 if (copy_to_user(arg, &sr, sizeof(sr))) 1798 return -EFAULT; 1799 return 0; 1800 } 1801 rcu_read_unlock(); 1802 return -EADDRNOTAVAIL; 1803 default: 1804 return -ENOIOCTLCMD; 1805 } 1806 } 1807 #endif 1808 1809 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1810 { 1811 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1812 struct net *net = dev_net(dev); 1813 struct mr_table *mrt; 1814 struct vif_device *v; 1815 int ct; 1816 1817 if (event != NETDEV_UNREGISTER) 1818 return NOTIFY_DONE; 1819 1820 ipmr_for_each_table(mrt, net) { 1821 v = &mrt->vif_table[0]; 1822 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 
1823 if (rcu_access_pointer(v->dev) == dev) 1824 vif_delete(mrt, ct, 1, NULL); 1825 } 1826 } 1827 return NOTIFY_DONE; 1828 } 1829 1830 static struct notifier_block ip_mr_notifier = { 1831 .notifier_call = ipmr_device_event, 1832 }; 1833 1834 /* Encapsulate a packet by attaching a valid IPIP header to it. 1835 * This avoids tunnel drivers and other mess and gives us the speed so 1836 * important for multicast video. 1837 */ 1838 static void ip_encap(struct net *net, struct sk_buff *skb, 1839 __be32 saddr, __be32 daddr) 1840 { 1841 struct iphdr *iph; 1842 const struct iphdr *old_iph = ip_hdr(skb); 1843 1844 skb_push(skb, sizeof(struct iphdr)); 1845 skb->transport_header = skb->network_header; 1846 skb_reset_network_header(skb); 1847 iph = ip_hdr(skb); 1848 1849 iph->version = 4; 1850 iph->tos = old_iph->tos; 1851 iph->ttl = old_iph->ttl; 1852 iph->frag_off = 0; 1853 iph->daddr = daddr; 1854 iph->saddr = saddr; 1855 iph->protocol = IPPROTO_IPIP; 1856 iph->ihl = 5; 1857 iph->tot_len = htons(skb->len); 1858 ip_select_ident(net, skb, NULL); 1859 ip_send_check(iph); 1860 1861 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1862 nf_reset_ct(skb); 1863 } 1864 1865 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1866 struct sk_buff *skb) 1867 { 1868 struct ip_options *opt = &(IPCB(skb)->opt); 1869 1870 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1871 1872 if (unlikely(opt->optlen)) 1873 ip_forward_options(skb); 1874 1875 return dst_output(net, sk, skb); 1876 } 1877 1878 #ifdef CONFIG_NET_SWITCHDEV 1879 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1880 int in_vifi, int out_vifi) 1881 { 1882 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1883 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1884 1885 if (!skb->offload_l3_fwd_mark) 1886 return false; 1887 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1888 return false; 1889 return 
netdev_phys_item_id_same(&out_vif->dev_parent_id, 1890 &in_vif->dev_parent_id); 1891 } 1892 #else 1893 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1894 int in_vifi, int out_vifi) 1895 { 1896 return false; 1897 } 1898 #endif 1899 1900 /* Processing handlers for ipmr_forward, under rcu_read_lock() */ 1901 1902 static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, 1903 struct sk_buff *skb, int vifi) 1904 { 1905 const struct iphdr *iph = ip_hdr(skb); 1906 struct vif_device *vif = &mrt->vif_table[vifi]; 1907 struct net_device *vif_dev; 1908 struct rtable *rt; 1909 struct flowi4 fl4; 1910 int encap = 0; 1911 1912 vif_dev = vif_dev_read(vif); 1913 if (!vif_dev) 1914 return -1; 1915 1916 if (vif->flags & VIFF_REGISTER) { 1917 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 1918 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 1919 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 1920 DEV_STATS_INC(vif_dev, tx_packets); 1921 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1922 return -1; 1923 } 1924 1925 if (vif->flags & VIFF_TUNNEL) { 1926 rt = ip_route_output_ports(net, &fl4, NULL, 1927 vif->remote, vif->local, 1928 0, 0, 1929 IPPROTO_IPIP, 1930 iph->tos & INET_DSCP_MASK, vif->link); 1931 if (IS_ERR(rt)) 1932 return -1; 1933 encap = sizeof(struct iphdr); 1934 } else { 1935 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1936 0, 0, 1937 IPPROTO_IPIP, 1938 iph->tos & INET_DSCP_MASK, vif->link); 1939 if (IS_ERR(rt)) 1940 return -1; 1941 } 1942 1943 if (skb->len+encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1944 /* Do not fragment multicasts. Alas, IPv4 does not 1945 * allow to send ICMP, so that packets will disappear 1946 * to blackhole. 
1947 */ 1948 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1949 ip_rt_put(rt); 1950 return -1; 1951 } 1952 1953 encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len; 1954 1955 if (skb_cow(skb, encap)) { 1956 ip_rt_put(rt); 1957 return -1; 1958 } 1959 1960 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 1961 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 1962 1963 skb_dst_drop(skb); 1964 skb_dst_set(skb, &rt->dst); 1965 ip_decrease_ttl(ip_hdr(skb)); 1966 1967 /* FIXME: forward and output firewalls used to be called here. 1968 * What do we do with netfilter? -- RR 1969 */ 1970 if (vif->flags & VIFF_TUNNEL) { 1971 ip_encap(net, skb, vif->local, vif->remote); 1972 /* FIXME: extra output firewall step used to be here. --RR */ 1973 DEV_STATS_INC(vif_dev, tx_packets); 1974 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 1975 } 1976 1977 return 0; 1978 } 1979 1980 static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt, 1981 int in_vifi, struct sk_buff *skb, int vifi) 1982 { 1983 struct rtable *rt; 1984 1985 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1986 goto out_free; 1987 1988 if (ipmr_prepare_xmit(net, mrt, skb, vifi)) 1989 goto out_free; 1990 1991 rt = skb_rtable(skb); 1992 1993 IPCB(skb)->flags |= IPSKB_FORWARDED; 1994 1995 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1996 * not only before forwarding, but after forwarding on all output 1997 * interfaces. It is clear, if mrouter runs a multicasting 1998 * program, it should receive packets not depending to what interface 1999 * program is joined. 2000 * If we will not make it, the program will have to join on all 2001 * interfaces. On the other hand, multihoming host (or router, but 2002 * not mrouter) cannot join to more than one interface - it will 2003 * result in receiving multiple packets. 
2004 */ 2005 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, 2006 net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst), 2007 ipmr_forward_finish); 2008 return; 2009 2010 out_free: 2011 kfree_skb(skb); 2012 } 2013 2014 static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt, 2015 struct sk_buff *skb, int vifi) 2016 { 2017 if (ipmr_prepare_xmit(net, mrt, skb, vifi)) 2018 goto out_free; 2019 2020 ip_mc_output(net, NULL, skb); 2021 return; 2022 2023 out_free: 2024 kfree_skb(skb); 2025 } 2026 2027 /* Called with mrt_lock or rcu_read_lock() */ 2028 static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) 2029 { 2030 int ct; 2031 /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ 2032 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2033 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2034 break; 2035 } 2036 return ct; 2037 } 2038 2039 /* "local" means that we should preserve one skb (for local delivery) */ 2040 /* Called uner rcu_read_lock() */ 2041 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 2042 struct net_device *dev, struct sk_buff *skb, 2043 struct mfc_cache *c, int local) 2044 { 2045 int true_vifi = ipmr_find_vif(mrt, dev); 2046 int psend = -1; 2047 int vif, ct; 2048 2049 vif = c->_c.mfc_parent; 2050 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2051 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2052 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2053 2054 if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { 2055 struct mfc_cache *cache_proxy; 2056 2057 /* For an (*,G) entry, we only check that the incoming 2058 * interface is part of the static tree. 2059 */ 2060 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2061 if (cache_proxy && 2062 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2063 goto forward; 2064 } 2065 2066 /* Wrong interface: drop packet and (maybe) send PIM assert. 
*/ 2067 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2068 if (rt_is_output_route(skb_rtable(skb))) { 2069 /* It is our own packet, looped back. 2070 * Very complicated situation... 2071 * 2072 * The best workaround until routing daemons will be 2073 * fixed is not to redistribute packet, if it was 2074 * send through wrong interface. It means, that 2075 * multicast applications WILL NOT work for 2076 * (S,G), which have default multicast route pointing 2077 * to wrong oif. In any case, it is not a good 2078 * idea to use multicasting applications on router. 2079 */ 2080 goto dont_forward; 2081 } 2082 2083 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2084 2085 if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) && 2086 /* pimsm uses asserts, when switching from RPT to SPT, 2087 * so that we cannot check that packet arrived on an oif. 2088 * It is bad, but otherwise we would need to move pretty 2089 * large chunk of pimd to kernel. Ough... --ANK 2090 */ 2091 (READ_ONCE(mrt->mroute_do_pim) || 2092 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2093 time_after(jiffies, 2094 c->_c.mfc_un.res.last_assert + 2095 MFC_ASSERT_THRESH)) { 2096 c->_c.mfc_un.res.last_assert = jiffies; 2097 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 2098 if (READ_ONCE(mrt->mroute_do_wrvifwhole)) 2099 ipmr_cache_report(mrt, skb, true_vifi, 2100 IGMPMSG_WRVIFWHOLE); 2101 } 2102 goto dont_forward; 2103 } 2104 2105 forward: 2106 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2107 mrt->vif_table[vif].pkt_in + 1); 2108 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2109 mrt->vif_table[vif].bytes_in + skb->len); 2110 2111 /* Forward the frame */ 2112 if (c->mfc_origin == htonl(INADDR_ANY) && 2113 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2114 if (true_vifi >= 0 && 2115 true_vifi != c->_c.mfc_parent && 2116 ip_hdr(skb)->ttl > 2117 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2118 /* It's an (*,*) entry and the packet is not coming from 2119 * the upstream: forward the packet to the upstream 2120 
* only. 2121 */ 2122 psend = c->_c.mfc_parent; 2123 goto last_forward; 2124 } 2125 goto dont_forward; 2126 } 2127 for (ct = c->_c.mfc_un.res.maxvif - 1; 2128 ct >= c->_c.mfc_un.res.minvif; ct--) { 2129 /* For (*,G) entry, don't forward to the incoming interface */ 2130 if ((c->mfc_origin != htonl(INADDR_ANY) || 2131 ct != true_vifi) && 2132 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2133 if (psend != -1) { 2134 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2135 2136 if (skb2) 2137 ipmr_queue_fwd_xmit(net, mrt, true_vifi, 2138 skb2, psend); 2139 } 2140 psend = ct; 2141 } 2142 } 2143 last_forward: 2144 if (psend != -1) { 2145 if (local) { 2146 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2147 2148 if (skb2) 2149 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2, 2150 psend); 2151 } else { 2152 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend); 2153 return; 2154 } 2155 } 2156 2157 dont_forward: 2158 if (!local) 2159 kfree_skb(skb); 2160 } 2161 2162 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2163 { 2164 struct rtable *rt = skb_rtable(skb); 2165 struct iphdr *iph = ip_hdr(skb); 2166 struct flowi4 fl4 = { 2167 .daddr = iph->daddr, 2168 .saddr = iph->saddr, 2169 .flowi4_dscp = ip4h_dscp(iph), 2170 .flowi4_oif = (rt_is_output_route(rt) ? 2171 skb->dev->ifindex : 0), 2172 .flowi4_iif = (rt_is_output_route(rt) ? 
2173 LOOPBACK_IFINDEX : 2174 skb->dev->ifindex), 2175 .flowi4_mark = skb->mark, 2176 }; 2177 struct mr_table *mrt; 2178 int err; 2179 2180 err = ipmr_fib_lookup(net, &fl4, &mrt); 2181 if (err) 2182 return ERR_PTR(err); 2183 return mrt; 2184 } 2185 2186 /* Multicast packets for forwarding arrive here 2187 * Called with rcu_read_lock(); 2188 */ 2189 int ip_mr_input(struct sk_buff *skb) 2190 { 2191 struct mfc_cache *cache; 2192 struct net *net = dev_net(skb->dev); 2193 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2194 struct mr_table *mrt; 2195 struct net_device *dev; 2196 2197 /* skb->dev passed in is the loX master dev for vrfs. 2198 * As there are no vifs associated with loopback devices, 2199 * get the proper interface that does have a vif associated with it. 2200 */ 2201 dev = skb->dev; 2202 if (netif_is_l3_master(skb->dev)) { 2203 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2204 if (!dev) { 2205 kfree_skb(skb); 2206 return -ENODEV; 2207 } 2208 } 2209 2210 /* Packet is looped back after forward, it should not be 2211 * forwarded second time, but still can be delivered locally. 2212 */ 2213 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2214 goto dont_forward; 2215 2216 mrt = ipmr_rt_fib_lookup(net, skb); 2217 if (IS_ERR(mrt)) { 2218 kfree_skb(skb); 2219 return PTR_ERR(mrt); 2220 } 2221 if (!local) { 2222 if (IPCB(skb)->opt.router_alert) { 2223 if (ip_call_ra_chain(skb)) 2224 return 0; 2225 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2226 /* IGMPv1 (and broken IGMPv2 implementations sort of 2227 * Cisco IOS <= 11.2(8)) do not put router alert 2228 * option to IGMP packets destined to routable 2229 * groups. It is very bad, because it means 2230 * that we can forward NO IGMP messages. 
2231 */ 2232 struct sock *mroute_sk; 2233 2234 mroute_sk = rcu_dereference(mrt->mroute_sk); 2235 if (mroute_sk) { 2236 nf_reset_ct(skb); 2237 raw_rcv(mroute_sk, skb); 2238 return 0; 2239 } 2240 } 2241 } 2242 2243 /* already under rcu_read_lock() */ 2244 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2245 if (!cache) { 2246 int vif = ipmr_find_vif(mrt, dev); 2247 2248 if (vif >= 0) 2249 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2250 vif); 2251 } 2252 2253 /* No usable cache entry */ 2254 if (!cache) { 2255 int vif; 2256 2257 if (local) { 2258 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2259 ip_local_deliver(skb); 2260 if (!skb2) 2261 return -ENOBUFS; 2262 skb = skb2; 2263 } 2264 2265 vif = ipmr_find_vif(mrt, dev); 2266 if (vif >= 0) 2267 return ipmr_cache_unresolved(mrt, vif, skb, dev); 2268 kfree_skb(skb); 2269 return -ENODEV; 2270 } 2271 2272 ip_mr_forward(net, mrt, dev, skb, cache, local); 2273 2274 if (local) 2275 return ip_local_deliver(skb); 2276 2277 return 0; 2278 2279 dont_forward: 2280 if (local) 2281 return ip_local_deliver(skb); 2282 kfree_skb(skb); 2283 return 0; 2284 } 2285 2286 static void ip_mr_output_finish(struct net *net, struct mr_table *mrt, 2287 struct net_device *dev, struct sk_buff *skb, 2288 struct mfc_cache *c) 2289 { 2290 int psend = -1; 2291 int ct; 2292 2293 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2294 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2295 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2296 2297 /* Forward the frame */ 2298 if (c->mfc_origin == htonl(INADDR_ANY) && 2299 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2300 if (ip_hdr(skb)->ttl > 2301 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2302 /* It's an (*,*) entry and the packet is not coming from 2303 * the upstream: forward the packet to the upstream 2304 * only. 
2305 */ 2306 psend = c->_c.mfc_parent; 2307 goto last_xmit; 2308 } 2309 goto dont_xmit; 2310 } 2311 2312 for (ct = c->_c.mfc_un.res.maxvif - 1; 2313 ct >= c->_c.mfc_un.res.minvif; ct--) { 2314 if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2315 if (psend != -1) { 2316 struct sk_buff *skb2; 2317 2318 skb2 = skb_clone(skb, GFP_ATOMIC); 2319 if (skb2) 2320 ipmr_queue_output_xmit(net, mrt, 2321 skb2, psend); 2322 } 2323 psend = ct; 2324 } 2325 } 2326 2327 last_xmit: 2328 if (psend != -1) { 2329 ipmr_queue_output_xmit(net, mrt, skb, psend); 2330 return; 2331 } 2332 2333 dont_xmit: 2334 kfree_skb(skb); 2335 } 2336 2337 /* Multicast packets for forwarding arrive here 2338 * Called with rcu_read_lock(); 2339 */ 2340 int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2341 { 2342 struct rtable *rt = skb_rtable(skb); 2343 struct mfc_cache *cache; 2344 struct net_device *dev; 2345 struct mr_table *mrt; 2346 int vif; 2347 2348 guard(rcu)(); 2349 2350 dev = dst_dev_rcu(&rt->dst); 2351 2352 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2353 goto mc_output; 2354 if (!(IPCB(skb)->flags & IPSKB_MCROUTE)) 2355 goto mc_output; 2356 2357 skb->dev = dev; 2358 2359 mrt = ipmr_rt_fib_lookup(net, skb); 2360 if (IS_ERR(mrt)) 2361 goto mc_output; 2362 2363 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2364 if (!cache) { 2365 vif = ipmr_find_vif(mrt, dev); 2366 if (vif >= 0) 2367 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2368 vif); 2369 } 2370 2371 /* No usable cache entry */ 2372 if (!cache) { 2373 vif = ipmr_find_vif(mrt, dev); 2374 if (vif >= 0) 2375 return ipmr_cache_unresolved(mrt, vif, skb, dev); 2376 goto mc_output; 2377 } 2378 2379 vif = cache->_c.mfc_parent; 2380 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2381 goto mc_output; 2382 2383 ip_mr_output_finish(net, mrt, dev, skb, cache); 2384 return 0; 2385 2386 mc_output: 2387 return ip_mc_output(net, sk, skb); 2388 } 2389 2390 #ifdef CONFIG_IP_PIMSM_V1 2391 /* Handle 
IGMP messages of PIMv1 */ 2392 int pim_rcv_v1(struct sk_buff *skb) 2393 { 2394 struct igmphdr *pim; 2395 struct net *net = dev_net(skb->dev); 2396 struct mr_table *mrt; 2397 2398 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2399 goto drop; 2400 2401 pim = igmp_hdr(skb); 2402 2403 mrt = ipmr_rt_fib_lookup(net, skb); 2404 if (IS_ERR(mrt)) 2405 goto drop; 2406 if (!READ_ONCE(mrt->mroute_do_pim) || 2407 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2408 goto drop; 2409 2410 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2411 drop: 2412 kfree_skb(skb); 2413 } 2414 return 0; 2415 } 2416 #endif 2417 2418 #ifdef CONFIG_IP_PIMSM_V2 2419 static int pim_rcv(struct sk_buff *skb) 2420 { 2421 struct pimreghdr *pim; 2422 struct net *net = dev_net(skb->dev); 2423 struct mr_table *mrt; 2424 2425 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2426 goto drop; 2427 2428 pim = (struct pimreghdr *)skb_transport_header(skb); 2429 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2430 (pim->flags & PIM_NULL_REGISTER) || 2431 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2432 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2433 goto drop; 2434 2435 mrt = ipmr_rt_fib_lookup(net, skb); 2436 if (IS_ERR(mrt)) 2437 goto drop; 2438 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2439 drop: 2440 kfree_skb(skb); 2441 } 2442 return 0; 2443 } 2444 #endif 2445 2446 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2447 __be32 saddr, __be32 daddr, 2448 struct rtmsg *rtm, u32 portid) 2449 { 2450 struct mfc_cache *cache; 2451 struct mr_table *mrt; 2452 int err; 2453 2454 rcu_read_lock(); 2455 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); 2456 if (!mrt) { 2457 rcu_read_unlock(); 2458 return -ENOENT; 2459 } 2460 2461 cache = ipmr_cache_find(mrt, saddr, daddr); 2462 if (!cache && skb->dev) { 2463 int vif = ipmr_find_vif(mrt, skb->dev); 2464 2465 if (vif >= 0) 2466 cache = ipmr_cache_find_any(mrt, daddr, vif); 2467 } 2468 if (!cache) { 2469 struct 
sk_buff *skb2; 2470 struct iphdr *iph; 2471 struct net_device *dev; 2472 int vif = -1; 2473 2474 dev = skb->dev; 2475 if (dev) 2476 vif = ipmr_find_vif(mrt, dev); 2477 if (vif < 0) { 2478 rcu_read_unlock(); 2479 return -ENODEV; 2480 } 2481 2482 skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); 2483 if (!skb2) { 2484 rcu_read_unlock(); 2485 return -ENOMEM; 2486 } 2487 2488 NETLINK_CB(skb2).portid = portid; 2489 skb_push(skb2, sizeof(struct iphdr)); 2490 skb_reset_network_header(skb2); 2491 iph = ip_hdr(skb2); 2492 iph->ihl = sizeof(struct iphdr) >> 2; 2493 iph->saddr = saddr; 2494 iph->daddr = daddr; 2495 iph->version = 0; 2496 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2497 rcu_read_unlock(); 2498 return err; 2499 } 2500 2501 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2502 rcu_read_unlock(); 2503 return err; 2504 } 2505 2506 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2507 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2508 int flags) 2509 { 2510 struct nlmsghdr *nlh; 2511 struct rtmsg *rtm; 2512 int err; 2513 2514 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2515 if (!nlh) 2516 return -EMSGSIZE; 2517 2518 rtm = nlmsg_data(nlh); 2519 rtm->rtm_family = RTNL_FAMILY_IPMR; 2520 rtm->rtm_dst_len = 32; 2521 rtm->rtm_src_len = 32; 2522 rtm->rtm_tos = 0; 2523 rtm->rtm_table = mrt->id; 2524 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2525 goto nla_put_failure; 2526 rtm->rtm_type = RTN_MULTICAST; 2527 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2528 if (c->_c.mfc_flags & MFC_STATIC) 2529 rtm->rtm_protocol = RTPROT_STATIC; 2530 else 2531 rtm->rtm_protocol = RTPROT_MROUTED; 2532 rtm->rtm_flags = 0; 2533 2534 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2535 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2536 goto nla_put_failure; 2537 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2538 /* do not break the dump if cache is unresolved */ 2539 if (err < 0 && err != -ENOENT) 2540 goto nla_put_failure; 2541 2542 
nlmsg_end(skb, nlh); 2543 return 0; 2544 2545 nla_put_failure: 2546 nlmsg_cancel(skb, nlh); 2547 return -EMSGSIZE; 2548 } 2549 2550 static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2551 u32 portid, u32 seq, struct mr_mfc *c, int cmd, 2552 int flags) 2553 { 2554 return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, 2555 cmd, flags); 2556 } 2557 2558 static size_t mroute_msgsize(bool unresolved) 2559 { 2560 size_t len = 2561 NLMSG_ALIGN(sizeof(struct rtmsg)) 2562 + nla_total_size(4) /* RTA_TABLE */ 2563 + nla_total_size(4) /* RTA_SRC */ 2564 + nla_total_size(4) /* RTA_DST */ 2565 ; 2566 2567 if (!unresolved) 2568 len = len 2569 + nla_total_size(4) /* RTA_IIF */ 2570 + nla_total_size(0) /* RTA_MULTIPATH */ 2571 + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop)) 2572 /* RTA_MFC_STATS */ 2573 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2574 ; 2575 2576 return len; 2577 } 2578 2579 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2580 int cmd) 2581 { 2582 struct net *net = read_pnet(&mrt->net); 2583 struct sk_buff *skb; 2584 int err = -ENOBUFS; 2585 2586 skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS), 2587 GFP_ATOMIC); 2588 if (!skb) 2589 goto errout; 2590 2591 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2592 if (err < 0) 2593 goto errout; 2594 2595 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2596 return; 2597 2598 errout: 2599 kfree_skb(skb); 2600 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2601 } 2602 2603 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2604 { 2605 size_t len = 2606 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2607 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2608 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2609 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2610 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2611 + nla_total_size(4) /* IPMRA_CREPORT_TABLE */ 2612 /* IPMRA_CREPORT_PKT */ 2613 + nla_total_size(payloadlen) 
2614 ; 2615 2616 return len; 2617 } 2618 2619 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2620 { 2621 struct net *net = read_pnet(&mrt->net); 2622 struct nlmsghdr *nlh; 2623 struct rtgenmsg *rtgenm; 2624 struct igmpmsg *msg; 2625 struct sk_buff *skb; 2626 struct nlattr *nla; 2627 int payloadlen; 2628 2629 payloadlen = pkt->len - sizeof(struct igmpmsg); 2630 msg = (struct igmpmsg *)skb_network_header(pkt); 2631 2632 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2633 if (!skb) 2634 goto errout; 2635 2636 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2637 sizeof(struct rtgenmsg), 0); 2638 if (!nlh) 2639 goto errout; 2640 rtgenm = nlmsg_data(nlh); 2641 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2642 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2643 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) || 2644 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2645 msg->im_src.s_addr) || 2646 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2647 msg->im_dst.s_addr) || 2648 nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id)) 2649 goto nla_put_failure; 2650 2651 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); 2652 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2653 nla_data(nla), payloadlen)) 2654 goto nla_put_failure; 2655 2656 nlmsg_end(skb, nlh); 2657 2658 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2659 return; 2660 2661 nla_put_failure: 2662 nlmsg_cancel(skb, nlh); 2663 errout: 2664 kfree_skb(skb); 2665 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2666 } 2667 2668 static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, 2669 const struct nlmsghdr *nlh, 2670 struct nlattr **tb, 2671 struct netlink_ext_ack *extack) 2672 { 2673 struct rtmsg *rtm; 2674 int i, err; 2675 2676 rtm = nlmsg_payload(nlh, sizeof(*rtm)); 2677 if (!rtm) { 2678 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); 2679 return -EINVAL; 2680 
} 2681 2682 if (!netlink_strict_get_check(skb)) 2683 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 2684 rtm_ipv4_policy, extack); 2685 2686 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || 2687 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 2688 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2689 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2690 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); 2691 return -EINVAL; 2692 } 2693 2694 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2695 rtm_ipv4_policy, extack); 2696 if (err) 2697 return err; 2698 2699 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2700 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2701 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); 2702 return -EINVAL; 2703 } 2704 2705 for (i = 0; i <= RTA_MAX; i++) { 2706 if (!tb[i]) 2707 continue; 2708 2709 switch (i) { 2710 case RTA_SRC: 2711 case RTA_DST: 2712 case RTA_TABLE: 2713 break; 2714 default: 2715 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); 2716 return -EINVAL; 2717 } 2718 } 2719 2720 return 0; 2721 } 2722 2723 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2724 struct netlink_ext_ack *extack) 2725 { 2726 struct net *net = sock_net(in_skb->sk); 2727 struct nlattr *tb[RTA_MAX + 1]; 2728 struct mfc_cache *cache; 2729 struct mr_table *mrt; 2730 struct sk_buff *skb; 2731 __be32 src, grp; 2732 u32 tableid; 2733 int err; 2734 2735 err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2736 if (err < 0) 2737 goto errout; 2738 2739 src = nla_get_in_addr_default(tb[RTA_SRC], 0); 2740 grp = nla_get_in_addr_default(tb[RTA_DST], 0); 2741 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2742 2743 skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); 2744 if (!skb) { 2745 err = -ENOBUFS; 2746 goto errout; 2747 } 2748 2749 rcu_read_lock(); 2750 2751 mrt = __ipmr_get_table(net, 
tableid ? tableid : RT_TABLE_DEFAULT); 2752 if (!mrt) { 2753 err = -ENOENT; 2754 goto errout_unlock; 2755 } 2756 2757 cache = ipmr_cache_find(mrt, src, grp); 2758 if (!cache) { 2759 err = -ENOENT; 2760 goto errout_unlock; 2761 } 2762 2763 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2764 nlh->nlmsg_seq, cache, 2765 RTM_NEWROUTE, 0); 2766 if (err < 0) 2767 goto errout_unlock; 2768 2769 rcu_read_unlock(); 2770 2771 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2772 errout: 2773 return err; 2774 2775 errout_unlock: 2776 rcu_read_unlock(); 2777 kfree_skb(skb); 2778 goto errout; 2779 } 2780 2781 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2782 { 2783 struct fib_dump_filter filter = {}; 2784 int err; 2785 2786 rcu_read_lock(); 2787 2788 if (cb->strict_check) { 2789 err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, 2790 &filter, cb); 2791 if (err < 0) 2792 goto out; 2793 } 2794 2795 if (filter.table_id) { 2796 struct mr_table *mrt; 2797 2798 mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id); 2799 if (!mrt) { 2800 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) { 2801 err = skb->len; 2802 goto out; 2803 } 2804 2805 NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); 2806 err = -ENOENT; 2807 goto out; 2808 } 2809 2810 err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, 2811 &mfc_unres_lock, &filter); 2812 err = skb->len ? 
: err; 2813 goto out; 2814 } 2815 2816 err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, 2817 _ipmr_fill_mroute, &mfc_unres_lock, &filter); 2818 out: 2819 rcu_read_unlock(); 2820 2821 return err; 2822 } 2823 2824 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2825 [RTA_SRC] = { .type = NLA_U32 }, 2826 [RTA_DST] = { .type = NLA_U32 }, 2827 [RTA_IIF] = { .type = NLA_U32 }, 2828 [RTA_TABLE] = { .type = NLA_U32 }, 2829 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2830 }; 2831 2832 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2833 { 2834 switch (rtm_protocol) { 2835 case RTPROT_STATIC: 2836 case RTPROT_MROUTED: 2837 return true; 2838 } 2839 return false; 2840 } 2841 2842 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2843 { 2844 struct rtnexthop *rtnh = nla_data(nla); 2845 int remaining = nla_len(nla), vifi = 0; 2846 2847 while (rtnh_ok(rtnh, remaining)) { 2848 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2849 if (++vifi == MAXVIFS) 2850 break; 2851 rtnh = rtnh_next(rtnh, &remaining); 2852 } 2853 2854 return remaining > 0 ? 
-EINVAL : vifi; 2855 } 2856 2857 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2858 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2859 struct mfcctl *mfcc, int *mrtsock, 2860 struct mr_table **mrtret, 2861 struct netlink_ext_ack *extack) 2862 { 2863 struct net_device *dev = NULL; 2864 u32 tblid = RT_TABLE_DEFAULT; 2865 int ret, rem, iif = 0; 2866 struct mr_table *mrt; 2867 struct nlattr *attr; 2868 struct rtmsg *rtm; 2869 2870 ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, 2871 rtm_ipmr_policy, extack); 2872 if (ret < 0) 2873 goto out; 2874 rtm = nlmsg_data(nlh); 2875 2876 ret = -EINVAL; 2877 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2878 rtm->rtm_type != RTN_MULTICAST || 2879 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2880 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2881 goto out; 2882 2883 memset(mfcc, 0, sizeof(*mfcc)); 2884 mfcc->mfcc_parent = -1; 2885 ret = 0; 2886 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2887 switch (nla_type(attr)) { 2888 case RTA_SRC: 2889 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2890 break; 2891 case RTA_DST: 2892 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2893 break; 2894 case RTA_IIF: 2895 iif = nla_get_u32(attr); 2896 break; 2897 case RTA_MULTIPATH: 2898 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2899 ret = -EINVAL; 2900 goto out; 2901 } 2902 break; 2903 case RTA_PREFSRC: 2904 ret = 1; 2905 break; 2906 case RTA_TABLE: 2907 tblid = nla_get_u32(attr); 2908 break; 2909 } 2910 } 2911 2912 rcu_read_lock(); 2913 2914 mrt = __ipmr_get_table(net, tblid); 2915 if (!mrt) { 2916 ret = -ENOENT; 2917 goto unlock; 2918 } 2919 2920 if (iif) { 2921 dev = dev_get_by_index_rcu(net, iif); 2922 if (!dev) { 2923 ret = -ENODEV; 2924 goto unlock; 2925 } 2926 2927 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2928 } 2929 2930 *mrtret = mrt; 2931 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 
1 : 0; 2932 2933 unlock: 2934 rcu_read_unlock(); 2935 out: 2936 return ret; 2937 } 2938 2939 /* takes care of both newroute and delroute */ 2940 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2941 struct netlink_ext_ack *extack) 2942 { 2943 struct net *net = sock_net(skb->sk); 2944 int ret, mrtsock = 0, parent; 2945 struct mr_table *tbl = NULL; 2946 struct mfcctl mfcc; 2947 2948 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2949 if (ret < 0) 2950 return ret; 2951 2952 parent = ret ? mfcc.mfcc_parent : -1; 2953 2954 mutex_lock(&net->ipv4.mfc_mutex); 2955 2956 if (nlh->nlmsg_type == RTM_NEWROUTE) 2957 ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2958 else 2959 ret = ipmr_mfc_delete(tbl, &mfcc, parent); 2960 2961 mutex_unlock(&net->ipv4.mfc_mutex); 2962 2963 return ret; 2964 } 2965 2966 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2967 { 2968 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2969 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, 2970 READ_ONCE(mrt->cache_resolve_queue_len)) || 2971 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2972 READ_ONCE(mrt->mroute_reg_vif_num)) || 2973 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2974 READ_ONCE(mrt->mroute_do_assert)) || 2975 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, 2976 READ_ONCE(mrt->mroute_do_pim)) || 2977 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, 2978 READ_ONCE(mrt->mroute_do_wrvifwhole))) 2979 return false; 2980 2981 return true; 2982 } 2983 2984 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2985 { 2986 struct net_device *vif_dev; 2987 struct nlattr *vif_nest; 2988 struct vif_device *vif; 2989 2990 vif = &mrt->vif_table[vifid]; 2991 vif_dev = vif_dev_read(vif); 2992 /* if the VIF doesn't exist just continue */ 2993 if (!vif_dev) 2994 return true; 2995 2996 vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); 2997 if (!vif_nest) 2998 return false; 2999 3000 if (nla_put_u32(skb, 
IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) || 3001 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 3002 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 3003 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in), 3004 IPMRA_VIFA_PAD) || 3005 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out), 3006 IPMRA_VIFA_PAD) || 3007 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in), 3008 IPMRA_VIFA_PAD) || 3009 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out), 3010 IPMRA_VIFA_PAD) || 3011 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 3012 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 3013 nla_nest_cancel(skb, vif_nest); 3014 return false; 3015 } 3016 nla_nest_end(skb, vif_nest); 3017 3018 return true; 3019 } 3020 3021 static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, 3022 struct netlink_ext_ack *extack) 3023 { 3024 struct ifinfomsg *ifm; 3025 3026 ifm = nlmsg_payload(nlh, sizeof(*ifm)); 3027 if (!ifm) { 3028 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); 3029 return -EINVAL; 3030 } 3031 3032 if (nlmsg_attrlen(nlh, sizeof(*ifm))) { 3033 NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump"); 3034 return -EINVAL; 3035 } 3036 3037 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || 3038 ifm->ifi_change || ifm->ifi_index) { 3039 NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); 3040 return -EINVAL; 3041 } 3042 3043 return 0; 3044 } 3045 3046 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 3047 { 3048 struct net *net = sock_net(skb->sk); 3049 struct nlmsghdr *nlh = NULL; 3050 unsigned int t = 0, s_t; 3051 unsigned int e = 0, s_e; 3052 struct mr_table *mrt; 3053 3054 if (cb->strict_check) { 3055 int err = ipmr_valid_dumplink(cb->nlh, cb->extack); 3056 3057 if (err < 0) 3058 return err; 3059 } 3060 3061 s_t = cb->args[0]; 3062 s_e = cb->args[1]; 3063 3064 rcu_read_lock(); 
3065 3066 ipmr_for_each_table(mrt, net) { 3067 struct nlattr *vifs, *af; 3068 struct ifinfomsg *hdr; 3069 u32 i; 3070 3071 if (t < s_t) 3072 goto skip_table; 3073 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 3074 cb->nlh->nlmsg_seq, RTM_NEWLINK, 3075 sizeof(*hdr), NLM_F_MULTI); 3076 if (!nlh) 3077 break; 3078 3079 hdr = nlmsg_data(nlh); 3080 memset(hdr, 0, sizeof(*hdr)); 3081 hdr->ifi_family = RTNL_FAMILY_IPMR; 3082 3083 af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); 3084 if (!af) { 3085 nlmsg_cancel(skb, nlh); 3086 goto out; 3087 } 3088 3089 if (!ipmr_fill_table(mrt, skb)) { 3090 nlmsg_cancel(skb, nlh); 3091 goto out; 3092 } 3093 3094 vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); 3095 if (!vifs) { 3096 nla_nest_end(skb, af); 3097 nlmsg_end(skb, nlh); 3098 goto out; 3099 } 3100 for (i = 0; i < READ_ONCE(mrt->maxvif); i++) { 3101 if (e < s_e) 3102 goto skip_entry; 3103 if (!ipmr_fill_vif(mrt, i, skb)) { 3104 nla_nest_end(skb, vifs); 3105 nla_nest_end(skb, af); 3106 nlmsg_end(skb, nlh); 3107 goto out; 3108 } 3109 skip_entry: 3110 e++; 3111 } 3112 s_e = 0; 3113 e = 0; 3114 nla_nest_end(skb, vifs); 3115 nla_nest_end(skb, af); 3116 nlmsg_end(skb, nlh); 3117 skip_table: 3118 t++; 3119 } 3120 3121 out: 3122 rcu_read_unlock(); 3123 3124 cb->args[1] = e; 3125 cb->args[0] = t; 3126 3127 return skb->len; 3128 } 3129 3130 #ifdef CONFIG_PROC_FS 3131 /* The /proc interfaces to multicast routing : 3132 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 3133 */ 3134 3135 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 3136 __acquires(RCU) 3137 { 3138 struct mr_vif_iter *iter = seq->private; 3139 struct net *net = seq_file_net(seq); 3140 struct mr_table *mrt; 3141 3142 rcu_read_lock(); 3143 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); 3144 if (!mrt) { 3145 rcu_read_unlock(); 3146 return ERR_PTR(-ENOENT); 3147 } 3148 3149 iter->mrt = mrt; 3150 3151 return mr_vif_seq_start(seq, pos); 3152 } 3153 3154 static void ipmr_vif_seq_stop(struct seq_file *seq, void 
*v) 3155 __releases(RCU) 3156 { 3157 rcu_read_unlock(); 3158 } 3159 3160 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 3161 { 3162 struct mr_vif_iter *iter = seq->private; 3163 struct mr_table *mrt = iter->mrt; 3164 3165 if (v == SEQ_START_TOKEN) { 3166 seq_puts(seq, 3167 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 3168 } else { 3169 const struct vif_device *vif = v; 3170 const struct net_device *vif_dev; 3171 const char *name; 3172 3173 vif_dev = vif_dev_read(vif); 3174 name = vif_dev ? vif_dev->name : "none"; 3175 seq_printf(seq, 3176 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3177 vif - mrt->vif_table, 3178 name, vif->bytes_in, vif->pkt_in, 3179 vif->bytes_out, vif->pkt_out, 3180 vif->flags, vif->local, vif->remote); 3181 } 3182 return 0; 3183 } 3184 3185 static const struct seq_operations ipmr_vif_seq_ops = { 3186 .start = ipmr_vif_seq_start, 3187 .next = mr_vif_seq_next, 3188 .stop = ipmr_vif_seq_stop, 3189 .show = ipmr_vif_seq_show, 3190 }; 3191 3192 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 3193 { 3194 struct net *net = seq_file_net(seq); 3195 struct mr_table *mrt; 3196 3197 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 3198 if (!mrt) 3199 return ERR_PTR(-ENOENT); 3200 3201 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 3202 } 3203 3204 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 3205 { 3206 int n; 3207 3208 if (v == SEQ_START_TOKEN) { 3209 seq_puts(seq, 3210 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 3211 } else { 3212 const struct mfc_cache *mfc = v; 3213 const struct mr_mfc_iter *it = seq->private; 3214 const struct mr_table *mrt = it->mrt; 3215 3216 seq_printf(seq, "%08X %08X %-3hd", 3217 (__force u32) mfc->mfc_mcastgrp, 3218 (__force u32) mfc->mfc_origin, 3219 mfc->_c.mfc_parent); 3220 3221 if (it->cache != &mrt->mfc_unres_queue) { 3222 seq_printf(seq, " %8lu %8lu %8lu", 3223 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 3224 
atomic_long_read(&mfc->_c.mfc_un.res.bytes), 3225 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 3226 for (n = mfc->_c.mfc_un.res.minvif; 3227 n < mfc->_c.mfc_un.res.maxvif; n++) { 3228 if (VIF_EXISTS(mrt, n) && 3229 mfc->_c.mfc_un.res.ttls[n] < 255) 3230 seq_printf(seq, 3231 " %2d:%-3d", 3232 n, mfc->_c.mfc_un.res.ttls[n]); 3233 } 3234 } else { 3235 /* unresolved mfc_caches don't contain 3236 * pkt, bytes and wrong_if values 3237 */ 3238 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3239 } 3240 seq_putc(seq, '\n'); 3241 } 3242 return 0; 3243 } 3244 3245 static const struct seq_operations ipmr_mfc_seq_ops = { 3246 .start = ipmr_mfc_seq_start, 3247 .next = mr_mfc_seq_next, 3248 .stop = mr_mfc_seq_stop, 3249 .show = ipmr_mfc_seq_show, 3250 }; 3251 #endif 3252 3253 #ifdef CONFIG_IP_PIMSM_V2 3254 static const struct net_protocol pim_protocol = { 3255 .handler = pim_rcv, 3256 }; 3257 #endif 3258 3259 static unsigned int ipmr_seq_read(const struct net *net) 3260 { 3261 return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); 3262 } 3263 3264 static int ipmr_dump(struct net *net, struct notifier_block *nb, 3265 struct netlink_ext_ack *extack) 3266 { 3267 return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, 3268 ipmr_mr_table_iter, extack); 3269 } 3270 3271 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 3272 .family = RTNL_FAMILY_IPMR, 3273 .fib_seq_read = ipmr_seq_read, 3274 .fib_dump = ipmr_dump, 3275 .owner = THIS_MODULE, 3276 }; 3277 3278 static int __net_init ipmr_notifier_init(struct net *net) 3279 { 3280 struct fib_notifier_ops *ops; 3281 3282 atomic_set(&net->ipv4.ipmr_seq, 0); 3283 3284 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3285 if (IS_ERR(ops)) 3286 return PTR_ERR(ops); 3287 net->ipv4.ipmr_notifier_ops = ops; 3288 3289 return 0; 3290 } 3291 3292 static void __net_exit ipmr_notifier_exit(struct net *net) 3293 { 3294 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3295 
net->ipv4.ipmr_notifier_ops = NULL; 3296 } 3297 3298 /* Setup for IP multicast routing */ 3299 static int __net_init ipmr_net_init(struct net *net) 3300 { 3301 LIST_HEAD(dev_kill_list); 3302 int err; 3303 3304 mutex_init(&net->ipv4.mfc_mutex); 3305 3306 err = ipmr_notifier_init(net); 3307 if (err) 3308 goto ipmr_notifier_fail; 3309 3310 err = ipmr_rules_init(net); 3311 if (err < 0) 3312 goto ipmr_rules_fail; 3313 3314 #ifdef CONFIG_PROC_FS 3315 err = -ENOMEM; 3316 if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, 3317 sizeof(struct mr_vif_iter))) 3318 goto proc_vif_fail; 3319 if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 3320 sizeof(struct mr_mfc_iter))) 3321 goto proc_cache_fail; 3322 #endif 3323 return 0; 3324 3325 #ifdef CONFIG_PROC_FS 3326 proc_cache_fail: 3327 remove_proc_entry("ip_mr_vif", net->proc_net); 3328 proc_vif_fail: 3329 ipmr_rules_exit_rtnl(net, &dev_kill_list); 3330 ipmr_rules_exit(net); 3331 #endif 3332 ipmr_rules_fail: 3333 ipmr_notifier_exit(net); 3334 ipmr_notifier_fail: 3335 return err; 3336 } 3337 3338 static void __net_exit ipmr_net_exit(struct net *net) 3339 { 3340 #ifdef CONFIG_PROC_FS 3341 remove_proc_entry("ip_mr_cache", net->proc_net); 3342 remove_proc_entry("ip_mr_vif", net->proc_net); 3343 #endif 3344 ipmr_rules_exit(net); 3345 ipmr_notifier_exit(net); 3346 } 3347 3348 static void __net_exit ipmr_net_exit_rtnl(struct net *net, 3349 struct list_head *dev_kill_list) 3350 { 3351 ipmr_rules_exit_rtnl(net, dev_kill_list); 3352 } 3353 3354 static struct pernet_operations ipmr_net_ops = { 3355 .init = ipmr_net_init, 3356 .exit = ipmr_net_exit, 3357 .exit_rtnl = ipmr_net_exit_rtnl, 3358 }; 3359 3360 static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { 3361 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, 3362 .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, 3363 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, 3364 .doit = ipmr_rtm_route, 
.flags = RTNL_FLAG_DOIT_UNLOCKED}, 3365 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, 3366 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, 3367 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, 3368 .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, 3369 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, 3370 }; 3371 3372 int __init ip_mr_init(void) 3373 { 3374 int err; 3375 3376 mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); 3377 3378 err = register_pernet_subsys(&ipmr_net_ops); 3379 if (err) 3380 goto reg_pernet_fail; 3381 3382 err = register_netdevice_notifier(&ip_mr_notifier); 3383 if (err) 3384 goto reg_notif_fail; 3385 #ifdef CONFIG_IP_PIMSM_V2 3386 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3387 pr_err("%s: can't add PIM protocol\n", __func__); 3388 err = -EAGAIN; 3389 goto add_proto_fail; 3390 } 3391 #endif 3392 rtnl_register_many(ipmr_rtnl_msg_handlers); 3393 3394 return 0; 3395 3396 #ifdef CONFIG_IP_PIMSM_V2 3397 add_proto_fail: 3398 unregister_netdevice_notifier(&ip_mr_notifier); 3399 #endif 3400 reg_notif_fail: 3401 unregister_pernet_subsys(&ipmr_net_ops); 3402 reg_pernet_fail: 3403 kmem_cache_destroy(mrt_cachep); 3404 return err; 3405 } 3406