1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IP multicast routing support for mrouted 3.6/3.8 4 * 5 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * Linux Consultancy and Custom Driver Development 7 * 8 * Fixes: 9 * Michael Chastain : Incorrect size of copying. 10 * Alan Cox : Added the cache manager code 11 * Alan Cox : Fixed the clone/copy bug and device race. 12 * Mike McLagan : Routing by source 13 * Malcolm Beattie : Buffer handling fixes. 14 * Alexey Kuznetsov : Double buffer free and other fixes. 15 * SVR Anand : Fixed several multicast bugs and problems. 16 * Alexey Kuznetsov : Status, optimisations and more. 17 * Brad Parker : Better behaviour on mrouted upcall 18 * overflow. 19 * Carlos Picoto : PIMv1 Support 20 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 21 * Relax this requirement to work with older peers. 22 */ 23 24 #include <linux/uaccess.h> 25 #include <linux/types.h> 26 #include <linux/cache.h> 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/mm.h> 30 #include <linux/kernel.h> 31 #include <linux/fcntl.h> 32 #include <linux/stat.h> 33 #include <linux/socket.h> 34 #include <linux/in.h> 35 #include <linux/inet.h> 36 #include <linux/netdevice.h> 37 #include <linux/inetdevice.h> 38 #include <linux/igmp.h> 39 #include <linux/proc_fs.h> 40 #include <linux/seq_file.h> 41 #include <linux/mroute.h> 42 #include <linux/init.h> 43 #include <linux/if_ether.h> 44 #include <linux/slab.h> 45 #include <net/flow.h> 46 #include <net/net_namespace.h> 47 #include <net/ip.h> 48 #include <net/protocol.h> 49 #include <linux/skbuff.h> 50 #include <net/route.h> 51 #include <net/icmp.h> 52 #include <net/udp.h> 53 #include <net/raw.h> 54 #include <linux/notifier.h> 55 #include <linux/if_arp.h> 56 #include <linux/netfilter_ipv4.h> 57 #include <linux/compat.h> 58 #include <linux/export.h> 59 #include <linux/rhashtable.h> 60 #include <net/ip_tunnels.h> 61 #include <net/checksum.h> 62 #include <net/netlink.h> 63 #include <net/fib_rules.h> 64 #include <linux/netconf.h> 65 #include <net/rtnh.h> 66 #include <net/inet_dscp.h> 67 68 #include <linux/nospec.h> 69 70 struct ipmr_rule { 71 struct fib_rule common; 72 }; 73 74 struct ipmr_result { 75 struct mr_table *mrt; 76 }; 77 78 /* Big lock, protecting vif table, mrt cache and mroute socket state. 79 * Note that the changes are semaphored via rtnl_lock. 80 */ 81 82 static DEFINE_SPINLOCK(mrt_lock); 83 84 static struct net_device *vif_dev_read(const struct vif_device *vif) 85 { 86 return rcu_dereference(vif->dev); 87 } 88 89 /* Multicast router control variables */ 90 91 /* Special spinlock for queue of unresolved entries */ 92 static DEFINE_SPINLOCK(mfc_unres_lock); 93 94 /* We return to original Alan's scheme. Hash table of resolved 95 * entries is changed only in process context and protected 96 * with weak lock mrt_lock. Queue of unresolved entries is protected 97 * with strong spinlock mfc_unres_lock. 98 * 99 * In this case data path is free of exclusive locks at all. 100 */ 101 102 static struct kmem_cache *mrt_cachep __ro_after_init; 103 104 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 105 static void ipmr_free_table(struct mr_table *mrt, 106 struct list_head *dev_kill_list); 107 108 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 109 struct net_device *dev, struct sk_buff *skb, 110 struct mfc_cache *cache, int local); 111 static int ipmr_cache_report(const struct mr_table *mrt, 112 struct sk_buff *pkt, vifi_t vifi, int assert); 113 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 114 int cmd); 115 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 116 static void mroute_clean_tables(struct mr_table *mrt, int flags, 117 struct list_head *dev_kill_list); 118 static void ipmr_expire_process(struct timer_list *t); 119 120 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 121 #define ipmr_for_each_table(mrt, net) \ 122 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ 123 lockdep_rtnl_is_held() || \ 124 list_empty(&net->ipv4.mr_tables)) 125 126 static struct mr_table *ipmr_mr_table_iter(struct net *net, 127 struct mr_table *mrt) 128 { 129 struct mr_table *ret; 130 131 if (!mrt) 132 ret = list_entry_rcu(net->ipv4.mr_tables.next, 133 struct mr_table, list); 134 else 135 ret = list_entry_rcu(mrt->list.next, 136 struct mr_table, list); 137 138 if (&ret->list == &net->ipv4.mr_tables) 139 return NULL; 140 return ret; 141 } 142 143 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 144 { 145 struct mr_table *mrt; 146 147 ipmr_for_each_table(mrt, net) { 148 if (mrt->id == id) 149 return mrt; 150 } 151 return NULL; 152 } 153 154 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 155 struct mr_table **mrt) 156 { 157 int err; 158 struct ipmr_result res; 159 struct fib_lookup_arg arg = { 160 .result = &res, 161 .flags = FIB_LOOKUP_NOREF, 162 }; 163 164 /* update flow if oif or iif point to device enslaved to l3mdev */ 165 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 166 167 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 168 flowi4_to_flowi(flp4), 0, &arg); 169 if (err < 0) 170 return err; 171 *mrt = res.mrt; 172 return 0; 173 } 174 175 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 176 int flags, struct fib_lookup_arg *arg) 177 { 178 struct ipmr_result *res = arg->result; 179 struct mr_table *mrt; 180 181 switch (rule->action) { 182 case FR_ACT_TO_TBL: 183 break; 184 case FR_ACT_UNREACHABLE: 185 return -ENETUNREACH; 186 case FR_ACT_PROHIBIT: 187 return -EACCES; 188 case FR_ACT_BLACKHOLE: 189 default: 190 return -EINVAL; 191 } 192 193 arg->table = fib_rule_get_table(rule, arg); 194 195 mrt = __ipmr_get_table(rule->fr_net, arg->table); 196 if (!mrt) 197 return -EAGAIN; 198 res->mrt = mrt; 199 return 0; 200 } 201 202 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 203 { 204 return 1; 205 } 206 207 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 208 struct fib_rule_hdr *frh, struct nlattr **tb, 209 struct netlink_ext_ack *extack) 210 { 211 return 0; 212 } 213 214 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 215 struct nlattr **tb) 216 { 217 return 1; 218 } 219 220 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 221 struct fib_rule_hdr *frh) 222 { 223 frh->dst_len = 0; 224 frh->src_len = 0; 225 frh->tos = 0; 226 return 0; 227 } 228 229 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 230 .family = RTNL_FAMILY_IPMR, 231 .rule_size = sizeof(struct ipmr_rule), 232 .addr_size = sizeof(u32), 233 .action = ipmr_rule_action, 234 .match = ipmr_rule_match, 235 .configure = ipmr_rule_configure, 236 .compare = ipmr_rule_compare, 237 .fill = ipmr_rule_fill, 238 .nlgroup = RTNLGRP_IPV4_RULE, 239 .owner = THIS_MODULE, 240 }; 241 242 static int __net_init ipmr_rules_init(struct net *net) 243 { 244 struct fib_rules_ops *ops; 245 LIST_HEAD(dev_kill_list); 246 struct mr_table *mrt; 247 int err; 248 249 ops = fib_rules_register(&ipmr_rules_ops_template, net); 250 if (IS_ERR(ops)) 251 return PTR_ERR(ops); 252 253 INIT_LIST_HEAD(&net->ipv4.mr_tables); 254 255 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 256 if (IS_ERR(mrt)) { 257 err = PTR_ERR(mrt); 258 goto err1; 259 } 260 261 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); 262 if (err < 0) 263 goto err2; 264 265 net->ipv4.mr_rules_ops = ops; 266 return 0; 267 268 err2: 269 ipmr_free_table(mrt, &dev_kill_list); 270 err1: 271 fib_rules_unregister(ops); 272 return err; 273 } 274 275 static void __net_exit ipmr_rules_exit(struct net *net) 276 { 277 fib_rules_unregister(net->ipv4.mr_rules_ops); 278 } 279 280 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 281 struct list_head *dev_kill_list) 282 { 283 struct mr_table *mrt, *next; 284 285 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 286 list_del_rcu(&mrt->list); 287 ipmr_free_table(mrt, dev_kill_list); 288 } 289 } 290 291 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 292 struct netlink_ext_ack *extack) 293 { 294 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); 295 } 296 297 static unsigned int ipmr_rules_seq_read(const struct net *net) 298 { 299 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 300 } 301 302 bool ipmr_rule_default(const struct fib_rule *rule) 303 { 304 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 305 } 306 EXPORT_SYMBOL(ipmr_rule_default); 307 #else 308 static struct mr_table *ipmr_mr_table_iter(struct net *net, 309 struct mr_table *mrt) 310 { 311 if (!mrt) 312 return rcu_dereference(net->ipv4.mrt); 313 return NULL; 314 } 315 316 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 317 { 318 return rcu_dereference_check(net->ipv4.mrt, 319 lockdep_rtnl_is_held() || 320 !rcu_access_pointer(net->ipv4.mrt)); 321 } 322 323 #define ipmr_for_each_table(mrt, net) \ 324 for (mrt = __ipmr_get_table(net, 0); mrt; mrt = NULL) 325 326 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 327 struct mr_table **mrt) 328 { 329 *mrt = rcu_dereference(net->ipv4.mrt); 330 if (!*mrt) 331 return -EAGAIN; 332 return 0; 333 } 334 335 static int __net_init ipmr_rules_init(struct net *net) 336 { 337 struct mr_table *mrt; 338 339 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 340 if (IS_ERR(mrt)) 341 return PTR_ERR(mrt); 342 343 rcu_assign_pointer(net->ipv4.mrt, mrt); 344 return 0; 345 } 346 347 static void __net_exit ipmr_rules_exit(struct net *net) 348 { 349 } 350 351 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 352 struct list_head *dev_kill_list) 353 { 354 struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); 355 356 RCU_INIT_POINTER(net->ipv4.mrt, NULL); 357 ipmr_free_table(mrt, dev_kill_list); 358 } 359 360 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 361 struct netlink_ext_ack *extack) 362 { 363 return 0; 364 } 365 366 static unsigned int ipmr_rules_seq_read(const struct net *net) 367 { 368 return 0; 369 } 370 371 bool ipmr_rule_default(const struct fib_rule *rule) 372 { 373 return true; 374 } 375 EXPORT_SYMBOL(ipmr_rule_default); 376 #endif 377 378 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 379 { 380 struct mr_table *mrt; 381 382 rcu_read_lock(); 383 mrt = __ipmr_get_table(net, id); 384 rcu_read_unlock(); 385 386 return mrt; 387 } 388 389 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 390 const void *ptr) 391 { 392 const struct mfc_cache_cmp_arg *cmparg = arg->key; 393 const struct mfc_cache *c = ptr; 394 395 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 396 cmparg->mfc_origin != c->mfc_origin; 397 } 398 399 static const struct rhashtable_params ipmr_rht_params = { 400 .head_offset = offsetof(struct mr_mfc, mnode), 401 .key_offset = offsetof(struct mfc_cache, cmparg), 402 .key_len = sizeof(struct mfc_cache_cmp_arg), 403 .nelem_hint = 3, 404 .obj_cmpfn = ipmr_hash_cmp, 405 .automatic_shrinking = true, 406 }; 407 408 static void ipmr_new_table_set(struct mr_table *mrt, 409 struct net *net) 410 { 411 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 412 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 413 #endif 414 } 415 416 static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { 417 .mfc_mcastgrp = htonl(INADDR_ANY), 418 .mfc_origin = htonl(INADDR_ANY), 419 }; 420 421 static struct mr_table_ops ipmr_mr_table_ops = { 422 .rht_params = &ipmr_rht_params, 423 .cmparg_any = &ipmr_mr_table_ops_cmparg_any, 424 }; 425 426 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 427 { 428 struct mr_table *mrt; 429 430 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 431 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 432 return ERR_PTR(-EINVAL); 433 434 mrt = __ipmr_get_table(net, id); 435 if (mrt) 436 return mrt; 437 438 return mr_table_alloc(net, id, &ipmr_mr_table_ops, 439 ipmr_expire_process, ipmr_new_table_set); 440 } 441 442 static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) 443 { 444 struct net *net = read_pnet(&mrt->net); 445 LIST_HEAD(ipmr_dev_kill_list); 446 447 WARN_ON_ONCE(!mr_can_free_table(net)); 448 449 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | 450 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, 451 &ipmr_dev_kill_list); 452 timer_shutdown_sync(&mrt->ipmr_expire_timer); 453 mr_table_free(mrt); 454 455 WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); 456 list_splice(&ipmr_dev_kill_list, dev_kill_list); 457 } 458 459 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 460 461 /* Initialize ipmr pimreg/tunnel in_device */ 462 static bool ipmr_init_vif_indev(const struct net_device *dev) 463 { 464 struct in_device *in_dev; 465 466 ASSERT_RTNL(); 467 468 in_dev = __in_dev_get_rtnl(dev); 469 if (!in_dev) 470 return false; 471 ipv4_devconf_setall(in_dev); 472 neigh_parms_data_state_setall(in_dev->arp_parms); 473 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 474 475 return true; 476 } 477 478 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 479 { 480 struct net_device *tunnel_dev, *new_dev; 481 struct ip_tunnel_parm_kern p = { }; 482 int err; 483 484 tunnel_dev = __dev_get_by_name(net, "tunl0"); 485 if (!tunnel_dev) 486 goto out; 487 488 p.iph.daddr = v->vifc_rmt_addr.s_addr; 489 p.iph.saddr = v->vifc_lcl_addr.s_addr; 490 p.iph.version = 4; 491 p.iph.ihl = 5; 492 p.iph.protocol = IPPROTO_IPIP; 493 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 494 495 if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) 496 goto out; 497 err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, 498 SIOCADDTUNNEL); 499 if (err) 500 goto out; 501 502 new_dev = __dev_get_by_name(net, p.name); 503 if (!new_dev) 504 goto out; 505 506 new_dev->flags |= IFF_MULTICAST; 507 if (!ipmr_init_vif_indev(new_dev)) 508 goto out_unregister; 509 if (dev_open(new_dev, NULL)) 510 goto out_unregister; 511 dev_hold(new_dev); 512 err = dev_set_allmulti(new_dev, 1); 513 if (err) { 514 dev_close(new_dev); 515 tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, 516 SIOCDELTUNNEL); 517 dev_put(new_dev); 518 new_dev = ERR_PTR(err); 519 } 520 return new_dev; 521 522 out_unregister: 523 unregister_netdevice(new_dev); 524 out: 525 return ERR_PTR(-ENOBUFS); 526 } 527 528 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 529 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 530 { 531 struct net *net = dev_net(dev); 532 struct mr_table *mrt; 533 struct flowi4 fl4 = { 534 .flowi4_oif = dev->ifindex, 535 .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 536 .flowi4_mark = skb->mark, 537 }; 538 int err; 539 540 rcu_read_lock(); 541 err = ipmr_fib_lookup(net, &fl4, &mrt); 542 if (err < 0) { 543 rcu_read_unlock(); 544 kfree_skb(skb); 545 return err; 546 } 547 548 DEV_STATS_ADD(dev, tx_bytes, skb->len); 549 DEV_STATS_INC(dev, tx_packets); 550 551 /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ 552 ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 553 IGMPMSG_WHOLEPKT); 554 555 rcu_read_unlock(); 556 kfree_skb(skb); 557 return NETDEV_TX_OK; 558 } 559 560 static int reg_vif_get_iflink(const struct net_device *dev) 561 { 562 return 0; 563 } 564 565 static const struct net_device_ops reg_vif_netdev_ops = { 566 .ndo_start_xmit = reg_vif_xmit, 567 .ndo_get_iflink = reg_vif_get_iflink, 568 }; 569 570 static void reg_vif_setup(struct net_device *dev) 571 { 572 dev->type = ARPHRD_PIMREG; 573 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 574 dev->flags = IFF_NOARP; 575 dev->netdev_ops = ®_vif_netdev_ops; 576 dev->needs_free_netdev = true; 577 dev->netns_immutable = true; 578 } 579 580 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 581 { 582 struct net_device *dev; 583 char name[IFNAMSIZ]; 584 585 if (mrt->id == RT_TABLE_DEFAULT) 586 sprintf(name, "pimreg"); 587 else 588 sprintf(name, "pimreg%u", mrt->id); 589 590 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 591 592 if (!dev) 593 return NULL; 594 595 dev_net_set(dev, net); 596 597 if (register_netdevice(dev)) { 598 free_netdev(dev); 599 return NULL; 600 } 601 602 if (!ipmr_init_vif_indev(dev)) 603 goto failure; 604 if (dev_open(dev, NULL)) 605 goto failure; 606 607 dev_hold(dev); 608 609 return dev; 610 611 failure: 612 unregister_netdevice(dev); 613 return NULL; 614 } 615 616 /* called with rcu_read_lock() */ 617 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 618 unsigned int pimlen) 619 { 620 struct net_device *reg_dev = NULL; 621 struct iphdr *encap; 622 int vif_num; 623 624 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 625 /* Check that: 626 * a. packet is really sent to a multicast group 627 * b. packet is not a NULL-REGISTER 628 * c. packet is not truncated 629 */ 630 if (!ipv4_is_multicast(encap->daddr) || 631 encap->tot_len == 0 || 632 ntohs(encap->tot_len) + pimlen > skb->len) 633 return 1; 634 635 /* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */ 636 vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 637 if (vif_num >= 0) 638 reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); 639 if (!reg_dev) 640 return 1; 641 642 skb->mac_header = skb->network_header; 643 skb_pull(skb, (u8 *)encap - skb->data); 644 skb_reset_network_header(skb); 645 skb->protocol = htons(ETH_P_IP); 646 skb->ip_summed = CHECKSUM_NONE; 647 648 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 649 650 netif_rx(skb); 651 652 return NET_RX_SUCCESS; 653 } 654 #else 655 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 656 { 657 return NULL; 658 } 659 #endif 660 661 static int call_ipmr_vif_entry_notifiers(struct net *net, 662 enum fib_event_type event_type, 663 struct vif_device *vif, 664 struct net_device *vif_dev, 665 vifi_t vif_index, u32 tb_id) 666 { 667 return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, 668 vif, vif_dev, vif_index, tb_id, 669 &net->ipv4.ipmr_seq); 670 } 671 672 static int call_ipmr_mfc_entry_notifiers(struct net *net, 673 enum fib_event_type event_type, 674 struct mfc_cache *mfc, u32 tb_id) 675 { 676 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, 677 &mfc->_c, tb_id, &net->ipv4.ipmr_seq); 678 } 679 680 /** 681 * vif_delete - Delete a VIF entry 682 * @mrt: Table to delete from 683 * @vifi: VIF identifier to delete 684 * @notify: Set to 1, if the caller is a notifier_call 685 * @head: if unregistering the VIF, place it on this queue 686 */ 687 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 688 struct list_head *head) 689 { 690 struct net *net = read_pnet(&mrt->net); 691 struct vif_device *v; 692 struct net_device *dev; 693 struct in_device *in_dev; 694 695 if (vifi < 0 || vifi >= mrt->maxvif) 696 return -EADDRNOTAVAIL; 697 698 v = &mrt->vif_table[vifi]; 699 700 dev = rtnl_dereference(v->dev); 701 if (!dev) 702 return -EADDRNOTAVAIL; 703 704 spin_lock(&mrt_lock); 705 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, 706 vifi, mrt->id); 707 RCU_INIT_POINTER(v->dev, NULL); 708 709 if (vifi == mrt->mroute_reg_vif_num) { 710 /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ 711 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 712 } 713 if (vifi + 1 == mrt->maxvif) { 714 int tmp; 715 716 for (tmp = vifi - 1; tmp >= 0; tmp--) { 717 if (VIF_EXISTS(mrt, tmp)) 718 break; 719 } 720 WRITE_ONCE(mrt->maxvif, tmp + 1); 721 } 722 723 spin_unlock(&mrt_lock); 724 725 dev_set_allmulti(dev, -1); 726 727 in_dev = __in_dev_get_rtnl(dev); 728 if (in_dev) { 729 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 730 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 731 NETCONFA_MC_FORWARDING, 732 dev->ifindex, &in_dev->cnf); 733 ip_rt_multicast_event(in_dev); 734 } 735 736 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 737 unregister_netdevice_queue(dev, head); 738 739 netdev_put(dev, &v->dev_tracker); 740 return 0; 741 } 742 743 static void ipmr_cache_free_rcu(struct rcu_head *head) 744 { 745 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 746 747 kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); 748 } 749 750 static void ipmr_cache_free(struct mfc_cache *c) 751 { 752 call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); 753 } 754 755 /* Destroy an unresolved cache entry, killing queued skbs 756 * and reporting error to netlink readers. 757 */ 758 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 759 { 760 struct net *net = read_pnet(&mrt->net); 761 struct sk_buff *skb; 762 struct nlmsgerr *e; 763 764 atomic_dec(&mrt->cache_resolve_queue_len); 765 766 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { 767 if (ip_hdr(skb)->version == 0) { 768 struct nlmsghdr *nlh = skb_pull(skb, 769 sizeof(struct iphdr)); 770 nlh->nlmsg_type = NLMSG_ERROR; 771 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 772 skb_trim(skb, nlh->nlmsg_len); 773 e = nlmsg_data(nlh); 774 e->error = -ETIMEDOUT; 775 memset(&e->msg, 0, sizeof(e->msg)); 776 777 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 778 } else { 779 kfree_skb(skb); 780 } 781 } 782 783 ipmr_cache_free(c); 784 } 785 786 /* Timer process for the unresolved queue. */ 787 static void ipmr_expire_process(struct timer_list *t) 788 { 789 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 790 struct mr_mfc *c, *next; 791 unsigned long expires; 792 unsigned long now; 793 794 if (!spin_trylock(&mfc_unres_lock)) { 795 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 796 return; 797 } 798 799 if (list_empty(&mrt->mfc_unres_queue)) 800 goto out; 801 802 now = jiffies; 803 expires = 10*HZ; 804 805 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 806 if (time_after(c->mfc_un.unres.expires, now)) { 807 unsigned long interval = c->mfc_un.unres.expires - now; 808 if (interval < expires) 809 expires = interval; 810 continue; 811 } 812 813 list_del(&c->list); 814 mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); 815 ipmr_destroy_unres(mrt, (struct mfc_cache *)c); 816 } 817 818 if (!list_empty(&mrt->mfc_unres_queue)) 819 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 820 821 out: 822 spin_unlock(&mfc_unres_lock); 823 } 824 825 /* Fill oifs list. It is called under locked mrt_lock. */ 826 static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, 827 unsigned char *ttls) 828 { 829 int vifi; 830 831 cache->mfc_un.res.minvif = MAXVIFS; 832 cache->mfc_un.res.maxvif = 0; 833 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 834 835 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 836 if (VIF_EXISTS(mrt, vifi) && 837 ttls[vifi] && ttls[vifi] < 255) { 838 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 839 if (cache->mfc_un.res.minvif > vifi) 840 cache->mfc_un.res.minvif = vifi; 841 if (cache->mfc_un.res.maxvif <= vifi) 842 cache->mfc_un.res.maxvif = vifi + 1; 843 } 844 } 845 WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); 846 } 847 848 static int vif_add(struct net *net, struct mr_table *mrt, 849 struct vifctl *vifc, int mrtsock) 850 { 851 struct netdev_phys_item_id ppid = { }; 852 int vifi = vifc->vifc_vifi; 853 struct vif_device *v = &mrt->vif_table[vifi]; 854 struct net_device *dev; 855 struct in_device *in_dev; 856 int err; 857 858 /* Is vif busy ? */ 859 if (VIF_EXISTS(mrt, vifi)) 860 return -EADDRINUSE; 861 862 switch (vifc->vifc_flags) { 863 case VIFF_REGISTER: 864 if (!ipmr_pimsm_enabled()) 865 return -EINVAL; 866 /* Special Purpose VIF in PIM 867 * All the packets will be sent to the daemon 868 */ 869 if (mrt->mroute_reg_vif_num >= 0) 870 return -EADDRINUSE; 871 dev = ipmr_reg_vif(net, mrt); 872 if (!dev) 873 return -ENOBUFS; 874 err = dev_set_allmulti(dev, 1); 875 if (err) { 876 unregister_netdevice(dev); 877 dev_put(dev); 878 return err; 879 } 880 break; 881 case VIFF_TUNNEL: 882 dev = ipmr_new_tunnel(net, vifc); 883 if (IS_ERR(dev)) 884 return PTR_ERR(dev); 885 break; 886 case VIFF_USE_IFINDEX: 887 case 0: 888 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 889 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 890 if (dev && !__in_dev_get_rtnl(dev)) { 891 dev_put(dev); 892 return -EADDRNOTAVAIL; 893 } 894 } else { 895 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 896 } 897 if (!dev) 898 return -EADDRNOTAVAIL; 899 err = dev_set_allmulti(dev, 1); 900 if (err) { 901 dev_put(dev); 902 return err; 903 } 904 break; 905 default: 906 return -EINVAL; 907 } 908 909 in_dev = __in_dev_get_rtnl(dev); 910 if (!in_dev) { 911 dev_put(dev); 912 return -EADDRNOTAVAIL; 913 } 914 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 915 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, 916 dev->ifindex, &in_dev->cnf); 917 ip_rt_multicast_event(in_dev); 918 919 /* Fill in the VIF structures */ 920 vif_device_init(v, dev, vifc->vifc_rate_limit, 921 vifc->vifc_threshold, 922 vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), 923 (VIFF_TUNNEL | VIFF_REGISTER)); 924 925 err = netif_get_port_parent_id(dev, &ppid, true); 926 if (err == 0) { 927 memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); 928 v->dev_parent_id.id_len = ppid.id_len; 929 } else { 930 v->dev_parent_id.id_len = 0; 931 } 932 933 v->local = vifc->vifc_lcl_addr.s_addr; 934 v->remote = vifc->vifc_rmt_addr.s_addr; 935 936 /* And finish update writing critical data */ 937 spin_lock(&mrt_lock); 938 rcu_assign_pointer(v->dev, dev); 939 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 940 if (v->flags & VIFF_REGISTER) { 941 /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ 942 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 943 } 944 if (vifi+1 > mrt->maxvif) 945 WRITE_ONCE(mrt->maxvif, vifi + 1); 946 spin_unlock(&mrt_lock); 947 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, 948 vifi, mrt->id); 949 return 0; 950 } 951 952 /* called with rcu_read_lock() */ 953 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 954 __be32 origin, 955 __be32 mcastgrp) 956 { 957 struct mfc_cache_cmp_arg arg = { 958 .mfc_mcastgrp = mcastgrp, 959 .mfc_origin = origin 960 }; 961 962 return mr_mfc_find(mrt, &arg); 963 } 964 965 /* Look for a (*,G) entry */ 966 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 967 __be32 mcastgrp, int vifi) 968 { 969 struct mfc_cache_cmp_arg arg = { 970 .mfc_mcastgrp = mcastgrp, 971 .mfc_origin = htonl(INADDR_ANY) 972 }; 973 974 if (mcastgrp == htonl(INADDR_ANY)) 975 return mr_mfc_find_any_parent(mrt, vifi); 976 return mr_mfc_find_any(mrt, vifi, &arg); 977 } 978 979 /* Look for a (S,G,iif) entry if parent != -1 */ 980 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, 981 __be32 origin, __be32 mcastgrp, 982 int parent) 983 { 984 struct mfc_cache_cmp_arg arg = { 985 .mfc_mcastgrp = mcastgrp, 986 .mfc_origin = origin, 987 }; 988 989 return mr_mfc_find_parent(mrt, &arg, parent); 990 } 991 992 /* Allocate a multicast cache entry */ 993 static struct mfc_cache *ipmr_cache_alloc(void) 994 { 995 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 996 997 if (c) { 998 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 999 c->_c.mfc_un.res.minvif = MAXVIFS; 1000 c->_c.free = ipmr_cache_free_rcu; 1001 refcount_set(&c->_c.mfc_un.res.refcount, 1); 1002 } 1003 return c; 1004 } 1005 1006 static struct mfc_cache *ipmr_cache_alloc_unres(void) 1007 { 1008 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1009 1010 if (c) { 1011 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1012 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1013 } 1014 return c; 1015 } 1016 1017 /* A cache entry has gone into a resolved state from queued */ 1018 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 1019 struct mfc_cache *uc, struct mfc_cache *c) 1020 { 1021 struct sk_buff *skb; 1022 struct nlmsgerr *e; 1023 1024 /* Play the pending entries through our router */ 1025 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1026 if (ip_hdr(skb)->version == 0) { 1027 struct nlmsghdr *nlh = skb_pull(skb, 1028 sizeof(struct iphdr)); 1029 1030 if (mr_fill_mroute(mrt, skb, &c->_c, 1031 nlmsg_data(nlh)) > 0) { 1032 nlh->nlmsg_len = skb_tail_pointer(skb) - 1033 (u8 *)nlh; 1034 } else { 1035 nlh->nlmsg_type = NLMSG_ERROR; 1036 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1037 skb_trim(skb, nlh->nlmsg_len); 1038 e = nlmsg_data(nlh); 1039 e->error = -EMSGSIZE; 1040 memset(&e->msg, 0, sizeof(e->msg)); 1041 } 1042 1043 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1044 } else { 1045 rcu_read_lock(); 1046 ip_mr_forward(net, mrt, skb->dev, skb, c, 0); 1047 rcu_read_unlock(); 1048 } 1049 } 1050 } 1051 1052 /* Bounce a cache query up to mrouted and netlink. 1053 * 1054 * Called under rcu_read_lock(). 1055 */ 1056 static int ipmr_cache_report(const struct mr_table *mrt, 1057 struct sk_buff *pkt, vifi_t vifi, int assert) 1058 { 1059 const int ihl = ip_hdrlen(pkt); 1060 struct sock *mroute_sk; 1061 struct igmphdr *igmp; 1062 struct igmpmsg *msg; 1063 struct sk_buff *skb; 1064 int ret; 1065 1066 mroute_sk = rcu_dereference(mrt->mroute_sk); 1067 if (!mroute_sk) 1068 return -EINVAL; 1069 1070 if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) 1071 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 1072 else 1073 skb = alloc_skb(128, GFP_ATOMIC); 1074 1075 if (!skb) 1076 return -ENOBUFS; 1077 1078 if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) { 1079 /* Ugly, but we have no choice with this interface. 1080 * Duplicate old header, fix ihl, length etc. 1081 * And all this only to mangle msg->im_msgtype and 1082 * to set msg->im_mbz to "mbz" :-) 1083 */ 1084 skb_push(skb, sizeof(struct iphdr)); 1085 skb_reset_network_header(skb); 1086 skb_reset_transport_header(skb); 1087 msg = (struct igmpmsg *)skb_network_header(skb); 1088 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 1089 msg->im_msgtype = assert; 1090 msg->im_mbz = 0; 1091 if (assert == IGMPMSG_WRVIFWHOLE) { 1092 msg->im_vif = vifi; 1093 msg->im_vif_hi = vifi >> 8; 1094 } else { 1095 /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ 1096 int vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 1097 1098 msg->im_vif = vif_num; 1099 msg->im_vif_hi = vif_num >> 8; 1100 } 1101 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 1102 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 1103 sizeof(struct iphdr)); 1104 } else { 1105 /* Copy the IP header */ 1106 skb_set_network_header(skb, skb->len); 1107 skb_put(skb, ihl); 1108 skb_copy_to_linear_data(skb, pkt->data, ihl); 1109 /* Flag to the kernel this is a route add */ 1110 ip_hdr(skb)->protocol = 0; 1111 msg = (struct igmpmsg *)skb_network_header(skb); 1112 msg->im_vif = vifi; 1113 msg->im_vif_hi = vifi >> 8; 1114 ipv4_pktinfo_prepare(mroute_sk, pkt, false); 1115 memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); 1116 /* Add our header. 1117 * Note that code, csum and group fields are cleared. 1118 */ 1119 igmp = skb_put_zero(skb, sizeof(struct igmphdr)); 1120 igmp->type = assert; 1121 msg->im_msgtype = assert; 1122 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 1123 skb->transport_header = skb->network_header; 1124 } 1125 1126 igmpmsg_netlink_event(mrt, skb); 1127 1128 /* Deliver to mrouted */ 1129 ret = sock_queue_rcv_skb(mroute_sk, skb); 1130 1131 if (ret < 0) { 1132 net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); 1133 kfree_skb(skb); 1134 } 1135 1136 return ret; 1137 } 1138 1139 /* Queue a packet for resolution. It gets locked cache entry! */ 1140 /* Called under rcu_read_lock() */ 1141 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, 1142 struct sk_buff *skb, struct net_device *dev) 1143 { 1144 struct net *net = read_pnet(&mrt->net); 1145 const struct iphdr *iph = ip_hdr(skb); 1146 struct mfc_cache *c = NULL; 1147 bool found = false; 1148 int err; 1149 1150 spin_lock_bh(&mfc_unres_lock); 1151 1152 if (!check_net(net)) { 1153 err = -EINVAL; 1154 goto err; 1155 } 1156 1157 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1158 if (c->mfc_mcastgrp == iph->daddr && 1159 c->mfc_origin == iph->saddr) { 1160 found = true; 1161 break; 1162 } 1163 } 1164 1165 if (!found) { 1166 /* Create a new entry if allowable */ 1167 c = ipmr_cache_alloc_unres(); 1168 if (!c) { 1169 err = -ENOBUFS; 1170 goto err; 1171 } 1172 1173 /* Fill in the new cache entry */ 1174 c->_c.mfc_parent = -1; 1175 c->mfc_origin = iph->saddr; 1176 c->mfc_mcastgrp = iph->daddr; 1177 1178 /* Reflect first query at mrouted. */ 1179 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1180 if (err < 0) 1181 goto err; 1182 1183 atomic_inc(&mrt->cache_resolve_queue_len); 1184 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1185 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1186 1187 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1188 mod_timer(&mrt->ipmr_expire_timer, 1189 c->_c.mfc_un.unres.expires); 1190 } 1191 1192 /* See if we can append the packet */ 1193 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1194 c = NULL; 1195 err = -ENOBUFS; 1196 goto err; 1197 } 1198 1199 if (dev) { 1200 skb->dev = dev; 1201 skb->skb_iif = dev->ifindex; 1202 } 1203 1204 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1205 1206 spin_unlock_bh(&mfc_unres_lock); 1207 return 0; 1208 1209 err: 1210 spin_unlock_bh(&mfc_unres_lock); 1211 if (c) 1212 ipmr_cache_free(c); 1213 kfree_skb(skb); 1214 return err; 1215 } 1216 1217 /* MFC cache manipulation by user space mroute daemon */ 1218 1219 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1220 { 1221 struct net *net = read_pnet(&mrt->net); 1222 struct mfc_cache *c; 1223 1224 rcu_read_lock(); 1225 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1226 mfc->mfcc_mcastgrp.s_addr, parent); 1227 rcu_read_unlock(); 1228 if (!c) 1229 return -ENOENT; 1230 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); 1231 list_del_rcu(&c->_c.list); 1232 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); 1233 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1234 mr_cache_put(&c->_c); 1235 1236 return 0; 1237 } 1238 1239 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1240 struct mfcctl *mfc, int mrtsock, int parent) 1241 { 1242 struct mfc_cache *uc, *c; 1243 struct mr_mfc *_uc; 1244 bool found; 1245 int ret; 1246 1247 if (mfc->mfcc_parent >= MAXVIFS) 1248 return -ENFILE; 1249 1250 rcu_read_lock(); 1251 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1252 mfc->mfcc_mcastgrp.s_addr, parent); 1253 rcu_read_unlock(); 1254 if (c) { 1255 spin_lock(&mrt_lock); 1256 c->_c.mfc_parent = mfc->mfcc_parent; 1257 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1258 if (!mrtsock) 1259 c->_c.mfc_flags |= MFC_STATIC; 1260 spin_unlock(&mrt_lock); 1261 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, 1262 mrt->id); 1263 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1264 return 0; 1265 } 1266 1267 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1268 !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1269 return -EINVAL; 1270 1271 c = ipmr_cache_alloc(); 1272 if (!c) 1273 return -ENOMEM; 1274 1275 c->mfc_origin = mfc->mfcc_origin.s_addr; 1276 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1277 c->_c.mfc_parent = mfc->mfcc_parent; 1278 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1279 if (!mrtsock) 1280 c->_c.mfc_flags |= MFC_STATIC; 1281 1282 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1283 ipmr_rht_params); 1284 if (ret) { 1285 pr_err("ipmr: rhtable insert error %d\n", ret); 1286 ipmr_cache_free(c); 1287 return ret; 1288 } 1289 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1290 /* Check to see if we resolved a queued list. If so we 1291 * need to send on the frames and tidy up. 1292 */ 1293 found = false; 1294 spin_lock_bh(&mfc_unres_lock); 1295 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1296 uc = (struct mfc_cache *)_uc; 1297 if (uc->mfc_origin == c->mfc_origin && 1298 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1299 list_del(&_uc->list); 1300 atomic_dec(&mrt->cache_resolve_queue_len); 1301 found = true; 1302 break; 1303 } 1304 } 1305 if (list_empty(&mrt->mfc_unres_queue)) 1306 timer_delete(&mrt->ipmr_expire_timer); 1307 spin_unlock_bh(&mfc_unres_lock); 1308 1309 if (found) { 1310 ipmr_cache_resolve(net, mrt, uc, c); 1311 ipmr_cache_free(uc); 1312 } 1313 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); 1314 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1315 return 0; 1316 } 1317 1318 /* Close the multicast socket, and clear the vif tables etc */ 1319 static void mroute_clean_tables(struct mr_table *mrt, int flags, 1320 struct list_head *dev_kill_list) 1321 { 1322 struct net *net = read_pnet(&mrt->net); 1323 struct mfc_cache *cache; 1324 struct mr_mfc *c, *tmp; 1325 int i; 1326 1327 /* Shut down all active vif entries */ 1328 if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { 1329 for (i = 0; i < mrt->maxvif; i++) { 1330 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1331 !(flags & MRT_FLUSH_VIFS_STATIC)) || 1332 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) 1333 continue; 1334 vif_delete(mrt, i, 0, dev_kill_list); 1335 } 1336 } 1337 1338 /* Wipe the cache */ 1339 if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { 1340 mutex_lock(&net->ipv4.mfc_mutex); 1341 1342 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1343 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || 1344 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) 1345 continue; 1346 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1347 list_del_rcu(&c->list); 1348 cache = (struct mfc_cache *)c; 1349 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, 1350 mrt->id); 1351 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1352 mr_cache_put(c); 1353 } 1354 1355 mutex_unlock(&net->ipv4.mfc_mutex); 1356 } 1357 1358 if (flags & MRT_FLUSH_MFC) { 1359 if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) { 1360 spin_lock_bh(&mfc_unres_lock); 1361 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1362 list_del(&c->list); 1363 cache = (struct mfc_cache *)c; 1364 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1365 ipmr_destroy_unres(mrt, cache); 1366 } 1367 spin_unlock_bh(&mfc_unres_lock); 1368 } 1369 } 1370 } 1371 1372 /* called from ip_ra_control(), before an RCU grace period, 1373 * we don't need to call synchronize_rcu() here 1374 */ 1375 static void mrtsock_destruct(struct sock *sk) 1376 { 1377 struct net *net = sock_net(sk); 1378 LIST_HEAD(dev_kill_list); 1379 struct mr_table *mrt; 1380 1381 rtnl_lock(); 1382 1383 ipmr_for_each_table(mrt, net) { 1384 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1385 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1386 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1387 NETCONFA_MC_FORWARDING, 1388 NETCONFA_IFINDEX_ALL, 1389 net->ipv4.devconf_all); 1390 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1391 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC, 1392 &dev_kill_list); 1393 } 1394 } 1395 1396 unregister_netdevice_many(&dev_kill_list); 1397 1398 rtnl_unlock(); 1399 } 1400 1401 /* Socket options and virtual interface manipulation. The whole 1402 * virtual interface system is a complete heap, but unfortunately 1403 * that's how BSD mrouted happens to think. Maybe one day with a proper 1404 * MOSPF/PIM router set up we can clean this up. 1405 */ 1406 1407 int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1408 unsigned int optlen) 1409 { 1410 struct net *net = sock_net(sk); 1411 int val, ret = 0, parent = 0; 1412 struct mr_table *mrt; 1413 struct vifctl vif; 1414 struct mfcctl mfc; 1415 bool do_wrvifwhole; 1416 u32 uval; 1417 1418 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1419 rtnl_lock(); 1420 if (sk->sk_type != SOCK_RAW || 1421 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1422 ret = -EOPNOTSUPP; 1423 goto out_unlock; 1424 } 1425 1426 mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1427 if (!mrt) { 1428 ret = -ENOENT; 1429 goto out_unlock; 1430 } 1431 if (optname != MRT_INIT) { 1432 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1433 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1434 ret = -EACCES; 1435 goto out_unlock; 1436 } 1437 } 1438 1439 switch (optname) { 1440 case MRT_INIT: 1441 if (optlen != sizeof(int)) { 1442 ret = -EINVAL; 1443 break; 1444 } 1445 if (rtnl_dereference(mrt->mroute_sk)) { 1446 ret = -EADDRINUSE; 1447 break; 1448 } 1449 1450 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1451 if (ret == 0) { 1452 rcu_assign_pointer(mrt->mroute_sk, sk); 1453 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1454 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1455 NETCONFA_MC_FORWARDING, 1456 NETCONFA_IFINDEX_ALL, 1457 net->ipv4.devconf_all); 1458 } 1459 break; 1460 case MRT_DONE: 1461 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1462 ret = -EACCES; 1463 } else { 1464 /* We need to unlock here because mrtsock_destruct takes 1465 * care of rtnl itself and we can't change that due to 1466 * the IP_ROUTER_ALERT setsockopt which runs without it. 1467 */ 1468 rtnl_unlock(); 1469 ret = ip_ra_control(sk, 0, NULL); 1470 goto out; 1471 } 1472 break; 1473 case MRT_ADD_VIF: 1474 case MRT_DEL_VIF: 1475 if (optlen != sizeof(vif)) { 1476 ret = -EINVAL; 1477 break; 1478 } 1479 if (copy_from_sockptr(&vif, optval, sizeof(vif))) { 1480 ret = -EFAULT; 1481 break; 1482 } 1483 if (vif.vifc_vifi >= MAXVIFS) { 1484 ret = -ENFILE; 1485 break; 1486 } 1487 if (optname == MRT_ADD_VIF) { 1488 ret = vif_add(net, mrt, &vif, 1489 sk == rtnl_dereference(mrt->mroute_sk)); 1490 } else { 1491 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1492 } 1493 break; 1494 /* Manipulate the forwarding caches. These live 1495 * in a sort of kernel/user symbiosis. 1496 */ 1497 case MRT_ADD_MFC: 1498 case MRT_DEL_MFC: 1499 parent = -1; 1500 fallthrough; 1501 case MRT_ADD_MFC_PROXY: 1502 case MRT_DEL_MFC_PROXY: 1503 if (optlen != sizeof(mfc)) { 1504 ret = -EINVAL; 1505 break; 1506 } 1507 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) { 1508 ret = -EFAULT; 1509 break; 1510 } 1511 if (parent == 0) 1512 parent = mfc.mfcc_parent; 1513 1514 mutex_lock(&net->ipv4.mfc_mutex); 1515 1516 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1517 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1518 else 1519 ret = ipmr_mfc_add(net, mrt, &mfc, 1520 sk == rtnl_dereference(mrt->mroute_sk), 1521 parent); 1522 1523 mutex_unlock(&net->ipv4.mfc_mutex); 1524 break; 1525 case MRT_FLUSH: { 1526 LIST_HEAD(dev_kill_list); 1527 1528 if (optlen != sizeof(val)) { 1529 ret = -EINVAL; 1530 break; 1531 } 1532 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1533 ret = -EFAULT; 1534 break; 1535 } 1536 1537 mroute_clean_tables(mrt, val, &dev_kill_list); 1538 unregister_netdevice_many(&dev_kill_list); 1539 break; 1540 } 1541 /* Control PIM assert. */ 1542 case MRT_ASSERT: 1543 if (optlen != sizeof(val)) { 1544 ret = -EINVAL; 1545 break; 1546 } 1547 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1548 ret = -EFAULT; 1549 break; 1550 } 1551 WRITE_ONCE(mrt->mroute_do_assert, val); 1552 break; 1553 case MRT_PIM: 1554 if (!ipmr_pimsm_enabled()) { 1555 ret = -ENOPROTOOPT; 1556 break; 1557 } 1558 if (optlen != sizeof(val)) { 1559 ret = -EINVAL; 1560 break; 1561 } 1562 if (copy_from_sockptr(&val, optval, sizeof(val))) { 1563 ret = -EFAULT; 1564 break; 1565 } 1566 1567 do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); 1568 val = !!val; 1569 if (val != mrt->mroute_do_pim) { 1570 WRITE_ONCE(mrt->mroute_do_pim, val); 1571 WRITE_ONCE(mrt->mroute_do_assert, val); 1572 WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole); 1573 } 1574 break; 1575 case MRT_TABLE: 1576 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1577 ret = -ENOPROTOOPT; 1578 break; 1579 } 1580 if (optlen != sizeof(uval)) { 1581 ret = -EINVAL; 1582 break; 1583 } 1584 if (copy_from_sockptr(&uval, optval, sizeof(uval))) { 1585 ret = -EFAULT; 1586 break; 1587 } 1588 1589 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1590 ret = -EBUSY; 1591 } else { 1592 mrt = ipmr_new_table(net, uval); 1593 if (IS_ERR(mrt)) 1594 ret = PTR_ERR(mrt); 1595 else 1596 raw_sk(sk)->ipmr_table = uval; 1597 } 1598 break; 1599 /* Spurious command, or MRT_VERSION which you cannot set. */ 1600 default: 1601 ret = -ENOPROTOOPT; 1602 } 1603 out_unlock: 1604 rtnl_unlock(); 1605 out: 1606 return ret; 1607 } 1608 1609 /* Execute if this ioctl is a special mroute ioctl */ 1610 int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1611 { 1612 switch (cmd) { 1613 /* These userspace buffers will be consumed by ipmr_ioctl() */ 1614 case SIOCGETVIFCNT: { 1615 struct sioc_vif_req buffer; 1616 1617 return sock_ioctl_inout(sk, cmd, arg, &buffer, 1618 sizeof(buffer)); 1619 } 1620 case SIOCGETSGCNT: { 1621 struct sioc_sg_req buffer; 1622 1623 return sock_ioctl_inout(sk, cmd, arg, &buffer, 1624 sizeof(buffer)); 1625 } 1626 } 1627 /* return code > 0 means that the ioctl was not executed */ 1628 return 1; 1629 } 1630 1631 /* Getsock opt support for the multicast routing system. */ 1632 int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, 1633 sockptr_t optlen) 1634 { 1635 int olr; 1636 int val; 1637 struct net *net = sock_net(sk); 1638 struct mr_table *mrt; 1639 1640 if (sk->sk_type != SOCK_RAW || 1641 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1642 return -EOPNOTSUPP; 1643 1644 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1645 if (!mrt) 1646 return -ENOENT; 1647 1648 switch (optname) { 1649 case MRT_VERSION: 1650 val = 0x0305; 1651 break; 1652 case MRT_PIM: 1653 if (!ipmr_pimsm_enabled()) 1654 return -ENOPROTOOPT; 1655 val = READ_ONCE(mrt->mroute_do_pim); 1656 break; 1657 case MRT_ASSERT: 1658 val = READ_ONCE(mrt->mroute_do_assert); 1659 break; 1660 default: 1661 return -ENOPROTOOPT; 1662 } 1663 1664 if (copy_from_sockptr(&olr, optlen, sizeof(int))) 1665 return -EFAULT; 1666 if (olr < 0) 1667 return -EINVAL; 1668 1669 olr = min_t(unsigned int, olr, sizeof(int)); 1670 1671 if (copy_to_sockptr(optlen, &olr, sizeof(int))) 1672 return -EFAULT; 1673 if (copy_to_sockptr(optval, &val, olr)) 1674 return -EFAULT; 1675 return 0; 1676 } 1677 1678 /* The IP multicast ioctl support routines. */ 1679 int ipmr_ioctl(struct sock *sk, int cmd, void *arg) 1680 { 1681 struct vif_device *vif; 1682 struct mfc_cache *c; 1683 struct net *net = sock_net(sk); 1684 struct sioc_vif_req *vr; 1685 struct sioc_sg_req *sr; 1686 struct mr_table *mrt; 1687 1688 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1689 if (!mrt) 1690 return -ENOENT; 1691 1692 switch (cmd) { 1693 case SIOCGETVIFCNT: 1694 vr = (struct sioc_vif_req *)arg; 1695 if (vr->vifi >= mrt->maxvif) 1696 return -EINVAL; 1697 vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif); 1698 rcu_read_lock(); 1699 vif = &mrt->vif_table[vr->vifi]; 1700 if (VIF_EXISTS(mrt, vr->vifi)) { 1701 vr->icount = READ_ONCE(vif->pkt_in); 1702 vr->ocount = READ_ONCE(vif->pkt_out); 1703 vr->ibytes = READ_ONCE(vif->bytes_in); 1704 vr->obytes = READ_ONCE(vif->bytes_out); 1705 rcu_read_unlock(); 1706 1707 return 0; 1708 } 1709 rcu_read_unlock(); 1710 return -EADDRNOTAVAIL; 1711 case SIOCGETSGCNT: 1712 sr = (struct sioc_sg_req *)arg; 1713 1714 rcu_read_lock(); 1715 c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr); 1716 if (c) { 1717 sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1718 sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1719 sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1720 rcu_read_unlock(); 1721 return 0; 1722 } 1723 rcu_read_unlock(); 1724 return -EADDRNOTAVAIL; 1725 default: 1726 return -ENOIOCTLCMD; 1727 } 1728 } 1729 1730 #ifdef CONFIG_COMPAT 1731 struct compat_sioc_sg_req { 1732 struct in_addr src; 1733 struct in_addr grp; 1734 compat_ulong_t pktcnt; 1735 compat_ulong_t bytecnt; 1736 compat_ulong_t wrong_if; 1737 }; 1738 1739 struct compat_sioc_vif_req { 1740 vifi_t vifi; /* Which iface */ 1741 compat_ulong_t icount; 1742 compat_ulong_t ocount; 1743 compat_ulong_t ibytes; 1744 compat_ulong_t obytes; 1745 }; 1746 1747 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1748 { 1749 struct compat_sioc_sg_req sr; 1750 struct compat_sioc_vif_req vr; 1751 struct vif_device *vif; 1752 struct mfc_cache *c; 1753 struct net *net = sock_net(sk); 1754 struct mr_table *mrt; 1755 1756 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1757 if (!mrt) 1758 return -ENOENT; 1759 1760 switch (cmd) { 1761 case SIOCGETVIFCNT: 1762 if (copy_from_user(&vr, arg, sizeof(vr))) 1763 return -EFAULT; 1764 if (vr.vifi >= mrt->maxvif) 1765 return -EINVAL; 1766 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1767 rcu_read_lock(); 1768 vif = &mrt->vif_table[vr.vifi]; 1769 if (VIF_EXISTS(mrt, vr.vifi)) { 1770 vr.icount = READ_ONCE(vif->pkt_in); 1771 vr.ocount = READ_ONCE(vif->pkt_out); 1772 vr.ibytes = READ_ONCE(vif->bytes_in); 1773 vr.obytes = READ_ONCE(vif->bytes_out); 1774 rcu_read_unlock(); 1775 1776 if (copy_to_user(arg, &vr, sizeof(vr))) 1777 return -EFAULT; 1778 return 0; 1779 } 1780 rcu_read_unlock(); 1781 return -EADDRNOTAVAIL; 1782 case SIOCGETSGCNT: 1783 if (copy_from_user(&sr, arg, sizeof(sr))) 1784 return -EFAULT; 1785 1786 rcu_read_lock(); 1787 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1788 if (c) { 1789 sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); 1790 sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); 1791 sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); 1792 rcu_read_unlock(); 1793 1794 if (copy_to_user(arg, &sr, sizeof(sr))) 1795 return -EFAULT; 1796 return 0; 1797 } 1798 rcu_read_unlock(); 1799 return -EADDRNOTAVAIL; 1800 default: 1801 return -ENOIOCTLCMD; 1802 } 1803 } 1804 #endif 1805 1806 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1807 { 1808 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1809 struct net *net = dev_net(dev); 1810 struct mr_table *mrt; 1811 struct vif_device *v; 1812 int ct; 1813 1814 if (event != NETDEV_UNREGISTER) 1815 return NOTIFY_DONE; 1816 1817 ipmr_for_each_table(mrt, net) { 1818 v = &mrt->vif_table[0]; 1819 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1820 if (rcu_access_pointer(v->dev) == dev) 1821 vif_delete(mrt, ct, 1, NULL); 1822 } 1823 } 1824 return NOTIFY_DONE; 1825 } 1826 1827 static struct notifier_block ip_mr_notifier = { 1828 .notifier_call = ipmr_device_event, 1829 }; 1830 1831 /* Encapsulate a packet by attaching a valid IPIP header to it. 1832 * This avoids tunnel drivers and other mess and gives us the speed so 1833 * important for multicast video. 1834 */ 1835 static void ip_encap(struct net *net, struct sk_buff *skb, 1836 __be32 saddr, __be32 daddr) 1837 { 1838 struct iphdr *iph; 1839 const struct iphdr *old_iph = ip_hdr(skb); 1840 1841 skb_push(skb, sizeof(struct iphdr)); 1842 skb->transport_header = skb->network_header; 1843 skb_reset_network_header(skb); 1844 iph = ip_hdr(skb); 1845 1846 iph->version = 4; 1847 iph->tos = old_iph->tos; 1848 iph->ttl = old_iph->ttl; 1849 iph->frag_off = 0; 1850 iph->daddr = daddr; 1851 iph->saddr = saddr; 1852 iph->protocol = IPPROTO_IPIP; 1853 iph->ihl = 5; 1854 iph->tot_len = htons(skb->len); 1855 ip_select_ident(net, skb, NULL); 1856 ip_send_check(iph); 1857 1858 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1859 nf_reset_ct(skb); 1860 } 1861 1862 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1863 struct sk_buff *skb) 1864 { 1865 struct ip_options *opt = &(IPCB(skb)->opt); 1866 1867 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1868 1869 if (unlikely(opt->optlen)) 1870 ip_forward_options(skb); 1871 1872 return dst_output(net, sk, skb); 1873 } 1874 1875 #ifdef CONFIG_NET_SWITCHDEV 1876 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1877 int in_vifi, int out_vifi) 1878 { 1879 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1880 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1881 1882 if (!skb->offload_l3_fwd_mark) 1883 return false; 1884 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1885 return false; 1886 return netdev_phys_item_id_same(&out_vif->dev_parent_id, 1887 &in_vif->dev_parent_id); 1888 } 1889 #else 1890 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1891 int in_vifi, int out_vifi) 1892 { 1893 return false; 1894 } 1895 #endif 1896 1897 /* Processing handlers for ipmr_forward, under rcu_read_lock() */ 1898 1899 static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, 1900 struct sk_buff *skb, int vifi) 1901 { 1902 const struct iphdr *iph = ip_hdr(skb); 1903 struct vif_device *vif = &mrt->vif_table[vifi]; 1904 struct net_device *vif_dev; 1905 struct rtable *rt; 1906 struct flowi4 fl4; 1907 int encap = 0; 1908 1909 vif_dev = vif_dev_read(vif); 1910 if (!vif_dev) 1911 return -1; 1912 1913 if (vif->flags & VIFF_REGISTER) { 1914 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 1915 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 1916 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 1917 DEV_STATS_INC(vif_dev, tx_packets); 1918 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1919 return -1; 1920 } 1921 1922 if (vif->flags & VIFF_TUNNEL) { 1923 rt = ip_route_output_ports(net, &fl4, NULL, 1924 vif->remote, vif->local, 1925 0, 0, 1926 IPPROTO_IPIP, 1927 iph->tos & INET_DSCP_MASK, vif->link); 1928 if (IS_ERR(rt)) 1929 return -1; 1930 encap = sizeof(struct iphdr); 1931 } else { 1932 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1933 0, 0, 1934 IPPROTO_IPIP, 1935 iph->tos & INET_DSCP_MASK, vif->link); 1936 if (IS_ERR(rt)) 1937 return -1; 1938 } 1939 1940 if (skb->len+encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1941 /* Do not fragment multicasts. Alas, IPv4 does not 1942 * allow to send ICMP, so that packets will disappear 1943 * to blackhole. 1944 */ 1945 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1946 ip_rt_put(rt); 1947 return -1; 1948 } 1949 1950 encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len; 1951 1952 if (skb_cow(skb, encap)) { 1953 ip_rt_put(rt); 1954 return -1; 1955 } 1956 1957 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 1958 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 1959 1960 skb_dst_drop(skb); 1961 skb_dst_set(skb, &rt->dst); 1962 ip_decrease_ttl(ip_hdr(skb)); 1963 1964 /* FIXME: forward and output firewalls used to be called here. 1965 * What do we do with netfilter? -- RR 1966 */ 1967 if (vif->flags & VIFF_TUNNEL) { 1968 ip_encap(net, skb, vif->local, vif->remote); 1969 /* FIXME: extra output firewall step used to be here. --RR */ 1970 DEV_STATS_INC(vif_dev, tx_packets); 1971 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); 1972 } 1973 1974 return 0; 1975 } 1976 1977 static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt, 1978 int in_vifi, struct sk_buff *skb, int vifi) 1979 { 1980 struct rtable *rt; 1981 1982 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1983 goto out_free; 1984 1985 if (ipmr_prepare_xmit(net, mrt, skb, vifi)) 1986 goto out_free; 1987 1988 rt = skb_rtable(skb); 1989 1990 IPCB(skb)->flags |= IPSKB_FORWARDED; 1991 1992 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1993 * not only before forwarding, but after forwarding on all output 1994 * interfaces. It is clear, if mrouter runs a multicasting 1995 * program, it should receive packets not depending to what interface 1996 * program is joined. 1997 * If we will not make it, the program will have to join on all 1998 * interfaces. On the other hand, multihoming host (or router, but 1999 * not mrouter) cannot join to more than one interface - it will 2000 * result in receiving multiple packets. 2001 */ 2002 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, 2003 net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst), 2004 ipmr_forward_finish); 2005 return; 2006 2007 out_free: 2008 kfree_skb(skb); 2009 } 2010 2011 static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt, 2012 struct sk_buff *skb, int vifi) 2013 { 2014 if (ipmr_prepare_xmit(net, mrt, skb, vifi)) 2015 goto out_free; 2016 2017 ip_mc_output(net, NULL, skb); 2018 return; 2019 2020 out_free: 2021 kfree_skb(skb); 2022 } 2023 2024 /* Called with mrt_lock or rcu_read_lock() */ 2025 static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) 2026 { 2027 int ct; 2028 /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ 2029 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2030 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2031 break; 2032 } 2033 return ct; 2034 } 2035 2036 /* "local" means that we should preserve one skb (for local delivery) */ 2037 /* Called uner rcu_read_lock() */ 2038 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 2039 struct net_device *dev, struct sk_buff *skb, 2040 struct mfc_cache *c, int local) 2041 { 2042 int true_vifi = ipmr_find_vif(mrt, dev); 2043 int psend = -1; 2044 int vif, ct; 2045 2046 vif = c->_c.mfc_parent; 2047 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2048 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2049 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2050 2051 if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { 2052 struct mfc_cache *cache_proxy; 2053 2054 /* For an (*,G) entry, we only check that the incoming 2055 * interface is part of the static tree. 2056 */ 2057 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2058 if (cache_proxy && 2059 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 2060 goto forward; 2061 } 2062 2063 /* Wrong interface: drop packet and (maybe) send PIM assert. */ 2064 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2065 if (rt_is_output_route(skb_rtable(skb))) { 2066 /* It is our own packet, looped back. 2067 * Very complicated situation... 2068 * 2069 * The best workaround until routing daemons will be 2070 * fixed is not to redistribute packet, if it was 2071 * send through wrong interface. It means, that 2072 * multicast applications WILL NOT work for 2073 * (S,G), which have default multicast route pointing 2074 * to wrong oif. In any case, it is not a good 2075 * idea to use multicasting applications on router. 2076 */ 2077 goto dont_forward; 2078 } 2079 2080 atomic_long_inc(&c->_c.mfc_un.res.wrong_if); 2081 2082 if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) && 2083 /* pimsm uses asserts, when switching from RPT to SPT, 2084 * so that we cannot check that packet arrived on an oif. 2085 * It is bad, but otherwise we would need to move pretty 2086 * large chunk of pimd to kernel. Ough... --ANK 2087 */ 2088 (READ_ONCE(mrt->mroute_do_pim) || 2089 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2090 time_after(jiffies, 2091 c->_c.mfc_un.res.last_assert + 2092 MFC_ASSERT_THRESH)) { 2093 c->_c.mfc_un.res.last_assert = jiffies; 2094 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 2095 if (READ_ONCE(mrt->mroute_do_wrvifwhole)) 2096 ipmr_cache_report(mrt, skb, true_vifi, 2097 IGMPMSG_WRVIFWHOLE); 2098 } 2099 goto dont_forward; 2100 } 2101 2102 forward: 2103 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2104 mrt->vif_table[vif].pkt_in + 1); 2105 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2106 mrt->vif_table[vif].bytes_in + skb->len); 2107 2108 /* Forward the frame */ 2109 if (c->mfc_origin == htonl(INADDR_ANY) && 2110 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2111 if (true_vifi >= 0 && 2112 true_vifi != c->_c.mfc_parent && 2113 ip_hdr(skb)->ttl > 2114 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2115 /* It's an (*,*) entry and the packet is not coming from 2116 * the upstream: forward the packet to the upstream 2117 * only. 2118 */ 2119 psend = c->_c.mfc_parent; 2120 goto last_forward; 2121 } 2122 goto dont_forward; 2123 } 2124 for (ct = c->_c.mfc_un.res.maxvif - 1; 2125 ct >= c->_c.mfc_un.res.minvif; ct--) { 2126 /* For (*,G) entry, don't forward to the incoming interface */ 2127 if ((c->mfc_origin != htonl(INADDR_ANY) || 2128 ct != true_vifi) && 2129 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2130 if (psend != -1) { 2131 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2132 2133 if (skb2) 2134 ipmr_queue_fwd_xmit(net, mrt, true_vifi, 2135 skb2, psend); 2136 } 2137 psend = ct; 2138 } 2139 } 2140 last_forward: 2141 if (psend != -1) { 2142 if (local) { 2143 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2144 2145 if (skb2) 2146 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2, 2147 psend); 2148 } else { 2149 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend); 2150 return; 2151 } 2152 } 2153 2154 dont_forward: 2155 if (!local) 2156 kfree_skb(skb); 2157 } 2158 2159 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2160 { 2161 struct rtable *rt = skb_rtable(skb); 2162 struct iphdr *iph = ip_hdr(skb); 2163 struct flowi4 fl4 = { 2164 .daddr = iph->daddr, 2165 .saddr = iph->saddr, 2166 .flowi4_dscp = ip4h_dscp(iph), 2167 .flowi4_oif = (rt_is_output_route(rt) ? 2168 skb->dev->ifindex : 0), 2169 .flowi4_iif = (rt_is_output_route(rt) ? 2170 LOOPBACK_IFINDEX : 2171 skb->dev->ifindex), 2172 .flowi4_mark = skb->mark, 2173 }; 2174 struct mr_table *mrt; 2175 int err; 2176 2177 err = ipmr_fib_lookup(net, &fl4, &mrt); 2178 if (err) 2179 return ERR_PTR(err); 2180 return mrt; 2181 } 2182 2183 /* Multicast packets for forwarding arrive here 2184 * Called with rcu_read_lock(); 2185 */ 2186 int ip_mr_input(struct sk_buff *skb) 2187 { 2188 struct mfc_cache *cache; 2189 struct net *net = dev_net(skb->dev); 2190 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2191 struct mr_table *mrt; 2192 struct net_device *dev; 2193 2194 /* skb->dev passed in is the loX master dev for vrfs. 2195 * As there are no vifs associated with loopback devices, 2196 * get the proper interface that does have a vif associated with it. 2197 */ 2198 dev = skb->dev; 2199 if (netif_is_l3_master(skb->dev)) { 2200 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2201 if (!dev) { 2202 kfree_skb(skb); 2203 return -ENODEV; 2204 } 2205 } 2206 2207 /* Packet is looped back after forward, it should not be 2208 * forwarded second time, but still can be delivered locally. 2209 */ 2210 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2211 goto dont_forward; 2212 2213 mrt = ipmr_rt_fib_lookup(net, skb); 2214 if (IS_ERR(mrt)) { 2215 kfree_skb(skb); 2216 return PTR_ERR(mrt); 2217 } 2218 if (!local) { 2219 if (IPCB(skb)->opt.router_alert) { 2220 if (ip_call_ra_chain(skb)) 2221 return 0; 2222 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2223 /* IGMPv1 (and broken IGMPv2 implementations sort of 2224 * Cisco IOS <= 11.2(8)) do not put router alert 2225 * option to IGMP packets destined to routable 2226 * groups. It is very bad, because it means 2227 * that we can forward NO IGMP messages. 2228 */ 2229 struct sock *mroute_sk; 2230 2231 mroute_sk = rcu_dereference(mrt->mroute_sk); 2232 if (mroute_sk) { 2233 nf_reset_ct(skb); 2234 raw_rcv(mroute_sk, skb); 2235 return 0; 2236 } 2237 } 2238 } 2239 2240 /* already under rcu_read_lock() */ 2241 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2242 if (!cache) { 2243 int vif = ipmr_find_vif(mrt, dev); 2244 2245 if (vif >= 0) 2246 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2247 vif); 2248 } 2249 2250 /* No usable cache entry */ 2251 if (!cache) { 2252 int vif; 2253 2254 if (local) { 2255 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2256 ip_local_deliver(skb); 2257 if (!skb2) 2258 return -ENOBUFS; 2259 skb = skb2; 2260 } 2261 2262 vif = ipmr_find_vif(mrt, dev); 2263 if (vif >= 0) 2264 return ipmr_cache_unresolved(mrt, vif, skb, dev); 2265 kfree_skb(skb); 2266 return -ENODEV; 2267 } 2268 2269 ip_mr_forward(net, mrt, dev, skb, cache, local); 2270 2271 if (local) 2272 return ip_local_deliver(skb); 2273 2274 return 0; 2275 2276 dont_forward: 2277 if (local) 2278 return ip_local_deliver(skb); 2279 kfree_skb(skb); 2280 return 0; 2281 } 2282 2283 static void ip_mr_output_finish(struct net *net, struct mr_table *mrt, 2284 struct net_device *dev, struct sk_buff *skb, 2285 struct mfc_cache *c) 2286 { 2287 int psend = -1; 2288 int ct; 2289 2290 atomic_long_inc(&c->_c.mfc_un.res.pkt); 2291 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2292 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2293 2294 /* Forward the frame */ 2295 if (c->mfc_origin == htonl(INADDR_ANY) && 2296 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2297 if (ip_hdr(skb)->ttl > 2298 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2299 /* It's an (*,*) entry and the packet is not coming from 2300 * the upstream: forward the packet to the upstream 2301 * only. 2302 */ 2303 psend = c->_c.mfc_parent; 2304 goto last_xmit; 2305 } 2306 goto dont_xmit; 2307 } 2308 2309 for (ct = c->_c.mfc_un.res.maxvif - 1; 2310 ct >= c->_c.mfc_un.res.minvif; ct--) { 2311 if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2312 if (psend != -1) { 2313 struct sk_buff *skb2; 2314 2315 skb2 = skb_clone(skb, GFP_ATOMIC); 2316 if (skb2) 2317 ipmr_queue_output_xmit(net, mrt, 2318 skb2, psend); 2319 } 2320 psend = ct; 2321 } 2322 } 2323 2324 last_xmit: 2325 if (psend != -1) { 2326 ipmr_queue_output_xmit(net, mrt, skb, psend); 2327 return; 2328 } 2329 2330 dont_xmit: 2331 kfree_skb(skb); 2332 } 2333 2334 /* Multicast packets for forwarding arrive here 2335 * Called with rcu_read_lock(); 2336 */ 2337 int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2338 { 2339 struct rtable *rt = skb_rtable(skb); 2340 struct mfc_cache *cache; 2341 struct net_device *dev; 2342 struct mr_table *mrt; 2343 int vif; 2344 2345 guard(rcu)(); 2346 2347 dev = dst_dev_rcu(&rt->dst); 2348 2349 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2350 goto mc_output; 2351 if (!(IPCB(skb)->flags & IPSKB_MCROUTE)) 2352 goto mc_output; 2353 2354 skb->dev = dev; 2355 2356 mrt = ipmr_rt_fib_lookup(net, skb); 2357 if (IS_ERR(mrt)) 2358 goto mc_output; 2359 2360 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2361 if (!cache) { 2362 vif = ipmr_find_vif(mrt, dev); 2363 if (vif >= 0) 2364 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2365 vif); 2366 } 2367 2368 /* No usable cache entry */ 2369 if (!cache) { 2370 vif = ipmr_find_vif(mrt, dev); 2371 if (vif >= 0) 2372 return ipmr_cache_unresolved(mrt, vif, skb, dev); 2373 goto mc_output; 2374 } 2375 2376 vif = cache->_c.mfc_parent; 2377 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2378 goto mc_output; 2379 2380 ip_mr_output_finish(net, mrt, dev, skb, cache); 2381 return 0; 2382 2383 mc_output: 2384 return ip_mc_output(net, sk, skb); 2385 } 2386 2387 #ifdef CONFIG_IP_PIMSM_V1 2388 /* Handle IGMP messages of PIMv1 */ 2389 int pim_rcv_v1(struct sk_buff *skb) 2390 { 2391 struct igmphdr *pim; 2392 struct net *net = dev_net(skb->dev); 2393 struct mr_table *mrt; 2394 2395 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2396 goto drop; 2397 2398 pim = igmp_hdr(skb); 2399 2400 mrt = ipmr_rt_fib_lookup(net, skb); 2401 if (IS_ERR(mrt)) 2402 goto drop; 2403 if (!READ_ONCE(mrt->mroute_do_pim) || 2404 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2405 goto drop; 2406 2407 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2408 drop: 2409 kfree_skb(skb); 2410 } 2411 return 0; 2412 } 2413 #endif 2414 2415 #ifdef CONFIG_IP_PIMSM_V2 2416 static int pim_rcv(struct sk_buff *skb) 2417 { 2418 struct pimreghdr *pim; 2419 struct net *net = dev_net(skb->dev); 2420 struct mr_table *mrt; 2421 2422 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2423 goto drop; 2424 2425 pim = (struct pimreghdr *)skb_transport_header(skb); 2426 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2427 (pim->flags & PIM_NULL_REGISTER) || 2428 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2429 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2430 goto drop; 2431 2432 mrt = ipmr_rt_fib_lookup(net, skb); 2433 if (IS_ERR(mrt)) 2434 goto drop; 2435 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2436 drop: 2437 kfree_skb(skb); 2438 } 2439 return 0; 2440 } 2441 #endif 2442 2443 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2444 __be32 saddr, __be32 daddr, 2445 struct rtmsg *rtm, u32 portid) 2446 { 2447 struct mfc_cache *cache; 2448 struct mr_table *mrt; 2449 int err; 2450 2451 rcu_read_lock(); 2452 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); 2453 if (!mrt) { 2454 rcu_read_unlock(); 2455 return -ENOENT; 2456 } 2457 2458 cache = ipmr_cache_find(mrt, saddr, daddr); 2459 if (!cache && skb->dev) { 2460 int vif = ipmr_find_vif(mrt, skb->dev); 2461 2462 if (vif >= 0) 2463 cache = ipmr_cache_find_any(mrt, daddr, vif); 2464 } 2465 if (!cache) { 2466 struct sk_buff *skb2; 2467 struct iphdr *iph; 2468 struct net_device *dev; 2469 int vif = -1; 2470 2471 dev = skb->dev; 2472 if (dev) 2473 vif = ipmr_find_vif(mrt, dev); 2474 if (vif < 0) { 2475 rcu_read_unlock(); 2476 return -ENODEV; 2477 } 2478 2479 skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); 2480 if (!skb2) { 2481 rcu_read_unlock(); 2482 return -ENOMEM; 2483 } 2484 2485 NETLINK_CB(skb2).portid = portid; 2486 skb_push(skb2, sizeof(struct iphdr)); 2487 skb_reset_network_header(skb2); 2488 iph = ip_hdr(skb2); 2489 iph->ihl = sizeof(struct iphdr) >> 2; 2490 iph->saddr = saddr; 2491 iph->daddr = daddr; 2492 iph->version = 0; 2493 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2494 rcu_read_unlock(); 2495 return err; 2496 } 2497 2498 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2499 rcu_read_unlock(); 2500 return err; 2501 } 2502 2503 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2504 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2505 int flags) 2506 { 2507 struct nlmsghdr *nlh; 2508 struct rtmsg *rtm; 2509 int err; 2510 2511 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2512 if (!nlh) 2513 return -EMSGSIZE; 2514 2515 rtm = nlmsg_data(nlh); 2516 rtm->rtm_family = RTNL_FAMILY_IPMR; 2517 rtm->rtm_dst_len = 32; 2518 rtm->rtm_src_len = 32; 2519 rtm->rtm_tos = 0; 2520 rtm->rtm_table = mrt->id; 2521 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2522 goto nla_put_failure; 2523 rtm->rtm_type = RTN_MULTICAST; 2524 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2525 if (c->_c.mfc_flags & MFC_STATIC) 2526 rtm->rtm_protocol = RTPROT_STATIC; 2527 else 2528 rtm->rtm_protocol = RTPROT_MROUTED; 2529 rtm->rtm_flags = 0; 2530 2531 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2532 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2533 goto nla_put_failure; 2534 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2535 /* do not break the dump if cache is unresolved */ 2536 if (err < 0 && err != -ENOENT) 2537 goto nla_put_failure; 2538 2539 nlmsg_end(skb, nlh); 2540 return 0; 2541 2542 nla_put_failure: 2543 nlmsg_cancel(skb, nlh); 2544 return -EMSGSIZE; 2545 } 2546 2547 static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2548 u32 portid, u32 seq, struct mr_mfc *c, int cmd, 2549 int flags) 2550 { 2551 return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, 2552 cmd, flags); 2553 } 2554 2555 static size_t mroute_msgsize(bool unresolved) 2556 { 2557 size_t len = 2558 NLMSG_ALIGN(sizeof(struct rtmsg)) 2559 + nla_total_size(4) /* RTA_TABLE */ 2560 + nla_total_size(4) /* RTA_SRC */ 2561 + nla_total_size(4) /* RTA_DST */ 2562 ; 2563 2564 if (!unresolved) 2565 len = len 2566 + nla_total_size(4) /* RTA_IIF */ 2567 + nla_total_size(0) /* RTA_MULTIPATH */ 2568 + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop)) 2569 /* RTA_MFC_STATS */ 2570 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2571 ; 2572 2573 return len; 2574 } 2575 2576 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2577 int cmd) 2578 { 2579 struct net *net = read_pnet(&mrt->net); 2580 struct sk_buff *skb; 2581 int err = -ENOBUFS; 2582 2583 skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS), 2584 GFP_ATOMIC); 2585 if (!skb) 2586 goto errout; 2587 2588 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2589 if (err < 0) 2590 goto errout; 2591 2592 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2593 return; 2594 2595 errout: 2596 kfree_skb(skb); 2597 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2598 } 2599 2600 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2601 { 2602 size_t len = 2603 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2604 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2605 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2606 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2607 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2608 + nla_total_size(4) /* IPMRA_CREPORT_TABLE */ 2609 /* IPMRA_CREPORT_PKT */ 2610 + nla_total_size(payloadlen) 2611 ; 2612 2613 return len; 2614 } 2615 2616 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2617 { 2618 struct net *net = read_pnet(&mrt->net); 2619 struct nlmsghdr *nlh; 2620 struct rtgenmsg *rtgenm; 2621 struct igmpmsg *msg; 2622 struct sk_buff *skb; 2623 struct nlattr *nla; 2624 int payloadlen; 2625 2626 payloadlen = pkt->len - sizeof(struct igmpmsg); 2627 msg = (struct igmpmsg *)skb_network_header(pkt); 2628 2629 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2630 if (!skb) 2631 goto errout; 2632 2633 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2634 sizeof(struct rtgenmsg), 0); 2635 if (!nlh) 2636 goto errout; 2637 rtgenm = nlmsg_data(nlh); 2638 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2639 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2640 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) || 2641 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2642 msg->im_src.s_addr) || 2643 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2644 msg->im_dst.s_addr) || 2645 nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id)) 2646 goto nla_put_failure; 2647 2648 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); 2649 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2650 nla_data(nla), payloadlen)) 2651 goto nla_put_failure; 2652 2653 nlmsg_end(skb, nlh); 2654 2655 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2656 return; 2657 2658 nla_put_failure: 2659 nlmsg_cancel(skb, nlh); 2660 errout: 2661 kfree_skb(skb); 2662 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2663 } 2664 2665 static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, 2666 const struct nlmsghdr *nlh, 2667 struct nlattr **tb, 2668 struct netlink_ext_ack *extack) 2669 { 2670 struct rtmsg *rtm; 2671 int i, err; 2672 2673 rtm = nlmsg_payload(nlh, sizeof(*rtm)); 2674 if (!rtm) { 2675 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); 2676 return -EINVAL; 2677 } 2678 2679 if (!netlink_strict_get_check(skb)) 2680 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 2681 rtm_ipv4_policy, extack); 2682 2683 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || 2684 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 2685 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2686 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2687 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); 2688 return -EINVAL; 2689 } 2690 2691 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2692 rtm_ipv4_policy, extack); 2693 if (err) 2694 return err; 2695 2696 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2697 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2698 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); 2699 return -EINVAL; 2700 } 2701 2702 for (i = 0; i <= RTA_MAX; i++) { 2703 if (!tb[i]) 2704 continue; 2705 2706 switch (i) { 2707 case RTA_SRC: 2708 case RTA_DST: 2709 case RTA_TABLE: 2710 break; 2711 default: 2712 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); 2713 return -EINVAL; 2714 } 2715 } 2716 2717 return 0; 2718 } 2719 2720 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2721 struct netlink_ext_ack *extack) 2722 { 2723 struct net *net = sock_net(in_skb->sk); 2724 struct nlattr *tb[RTA_MAX + 1]; 2725 struct mfc_cache *cache; 2726 struct mr_table *mrt; 2727 struct sk_buff *skb; 2728 __be32 src, grp; 2729 u32 tableid; 2730 int err; 2731 2732 err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2733 if (err < 0) 2734 goto errout; 2735 2736 src = nla_get_in_addr_default(tb[RTA_SRC], 0); 2737 grp = nla_get_in_addr_default(tb[RTA_DST], 0); 2738 tableid = nla_get_u32_default(tb[RTA_TABLE], 0); 2739 2740 skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); 2741 if (!skb) { 2742 err = -ENOBUFS; 2743 goto errout; 2744 } 2745 2746 rcu_read_lock(); 2747 2748 mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); 2749 if (!mrt) { 2750 err = -ENOENT; 2751 goto errout_unlock; 2752 } 2753 2754 cache = ipmr_cache_find(mrt, src, grp); 2755 if (!cache) { 2756 err = -ENOENT; 2757 goto errout_unlock; 2758 } 2759 2760 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2761 nlh->nlmsg_seq, cache, 2762 RTM_NEWROUTE, 0); 2763 if (err < 0) 2764 goto errout_unlock; 2765 2766 rcu_read_unlock(); 2767 2768 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2769 errout: 2770 return err; 2771 2772 errout_unlock: 2773 rcu_read_unlock(); 2774 kfree_skb(skb); 2775 goto errout; 2776 } 2777 2778 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2779 { 2780 struct fib_dump_filter filter = { 2781 .rtnl_held = false, 2782 }; 2783 int err; 2784 2785 rcu_read_lock(); 2786 2787 if (cb->strict_check) { 2788 err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, 2789 &filter, cb); 2790 if (err < 0) 2791 goto out; 2792 } 2793 2794 if (filter.table_id) { 2795 struct mr_table *mrt; 2796 2797 mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id); 2798 if (!mrt) { 2799 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) { 2800 err = skb->len; 2801 goto out; 2802 } 2803 2804 NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); 2805 err = -ENOENT; 2806 goto out; 2807 } 2808 2809 err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, 2810 &mfc_unres_lock, &filter); 2811 err = skb->len ? : err; 2812 goto out; 2813 } 2814 2815 err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, 2816 _ipmr_fill_mroute, &mfc_unres_lock, &filter); 2817 out: 2818 rcu_read_unlock(); 2819 2820 return err; 2821 } 2822 2823 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2824 [RTA_SRC] = { .type = NLA_U32 }, 2825 [RTA_DST] = { .type = NLA_U32 }, 2826 [RTA_IIF] = { .type = NLA_U32 }, 2827 [RTA_TABLE] = { .type = NLA_U32 }, 2828 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2829 }; 2830 2831 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2832 { 2833 switch (rtm_protocol) { 2834 case RTPROT_STATIC: 2835 case RTPROT_MROUTED: 2836 return true; 2837 } 2838 return false; 2839 } 2840 2841 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2842 { 2843 struct rtnexthop *rtnh = nla_data(nla); 2844 int remaining = nla_len(nla), vifi = 0; 2845 2846 while (rtnh_ok(rtnh, remaining)) { 2847 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2848 if (++vifi == MAXVIFS) 2849 break; 2850 rtnh = rtnh_next(rtnh, &remaining); 2851 } 2852 2853 return remaining > 0 ? -EINVAL : vifi; 2854 } 2855 2856 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2857 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2858 struct mfcctl *mfcc, int *mrtsock, 2859 struct mr_table **mrtret, 2860 struct netlink_ext_ack *extack) 2861 { 2862 struct net_device *dev = NULL; 2863 u32 tblid = RT_TABLE_DEFAULT; 2864 int ret, rem, iif = 0; 2865 struct mr_table *mrt; 2866 struct nlattr *attr; 2867 struct rtmsg *rtm; 2868 2869 ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, 2870 rtm_ipmr_policy, extack); 2871 if (ret < 0) 2872 goto out; 2873 rtm = nlmsg_data(nlh); 2874 2875 ret = -EINVAL; 2876 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2877 rtm->rtm_type != RTN_MULTICAST || 2878 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2879 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2880 goto out; 2881 2882 memset(mfcc, 0, sizeof(*mfcc)); 2883 mfcc->mfcc_parent = -1; 2884 ret = 0; 2885 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2886 switch (nla_type(attr)) { 2887 case RTA_SRC: 2888 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2889 break; 2890 case RTA_DST: 2891 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2892 break; 2893 case RTA_IIF: 2894 iif = nla_get_u32(attr); 2895 break; 2896 case RTA_MULTIPATH: 2897 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2898 ret = -EINVAL; 2899 goto out; 2900 } 2901 break; 2902 case RTA_PREFSRC: 2903 ret = 1; 2904 break; 2905 case RTA_TABLE: 2906 tblid = nla_get_u32(attr); 2907 break; 2908 } 2909 } 2910 2911 rcu_read_lock(); 2912 2913 mrt = __ipmr_get_table(net, tblid); 2914 if (!mrt) { 2915 ret = -ENOENT; 2916 goto unlock; 2917 } 2918 2919 if (iif) { 2920 dev = dev_get_by_index_rcu(net, iif); 2921 if (!dev) { 2922 ret = -ENODEV; 2923 goto unlock; 2924 } 2925 2926 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2927 } 2928 2929 *mrtret = mrt; 2930 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2931 2932 unlock: 2933 rcu_read_unlock(); 2934 out: 2935 return ret; 2936 } 2937 2938 /* takes care of both newroute and delroute */ 2939 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2940 struct netlink_ext_ack *extack) 2941 { 2942 struct net *net = sock_net(skb->sk); 2943 int ret, mrtsock = 0, parent; 2944 struct mr_table *tbl = NULL; 2945 struct mfcctl mfcc; 2946 2947 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2948 if (ret < 0) 2949 return ret; 2950 2951 parent = ret ? mfcc.mfcc_parent : -1; 2952 2953 mutex_lock(&net->ipv4.mfc_mutex); 2954 2955 if (nlh->nlmsg_type == RTM_NEWROUTE) 2956 ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2957 else 2958 ret = ipmr_mfc_delete(tbl, &mfcc, parent); 2959 2960 mutex_unlock(&net->ipv4.mfc_mutex); 2961 2962 return ret; 2963 } 2964 2965 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2966 { 2967 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2968 2969 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2970 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2971 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2972 READ_ONCE(mrt->mroute_reg_vif_num)) || 2973 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2974 READ_ONCE(mrt->mroute_do_assert)) || 2975 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, 2976 READ_ONCE(mrt->mroute_do_pim)) || 2977 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, 2978 READ_ONCE(mrt->mroute_do_wrvifwhole))) 2979 return false; 2980 2981 return true; 2982 } 2983 2984 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2985 { 2986 struct net_device *vif_dev; 2987 struct nlattr *vif_nest; 2988 struct vif_device *vif; 2989 2990 vif = &mrt->vif_table[vifid]; 2991 vif_dev = vif_dev_read(vif); 2992 /* if the VIF doesn't exist just continue */ 2993 if (!vif_dev) 2994 return true; 2995 2996 vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); 2997 if (!vif_nest) 2998 return false; 2999 3000 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) || 3001 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 3002 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 3003 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in), 3004 IPMRA_VIFA_PAD) || 3005 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out), 3006 IPMRA_VIFA_PAD) || 3007 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in), 3008 IPMRA_VIFA_PAD) || 3009 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out), 3010 IPMRA_VIFA_PAD) || 3011 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 3012 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 3013 nla_nest_cancel(skb, vif_nest); 3014 return false; 3015 } 3016 nla_nest_end(skb, vif_nest); 3017 3018 return true; 3019 } 3020 3021 static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, 3022 struct netlink_ext_ack *extack) 3023 { 3024 struct ifinfomsg *ifm; 3025 3026 ifm = nlmsg_payload(nlh, sizeof(*ifm)); 3027 if (!ifm) { 3028 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); 3029 return -EINVAL; 3030 } 3031 3032 if (nlmsg_attrlen(nlh, sizeof(*ifm))) { 3033 NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump"); 3034 return -EINVAL; 3035 } 3036 3037 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || 3038 ifm->ifi_change || ifm->ifi_index) { 3039 NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); 3040 return -EINVAL; 3041 } 3042 3043 return 0; 3044 } 3045 3046 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 3047 { 3048 struct net *net = sock_net(skb->sk); 3049 struct nlmsghdr *nlh = NULL; 3050 unsigned int t = 0, s_t; 3051 unsigned int e = 0, s_e; 3052 struct mr_table *mrt; 3053 3054 if (cb->strict_check) { 3055 int err = ipmr_valid_dumplink(cb->nlh, cb->extack); 3056 3057 if (err < 0) 3058 return err; 3059 } 3060 3061 s_t = cb->args[0]; 3062 s_e = cb->args[1]; 3063 3064 rcu_read_lock(); 3065 3066 ipmr_for_each_table(mrt, net) { 3067 struct nlattr *vifs, *af; 3068 struct ifinfomsg *hdr; 3069 u32 i; 3070 3071 if (t < s_t) 3072 goto skip_table; 3073 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 3074 cb->nlh->nlmsg_seq, RTM_NEWLINK, 3075 sizeof(*hdr), NLM_F_MULTI); 3076 if (!nlh) 3077 break; 3078 3079 hdr = nlmsg_data(nlh); 3080 memset(hdr, 0, sizeof(*hdr)); 3081 hdr->ifi_family = RTNL_FAMILY_IPMR; 3082 3083 af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); 3084 if (!af) { 3085 nlmsg_cancel(skb, nlh); 3086 goto out; 3087 } 3088 3089 if (!ipmr_fill_table(mrt, skb)) { 3090 nlmsg_cancel(skb, nlh); 3091 goto out; 3092 } 3093 3094 vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); 3095 if (!vifs) { 3096 nla_nest_end(skb, af); 3097 nlmsg_end(skb, nlh); 3098 goto out; 3099 } 3100 for (i = 0; i < READ_ONCE(mrt->maxvif); i++) { 3101 if (e < s_e) 3102 goto skip_entry; 3103 if (!ipmr_fill_vif(mrt, i, skb)) { 3104 nla_nest_end(skb, vifs); 3105 nla_nest_end(skb, af); 3106 nlmsg_end(skb, nlh); 3107 goto out; 3108 } 3109 skip_entry: 3110 e++; 3111 } 3112 s_e = 0; 3113 e = 0; 3114 nla_nest_end(skb, vifs); 3115 nla_nest_end(skb, af); 3116 nlmsg_end(skb, nlh); 3117 skip_table: 3118 t++; 3119 } 3120 3121 out: 3122 rcu_read_unlock(); 3123 3124 cb->args[1] = e; 3125 cb->args[0] = t; 3126 3127 return skb->len; 3128 } 3129 3130 #ifdef CONFIG_PROC_FS 3131 /* The /proc interfaces to multicast routing : 3132 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 3133 */ 3134 3135 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 3136 __acquires(RCU) 3137 { 3138 struct mr_vif_iter *iter = seq->private; 3139 struct net *net = seq_file_net(seq); 3140 struct mr_table *mrt; 3141 3142 rcu_read_lock(); 3143 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); 3144 if (!mrt) { 3145 rcu_read_unlock(); 3146 return ERR_PTR(-ENOENT); 3147 } 3148 3149 iter->mrt = mrt; 3150 3151 return mr_vif_seq_start(seq, pos); 3152 } 3153 3154 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 3155 __releases(RCU) 3156 { 3157 rcu_read_unlock(); 3158 } 3159 3160 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 3161 { 3162 struct mr_vif_iter *iter = seq->private; 3163 struct mr_table *mrt = iter->mrt; 3164 3165 if (v == SEQ_START_TOKEN) { 3166 seq_puts(seq, 3167 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 3168 } else { 3169 const struct vif_device *vif = v; 3170 const struct net_device *vif_dev; 3171 const char *name; 3172 3173 vif_dev = vif_dev_read(vif); 3174 name = vif_dev ? vif_dev->name : "none"; 3175 seq_printf(seq, 3176 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3177 vif - mrt->vif_table, 3178 name, vif->bytes_in, vif->pkt_in, 3179 vif->bytes_out, vif->pkt_out, 3180 vif->flags, vif->local, vif->remote); 3181 } 3182 return 0; 3183 } 3184 3185 static const struct seq_operations ipmr_vif_seq_ops = { 3186 .start = ipmr_vif_seq_start, 3187 .next = mr_vif_seq_next, 3188 .stop = ipmr_vif_seq_stop, 3189 .show = ipmr_vif_seq_show, 3190 }; 3191 3192 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 3193 { 3194 struct net *net = seq_file_net(seq); 3195 struct mr_table *mrt; 3196 3197 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 3198 if (!mrt) 3199 return ERR_PTR(-ENOENT); 3200 3201 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 3202 } 3203 3204 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 3205 { 3206 int n; 3207 3208 if (v == SEQ_START_TOKEN) { 3209 seq_puts(seq, 3210 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 3211 } else { 3212 const struct mfc_cache *mfc = v; 3213 const struct mr_mfc_iter *it = seq->private; 3214 const struct mr_table *mrt = it->mrt; 3215 3216 seq_printf(seq, "%08X %08X %-3hd", 3217 (__force u32) mfc->mfc_mcastgrp, 3218 (__force u32) mfc->mfc_origin, 3219 mfc->_c.mfc_parent); 3220 3221 if (it->cache != &mrt->mfc_unres_queue) { 3222 seq_printf(seq, " %8lu %8lu %8lu", 3223 atomic_long_read(&mfc->_c.mfc_un.res.pkt), 3224 atomic_long_read(&mfc->_c.mfc_un.res.bytes), 3225 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); 3226 for (n = mfc->_c.mfc_un.res.minvif; 3227 n < mfc->_c.mfc_un.res.maxvif; n++) { 3228 if (VIF_EXISTS(mrt, n) && 3229 mfc->_c.mfc_un.res.ttls[n] < 255) 3230 seq_printf(seq, 3231 " %2d:%-3d", 3232 n, mfc->_c.mfc_un.res.ttls[n]); 3233 } 3234 } else { 3235 /* unresolved mfc_caches don't contain 3236 * pkt, bytes and wrong_if values 3237 */ 3238 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3239 } 3240 seq_putc(seq, '\n'); 3241 } 3242 return 0; 3243 } 3244 3245 static const struct seq_operations ipmr_mfc_seq_ops = { 3246 .start = ipmr_mfc_seq_start, 3247 .next = mr_mfc_seq_next, 3248 .stop = mr_mfc_seq_stop, 3249 .show = ipmr_mfc_seq_show, 3250 }; 3251 #endif 3252 3253 #ifdef CONFIG_IP_PIMSM_V2 3254 static const struct net_protocol pim_protocol = { 3255 .handler = pim_rcv, 3256 }; 3257 #endif 3258 3259 static unsigned int ipmr_seq_read(const struct net *net) 3260 { 3261 return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); 3262 } 3263 3264 static int ipmr_dump(struct net *net, struct notifier_block *nb, 3265 struct netlink_ext_ack *extack) 3266 { 3267 return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, 3268 ipmr_mr_table_iter, extack); 3269 } 3270 3271 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 3272 .family = RTNL_FAMILY_IPMR, 3273 .fib_seq_read = ipmr_seq_read, 3274 .fib_dump = ipmr_dump, 3275 .owner = THIS_MODULE, 3276 }; 3277 3278 static int __net_init ipmr_notifier_init(struct net *net) 3279 { 3280 struct fib_notifier_ops *ops; 3281 3282 atomic_set(&net->ipv4.ipmr_seq, 0); 3283 3284 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3285 if (IS_ERR(ops)) 3286 return PTR_ERR(ops); 3287 net->ipv4.ipmr_notifier_ops = ops; 3288 3289 return 0; 3290 } 3291 3292 static void __net_exit ipmr_notifier_exit(struct net *net) 3293 { 3294 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3295 net->ipv4.ipmr_notifier_ops = NULL; 3296 } 3297 3298 /* Setup for IP multicast routing */ 3299 static int __net_init ipmr_net_init(struct net *net) 3300 { 3301 LIST_HEAD(dev_kill_list); 3302 int err; 3303 3304 mutex_init(&net->ipv4.mfc_mutex); 3305 3306 err = ipmr_notifier_init(net); 3307 if (err) 3308 goto ipmr_notifier_fail; 3309 3310 err = ipmr_rules_init(net); 3311 if (err < 0) 3312 goto ipmr_rules_fail; 3313 3314 #ifdef CONFIG_PROC_FS 3315 err = -ENOMEM; 3316 if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, 3317 sizeof(struct mr_vif_iter))) 3318 goto proc_vif_fail; 3319 if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 3320 sizeof(struct mr_mfc_iter))) 3321 goto proc_cache_fail; 3322 #endif 3323 return 0; 3324 3325 #ifdef CONFIG_PROC_FS 3326 proc_cache_fail: 3327 remove_proc_entry("ip_mr_vif", net->proc_net); 3328 proc_vif_fail: 3329 ipmr_rules_exit_rtnl(net, &dev_kill_list); 3330 ipmr_rules_exit(net); 3331 #endif 3332 ipmr_rules_fail: 3333 ipmr_notifier_exit(net); 3334 ipmr_notifier_fail: 3335 return err; 3336 } 3337 3338 static void __net_exit ipmr_net_exit(struct net *net) 3339 { 3340 #ifdef CONFIG_PROC_FS 3341 remove_proc_entry("ip_mr_cache", net->proc_net); 3342 remove_proc_entry("ip_mr_vif", net->proc_net); 3343 #endif 3344 ipmr_rules_exit(net); 3345 ipmr_notifier_exit(net); 3346 } 3347 3348 static void __net_exit ipmr_net_exit_rtnl(struct net *net, 3349 struct list_head *dev_kill_list) 3350 { 3351 ipmr_rules_exit_rtnl(net, dev_kill_list); 3352 } 3353 3354 static struct pernet_operations ipmr_net_ops = { 3355 .init = ipmr_net_init, 3356 .exit = ipmr_net_exit, 3357 .exit_rtnl = ipmr_net_exit_rtnl, 3358 }; 3359 3360 static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { 3361 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, 3362 .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, 3363 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, 3364 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, 3365 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, 3366 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, 3367 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, 3368 .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, 3369 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, 3370 }; 3371 3372 int __init ip_mr_init(void) 3373 { 3374 int err; 3375 3376 mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); 3377 3378 err = register_pernet_subsys(&ipmr_net_ops); 3379 if (err) 3380 goto reg_pernet_fail; 3381 3382 err = register_netdevice_notifier(&ip_mr_notifier); 3383 if (err) 3384 goto reg_notif_fail; 3385 #ifdef CONFIG_IP_PIMSM_V2 3386 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3387 pr_err("%s: can't add PIM protocol\n", __func__); 3388 err = -EAGAIN; 3389 goto add_proto_fail; 3390 } 3391 #endif 3392 rtnl_register_many(ipmr_rtnl_msg_handlers); 3393 3394 return 0; 3395 3396 #ifdef CONFIG_IP_PIMSM_V2 3397 add_proto_fail: 3398 unregister_netdevice_notifier(&ip_mr_notifier); 3399 #endif 3400 reg_notif_fail: 3401 unregister_pernet_subsys(&ipmr_net_ops); 3402 reg_pernet_fail: 3403 kmem_cache_destroy(mrt_cachep); 3404 return err; 3405 } 3406