// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/flow.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
#include <net/inet_dscp.h>

#include <linux/nospec.h>

/* IPMR policy rule: carries nothing beyond the generic fib_rule. */
struct ipmr_rule {
	struct fib_rule		common;
};

/* Result slot filled by a rule lookup: the multicast routing table the
 * matching rule resolved to.
 */
struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);

/* Fetch the net_device attached to @vif through rcu_dereference(); the
 * result may be NULL when the vif slot has no device.
 */
static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
100 */ 101 102 static struct kmem_cache *mrt_cachep __ro_after_init; 103 104 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 105 static void ipmr_free_table(struct mr_table *mrt, 106 struct list_head *dev_kill_list); 107 108 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 109 struct net_device *dev, struct sk_buff *skb, 110 struct mfc_cache *cache, int local); 111 static int ipmr_cache_report(const struct mr_table *mrt, 112 struct sk_buff *pkt, vifi_t vifi, int assert); 113 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 114 int cmd); 115 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 116 static void mroute_clean_tables(struct mr_table *mrt, int flags, 117 struct list_head *dev_kill_list); 118 static void ipmr_expire_process(struct timer_list *t); 119 120 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 121 #define ipmr_for_each_table(mrt, net) \ 122 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ 123 lockdep_rtnl_is_held() || \ 124 list_empty(&net->ipv4.mr_tables)) 125 126 static struct mr_table *ipmr_mr_table_iter(struct net *net, 127 struct mr_table *mrt) 128 { 129 struct mr_table *ret; 130 131 if (!mrt) 132 ret = list_entry_rcu(net->ipv4.mr_tables.next, 133 struct mr_table, list); 134 else 135 ret = list_entry_rcu(mrt->list.next, 136 struct mr_table, list); 137 138 if (&ret->list == &net->ipv4.mr_tables) 139 return NULL; 140 return ret; 141 } 142 143 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 144 { 145 struct mr_table *mrt; 146 147 ipmr_for_each_table(mrt, net) { 148 if (mrt->id == id) 149 return mrt; 150 } 151 return NULL; 152 } 153 154 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 155 struct mr_table **mrt) 156 { 157 int err; 158 struct ipmr_result res; 159 struct fib_lookup_arg arg = { 160 .result = &res, 161 .flags = FIB_LOOKUP_NOREF, 162 }; 163 164 /* update flow if oif or iif point to device enslaved to l3mdev */ 
165 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 166 167 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 168 flowi4_to_flowi(flp4), 0, &arg); 169 if (err < 0) 170 return err; 171 *mrt = res.mrt; 172 return 0; 173 } 174 175 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 176 int flags, struct fib_lookup_arg *arg) 177 { 178 struct ipmr_result *res = arg->result; 179 struct mr_table *mrt; 180 181 switch (rule->action) { 182 case FR_ACT_TO_TBL: 183 break; 184 case FR_ACT_UNREACHABLE: 185 return -ENETUNREACH; 186 case FR_ACT_PROHIBIT: 187 return -EACCES; 188 case FR_ACT_BLACKHOLE: 189 default: 190 return -EINVAL; 191 } 192 193 arg->table = fib_rule_get_table(rule, arg); 194 195 mrt = __ipmr_get_table(rule->fr_net, arg->table); 196 if (!mrt) 197 return -EAGAIN; 198 res->mrt = mrt; 199 return 0; 200 } 201 202 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 203 { 204 return 1; 205 } 206 207 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 208 struct fib_rule_hdr *frh, struct nlattr **tb, 209 struct netlink_ext_ack *extack) 210 { 211 return 0; 212 } 213 214 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 215 struct nlattr **tb) 216 { 217 return 1; 218 } 219 220 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 221 struct fib_rule_hdr *frh) 222 { 223 frh->dst_len = 0; 224 frh->src_len = 0; 225 frh->tos = 0; 226 return 0; 227 } 228 229 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 230 .family = RTNL_FAMILY_IPMR, 231 .rule_size = sizeof(struct ipmr_rule), 232 .addr_size = sizeof(u32), 233 .action = ipmr_rule_action, 234 .match = ipmr_rule_match, 235 .configure = ipmr_rule_configure, 236 .compare = ipmr_rule_compare, 237 .fill = ipmr_rule_fill, 238 .nlgroup = RTNLGRP_IPV4_RULE, 239 .owner = THIS_MODULE, 240 }; 241 242 static int __net_init ipmr_rules_init(struct net *net) 243 { 244 struct fib_rules_ops *ops; 245 
LIST_HEAD(dev_kill_list); 246 struct mr_table *mrt; 247 int err; 248 249 ops = fib_rules_register(&ipmr_rules_ops_template, net); 250 if (IS_ERR(ops)) 251 return PTR_ERR(ops); 252 253 INIT_LIST_HEAD(&net->ipv4.mr_tables); 254 255 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 256 if (IS_ERR(mrt)) { 257 err = PTR_ERR(mrt); 258 goto err1; 259 } 260 261 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); 262 if (err < 0) 263 goto err2; 264 265 net->ipv4.mr_rules_ops = ops; 266 return 0; 267 268 err2: 269 ipmr_free_table(mrt, &dev_kill_list); 270 err1: 271 fib_rules_unregister(ops); 272 return err; 273 } 274 275 static void __net_exit ipmr_rules_exit(struct net *net) 276 { 277 fib_rules_unregister(net->ipv4.mr_rules_ops); 278 } 279 280 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 281 struct list_head *dev_kill_list) 282 { 283 struct mr_table *mrt, *next; 284 285 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 286 list_del_rcu(&mrt->list); 287 ipmr_free_table(mrt, dev_kill_list); 288 } 289 } 290 291 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 292 struct netlink_ext_ack *extack) 293 { 294 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); 295 } 296 297 static unsigned int ipmr_rules_seq_read(const struct net *net) 298 { 299 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 300 } 301 302 bool ipmr_rule_default(const struct fib_rule *rule) 303 { 304 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 305 } 306 EXPORT_SYMBOL(ipmr_rule_default); 307 #else 308 static struct mr_table *ipmr_mr_table_iter(struct net *net, 309 struct mr_table *mrt) 310 { 311 if (!mrt) 312 return rcu_dereference(net->ipv4.mrt); 313 return NULL; 314 } 315 316 static struct mr_table *__ipmr_get_table(struct net *net, u32 id) 317 { 318 return rcu_dereference_check(net->ipv4.mrt, 319 lockdep_rtnl_is_held() || 320 !rcu_access_pointer(net->ipv4.mrt)); 321 } 322 323 #define ipmr_for_each_table(mrt, net) \ 324 for (mrt 
= __ipmr_get_table(net, 0); mrt; mrt = NULL) 325 326 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 327 struct mr_table **mrt) 328 { 329 *mrt = rcu_dereference(net->ipv4.mrt); 330 if (!*mrt) 331 return -EAGAIN; 332 return 0; 333 } 334 335 static int __net_init ipmr_rules_init(struct net *net) 336 { 337 struct mr_table *mrt; 338 339 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 340 if (IS_ERR(mrt)) 341 return PTR_ERR(mrt); 342 343 rcu_assign_pointer(net->ipv4.mrt, mrt); 344 return 0; 345 } 346 347 static void __net_exit ipmr_rules_exit(struct net *net) 348 { 349 } 350 351 static void __net_exit ipmr_rules_exit_rtnl(struct net *net, 352 struct list_head *dev_kill_list) 353 { 354 struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); 355 356 RCU_INIT_POINTER(net->ipv4.mrt, NULL); 357 ipmr_free_table(mrt, dev_kill_list); 358 } 359 360 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, 361 struct netlink_ext_ack *extack) 362 { 363 return 0; 364 } 365 366 static unsigned int ipmr_rules_seq_read(const struct net *net) 367 { 368 return 0; 369 } 370 371 bool ipmr_rule_default(const struct fib_rule *rule) 372 { 373 return true; 374 } 375 EXPORT_SYMBOL(ipmr_rule_default); 376 #endif 377 378 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 379 { 380 struct mr_table *mrt; 381 382 rcu_read_lock(); 383 mrt = __ipmr_get_table(net, id); 384 rcu_read_unlock(); 385 386 return mrt; 387 } 388 389 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 390 const void *ptr) 391 { 392 const struct mfc_cache_cmp_arg *cmparg = arg->key; 393 const struct mfc_cache *c = ptr; 394 395 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 396 cmparg->mfc_origin != c->mfc_origin; 397 } 398 399 static const struct rhashtable_params ipmr_rht_params = { 400 .head_offset = offsetof(struct mr_mfc, mnode), 401 .key_offset = offsetof(struct mfc_cache, cmparg), 402 .key_len = sizeof(struct mfc_cache_cmp_arg), 403 .nelem_hint = 3, 
404 .obj_cmpfn = ipmr_hash_cmp, 405 .automatic_shrinking = true, 406 }; 407 408 static void ipmr_new_table_set(struct mr_table *mrt, 409 struct net *net) 410 { 411 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 412 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 413 #endif 414 } 415 416 static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { 417 .mfc_mcastgrp = htonl(INADDR_ANY), 418 .mfc_origin = htonl(INADDR_ANY), 419 }; 420 421 static struct mr_table_ops ipmr_mr_table_ops = { 422 .rht_params = &ipmr_rht_params, 423 .cmparg_any = &ipmr_mr_table_ops_cmparg_any, 424 }; 425 426 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 427 { 428 struct mr_table *mrt; 429 430 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 431 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 432 return ERR_PTR(-EINVAL); 433 434 mrt = __ipmr_get_table(net, id); 435 if (mrt) 436 return mrt; 437 438 return mr_table_alloc(net, id, &ipmr_mr_table_ops, 439 ipmr_expire_process, ipmr_new_table_set); 440 } 441 442 static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) 443 { 444 struct net *net = read_pnet(&mrt->net); 445 LIST_HEAD(ipmr_dev_kill_list); 446 447 WARN_ON_ONCE(!mr_can_free_table(net)); 448 449 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | 450 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, 451 &ipmr_dev_kill_list); 452 timer_shutdown_sync(&mrt->ipmr_expire_timer); 453 mr_table_free(mrt); 454 455 WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); 456 list_splice(&ipmr_dev_kill_list, dev_kill_list); 457 } 458 459 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 460 461 /* Initialize ipmr pimreg/tunnel in_device */ 462 static bool ipmr_init_vif_indev(const struct net_device *dev) 463 { 464 struct in_device *in_dev; 465 466 ASSERT_RTNL(); 467 468 in_dev = __in_dev_get_rtnl(dev); 469 if (!in_dev) 470 return false; 471 ipv4_devconf_setall(in_dev); 472 
neigh_parms_data_state_setall(in_dev->arp_parms); 473 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 474 475 return true; 476 } 477 478 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 479 { 480 struct net_device *tunnel_dev, *new_dev; 481 struct ip_tunnel_parm_kern p = { }; 482 int err; 483 484 tunnel_dev = __dev_get_by_name(net, "tunl0"); 485 if (!tunnel_dev) 486 goto out; 487 488 p.iph.daddr = v->vifc_rmt_addr.s_addr; 489 p.iph.saddr = v->vifc_lcl_addr.s_addr; 490 p.iph.version = 4; 491 p.iph.ihl = 5; 492 p.iph.protocol = IPPROTO_IPIP; 493 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 494 495 if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) 496 goto out; 497 err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, 498 SIOCADDTUNNEL); 499 if (err) 500 goto out; 501 502 new_dev = __dev_get_by_name(net, p.name); 503 if (!new_dev) 504 goto out; 505 506 new_dev->flags |= IFF_MULTICAST; 507 if (!ipmr_init_vif_indev(new_dev)) 508 goto out_unregister; 509 if (dev_open(new_dev, NULL)) 510 goto out_unregister; 511 dev_hold(new_dev); 512 err = dev_set_allmulti(new_dev, 1); 513 if (err) { 514 dev_close(new_dev); 515 tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, 516 SIOCDELTUNNEL); 517 dev_put(new_dev); 518 new_dev = ERR_PTR(err); 519 } 520 return new_dev; 521 522 out_unregister: 523 unregister_netdevice(new_dev); 524 out: 525 return ERR_PTR(-ENOBUFS); 526 } 527 528 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 529 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 530 { 531 struct net *net = dev_net(dev); 532 struct mr_table *mrt; 533 struct flowi4 fl4 = { 534 .flowi4_oif = dev->ifindex, 535 .flowi4_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 536 .flowi4_mark = skb->mark, 537 }; 538 int err; 539 540 err = ipmr_fib_lookup(net, &fl4, &mrt); 541 if (err < 0) { 542 kfree_skb(skb); 543 return err; 544 } 545 546 DEV_STATS_ADD(dev, tx_bytes, skb->len); 547 DEV_STATS_INC(dev, tx_packets); 548 rcu_read_lock(); 549 550 /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ 551 ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 552 IGMPMSG_WHOLEPKT); 553 554 rcu_read_unlock(); 555 kfree_skb(skb); 556 return NETDEV_TX_OK; 557 } 558 559 static int reg_vif_get_iflink(const struct net_device *dev) 560 { 561 return 0; 562 } 563 564 static const struct net_device_ops reg_vif_netdev_ops = { 565 .ndo_start_xmit = reg_vif_xmit, 566 .ndo_get_iflink = reg_vif_get_iflink, 567 }; 568 569 static void reg_vif_setup(struct net_device *dev) 570 { 571 dev->type = ARPHRD_PIMREG; 572 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 573 dev->flags = IFF_NOARP; 574 dev->netdev_ops = ®_vif_netdev_ops; 575 dev->needs_free_netdev = true; 576 dev->netns_immutable = true; 577 } 578 579 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 580 { 581 struct net_device *dev; 582 char name[IFNAMSIZ]; 583 584 if (mrt->id == RT_TABLE_DEFAULT) 585 sprintf(name, "pimreg"); 586 else 587 sprintf(name, "pimreg%u", mrt->id); 588 589 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 590 591 if (!dev) 592 return NULL; 593 594 dev_net_set(dev, net); 595 596 if (register_netdevice(dev)) { 597 free_netdev(dev); 598 return NULL; 599 } 600 601 if (!ipmr_init_vif_indev(dev)) 602 goto failure; 603 if (dev_open(dev, NULL)) 604 goto failure; 605 606 dev_hold(dev); 607 608 return dev; 609 610 failure: 611 unregister_netdevice(dev); 612 return NULL; 613 } 614 615 /* called with rcu_read_lock() */ 616 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 617 unsigned int pimlen) 618 { 619 struct net_device *reg_dev = NULL; 620 struct iphdr *encap; 621 int vif_num; 622 623 encap = 
(struct iphdr *)(skb_transport_header(skb) + pimlen); 624 /* Check that: 625 * a. packet is really sent to a multicast group 626 * b. packet is not a NULL-REGISTER 627 * c. packet is not truncated 628 */ 629 if (!ipv4_is_multicast(encap->daddr) || 630 encap->tot_len == 0 || 631 ntohs(encap->tot_len) + pimlen > skb->len) 632 return 1; 633 634 /* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */ 635 vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 636 if (vif_num >= 0) 637 reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); 638 if (!reg_dev) 639 return 1; 640 641 skb->mac_header = skb->network_header; 642 skb_pull(skb, (u8 *)encap - skb->data); 643 skb_reset_network_header(skb); 644 skb->protocol = htons(ETH_P_IP); 645 skb->ip_summed = CHECKSUM_NONE; 646 647 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 648 649 netif_rx(skb); 650 651 return NET_RX_SUCCESS; 652 } 653 #else 654 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 655 { 656 return NULL; 657 } 658 #endif 659 660 static int call_ipmr_vif_entry_notifiers(struct net *net, 661 enum fib_event_type event_type, 662 struct vif_device *vif, 663 struct net_device *vif_dev, 664 vifi_t vif_index, u32 tb_id) 665 { 666 return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, 667 vif, vif_dev, vif_index, tb_id, 668 &net->ipv4.ipmr_seq); 669 } 670 671 static int call_ipmr_mfc_entry_notifiers(struct net *net, 672 enum fib_event_type event_type, 673 struct mfc_cache *mfc, u32 tb_id) 674 { 675 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, 676 &mfc->_c, tb_id, &net->ipv4.ipmr_seq); 677 } 678 679 /** 680 * vif_delete - Delete a VIF entry 681 * @mrt: Table to delete from 682 * @vifi: VIF identifier to delete 683 * @notify: Set to 1, if the caller is a notifier_call 684 * @head: if unregistering the VIF, place it on this queue 685 */ 686 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 687 struct list_head *head) 688 { 689 struct net *net = read_pnet(&mrt->net); 
690 struct vif_device *v; 691 struct net_device *dev; 692 struct in_device *in_dev; 693 694 if (vifi < 0 || vifi >= mrt->maxvif) 695 return -EADDRNOTAVAIL; 696 697 v = &mrt->vif_table[vifi]; 698 699 dev = rtnl_dereference(v->dev); 700 if (!dev) 701 return -EADDRNOTAVAIL; 702 703 spin_lock(&mrt_lock); 704 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, 705 vifi, mrt->id); 706 RCU_INIT_POINTER(v->dev, NULL); 707 708 if (vifi == mrt->mroute_reg_vif_num) { 709 /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ 710 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 711 } 712 if (vifi + 1 == mrt->maxvif) { 713 int tmp; 714 715 for (tmp = vifi - 1; tmp >= 0; tmp--) { 716 if (VIF_EXISTS(mrt, tmp)) 717 break; 718 } 719 WRITE_ONCE(mrt->maxvif, tmp + 1); 720 } 721 722 spin_unlock(&mrt_lock); 723 724 dev_set_allmulti(dev, -1); 725 726 in_dev = __in_dev_get_rtnl(dev); 727 if (in_dev) { 728 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 729 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 730 NETCONFA_MC_FORWARDING, 731 dev->ifindex, &in_dev->cnf); 732 ip_rt_multicast_event(in_dev); 733 } 734 735 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 736 unregister_netdevice_queue(dev, head); 737 738 netdev_put(dev, &v->dev_tracker); 739 return 0; 740 } 741 742 static void ipmr_cache_free_rcu(struct rcu_head *head) 743 { 744 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 745 746 kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); 747 } 748 749 static void ipmr_cache_free(struct mfc_cache *c) 750 { 751 call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); 752 } 753 754 /* Destroy an unresolved cache entry, killing queued skbs 755 * and reporting error to netlink readers. 
756 */ 757 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 758 { 759 struct net *net = read_pnet(&mrt->net); 760 struct sk_buff *skb; 761 struct nlmsgerr *e; 762 763 atomic_dec(&mrt->cache_resolve_queue_len); 764 765 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { 766 if (ip_hdr(skb)->version == 0) { 767 struct nlmsghdr *nlh = skb_pull(skb, 768 sizeof(struct iphdr)); 769 nlh->nlmsg_type = NLMSG_ERROR; 770 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 771 skb_trim(skb, nlh->nlmsg_len); 772 e = nlmsg_data(nlh); 773 e->error = -ETIMEDOUT; 774 memset(&e->msg, 0, sizeof(e->msg)); 775 776 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 777 } else { 778 kfree_skb(skb); 779 } 780 } 781 782 ipmr_cache_free(c); 783 } 784 785 /* Timer process for the unresolved queue. */ 786 static void ipmr_expire_process(struct timer_list *t) 787 { 788 struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer); 789 struct mr_mfc *c, *next; 790 unsigned long expires; 791 unsigned long now; 792 793 if (!spin_trylock(&mfc_unres_lock)) { 794 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 795 return; 796 } 797 798 if (list_empty(&mrt->mfc_unres_queue)) 799 goto out; 800 801 now = jiffies; 802 expires = 10*HZ; 803 804 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 805 if (time_after(c->mfc_un.unres.expires, now)) { 806 unsigned long interval = c->mfc_un.unres.expires - now; 807 if (interval < expires) 808 expires = interval; 809 continue; 810 } 811 812 list_del(&c->list); 813 mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); 814 ipmr_destroy_unres(mrt, (struct mfc_cache *)c); 815 } 816 817 if (!list_empty(&mrt->mfc_unres_queue)) 818 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 819 820 out: 821 spin_unlock(&mfc_unres_lock); 822 } 823 824 /* Fill oifs list. It is called under locked mrt_lock. 
*/
/* Recompute @cache's per-vif TTL thresholds from @ttls: every slot is
 * first reset to 255, then each existing vif with a threshold in 1..254
 * is copied over while minvif/maxvif are narrowed to the range of such
 * vifs. Also stamps lastuse with the current jiffies.
 */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}

/* Install the vif described by @vifc into @mrt.
 *
 * The backing device depends on vifc_flags: a freshly created pimreg
 * device (VIFF_REGISTER), a freshly created DVMRP tunnel (VIFF_TUNNEL),
 * or an existing device found by ifindex (VIFF_USE_IFINDEX) or by local
 * address (0). The vif is marked VIFF_STATIC when @mrtsock is zero.
 *
 * Returns 0 on success or a negative errno: -EADDRINUSE (slot or register
 * vif already in use), -EINVAL (unsupported flags / PIM-SM disabled),
 * -ENOBUFS, -EADDRNOTAVAIL, or the error from dev_set_allmulti().
 * NOTE(review): vifc_vifi indexes vif_table without a bounds check here;
 * presumably the caller validates it - confirm.
 */
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	struct netdev_phys_item_id ppid = { };
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit,
			vifc->vifc_threshold,
			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
			(VIFF_TUNNEL | VIFF_REGISTER));

	/* Record the port parent id when the device reports one. */
	err = netif_get_port_parent_id(dev, &ppid, true);
	if (err == 0) {
		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
		v->dev_parent_id.id_len = ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}

	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
	if (v->flags & VIFF_REGISTER) {
		/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
	}
	if (vifi+1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev,
				      vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
/* Look up the (origin, mcastgrp) entry via mr_mfc_find(). */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
/* A group of INADDR_ANY is delegated to mr_mfc_find_any_parent() instead. */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = htonl(INADDR_ANY)
	};

	if (mcastgrp == htonl(INADDR_ANY))
		return mr_mfc_find_any_parent(mrt, vifi);
	return mr_mfc_find_any(mrt, vifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
/* Zeroed GFP_KERNEL allocation with the resolved-state fields primed
 * (last_assert, minvif, free callback, refcount 1); NULL on failure.
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->_c.mfc_un.res.minvif = MAXVIFS;
		c->_c.free = ipmr_cache_free_rcu;
		refcount_set(&c->_c.mfc_un.res.refcount, 1);
	}
	return c;
}

/* Allocate an unresolved cache entry with GFP_ATOMIC: its skb queue is
 * initialised and its expiry set 10*HZ jiffies from now; NULL on failure.
 */
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
/* Drains the skbs queued on unresolved entry @uc. Entries with IP version
 * 0 are netlink requests and are answered with the route @c (or with an
 * -EMSGSIZE error when it cannot be filled in); all others are forwarded
 * through ip_mr_forward() using @c.
 */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len =
					nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			rcu_read_lock();
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
			rcu_read_unlock();
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under rcu_read_lock().
 *
 * Builds an igmpmsg for @pkt and queues it on the table's mroute socket.
 * For IGMPMSG_WHOLEPKT / IGMPMSG_WRVIFWHOLE the whole packet is carried
 * behind a duplicated IP header; otherwise only the IP header plus a
 * zeroed igmphdr is sent. @pkt itself is not freed here.
 *
 * Returns -EINVAL when no mroute socket is attached, -ENOBUFS when the
 * skb cannot be allocated, otherwise the result of sock_queue_rcv_skb().
 */
static int ipmr_cache_report(const struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk)
		return -EINVAL;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE) {
			msg->im_vif = vifi;
			msg->im_vif_hi = vifi >> 8;
		} else {
			/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
			int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);

			msg->im_vif = vif_num;
			msg->im_vif_hi = vif_num >> 8;
		}
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		msg->im_vif_hi = vifi >> 8;
		ipv4_pktinfo_prepare(mroute_sk, pkt, false);
		memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
		/* Add our header.
		 * Note that code, csum and group fields are cleared.
		 */
		igmp = skb_put_zero(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry!
*/
/* Called under rcu_read_lock() */
/* ipmr_cache_unresolved - park @skb on the unresolved-entry queue.
 *
 * Looks for an unresolved entry whose group/origin match the IP header of
 * @skb. If there is none, a new entry is allocated, the miss is reported to
 * the daemon with IGMPMSG_NOCACHE and the entry is linked on
 * mrt->mfc_unres_queue. The packet is then appended to that entry's queue.
 *
 * Everything runs under mfc_unres_lock. Returns 0 when @skb was queued.
 * On failure @skb is freed here and a negative errno is returned
 * (-EINVAL if check_net() fails, -ENOBUFS on allocation failure or a full
 * per-entry queue, or the error from ipmr_cache_report()).
 */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = read_pnet(&mrt->net);
	const struct iphdr *iph = ip_hdr(skb);
	/* Must start as NULL: the err path frees @c whenever it is non-NULL */
	struct mfc_cache *c = NULL;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);

	if (!check_net(net)) {
		err = -EINVAL;
		goto err;
	}

	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		c = ipmr_cache_alloc_unres();
		if (!c) {
			err = -ENOBUFS;
			goto err;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		/* The entry is not linked yet, so the err path may free it */
		if (err < 0)
			goto err;

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		/* First unresolved entry: arm the expiry timer */
		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		/* @c is linked on the unresolved queue at this point (found or
		 * just added), so clear it to keep the err path from freeing it.
		 */
		c = NULL;
		err = -ENOBUFS;
		goto err;
	}

	if (dev) {
		skb->dev = dev;
		skb->skb_iif = dev->ifindex;
	}

	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);

	spin_unlock_bh(&mfc_unres_lock);
	return 0;

err:
	spin_unlock_bh(&mfc_unres_lock);
	if (c)
		ipmr_cache_free(c);
	kfree_skb(skb);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

/* ipmr_mfc_delete - remove the resolved MFC entry described by @mfc.
 *
 * The entry is looked up with ipmr_cache_find_parent() using the origin and
 * group addresses from @mfc and @parent. Returns -ENOENT when nothing
 * matches. Otherwise the entry is removed from the hash table and its list,
 * FIB_EVENT_ENTRY_DEL and RTM_DELROUTE notifications are sent, the reference
 * is dropped with mr_cache_put() and 0 is returned.
 */
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	/* NOTE(review): @c is used after rcu_read_unlock(); presumably safe
	 * because the setsockopt caller in this file holds
	 * net->ipv4.mfc_mutex around the call - confirm for other callers.
	 */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
	list_del_rcu(&c->_c.list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);

	return 0;
}

/* ipmr_mfc_add - add a resolved MFC entry, or update an existing one.
 *
 * @mrtsock: non-zero when the request was issued on the table's mroute
 *           socket (see the setsockopt caller); when zero the entry is
 *           flagged MFC_STATIC.
 * @parent:  parent vif used for the ipmr_cache_find_parent() lookup.
 *
 * If a matching entry exists, its parent and TTL thresholds are updated under
 * mrt_lock and FIB_EVENT_ENTRY_REPLACE / RTM_NEWROUTE are sent. Otherwise a
 * new entry is allocated, hashed and linked; if an unresolved entry for the
 * same (origin, group) is queued, it is unlinked, resolved against the new
 * entry with ipmr_cache_resolve() and freed.
 *
 * Returns 0 on success, -ENFILE if mfc->mfcc_parent >= MAXVIFS, -EINVAL if a
 * new entry's group is neither INADDR_ANY nor multicast, -ENOMEM if
 * allocation fails, or the error from rhltable_insert_key().
 */
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place */
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* The group address is validated only when a new entry is created */
	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->_c.mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc_cache *)_uc;
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left unresolved: the expiry timer is no longer needed */
	if (list_empty(&mrt->mfc_unres_queue))
		timer_delete(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* @uc is only meaningful when @found is set */
	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
/* @flags is a mask of MRT_FLUSH_VIFS, MRT_FLUSH_VIFS_STATIC, MRT_FLUSH_MFC
 * and MRT_FLUSH_MFC_STATIC. The *_STATIC bits select entries flagged
 * VIFF_STATIC / MFC_STATIC, the plain bits select the non-static ones.
 * @dev_kill_list is handed through to vif_delete().
 */
static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *cache;
	struct mr_mfc *c, *tmp;
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			/* Skip vifs whose static/non-static class was not requested */
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
				continue;
			vif_delete(mrt, i, 0, dev_kill_list);
		}
	}

	/* Wipe the cache */
	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
		mutex_lock(&net->ipv4.mfc_mutex);

		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			/* Skip entries whose static/non-static class was not requested */
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
			list_del_rcu(&c->list);
			cache = (struct mfc_cache *)c;
			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
						      mrt->id);
			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
			mr_cache_put(c);
		}

		mutex_unlock(&net->ipv4.mfc_mutex);
	}

	/* Unresolved entries are dropped only for MRT_FLUSH_MFC */
	if (flags & MRT_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				cache = (struct mfc_cache *)c;
				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
				ipmr_destroy_unres(mrt, cache);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
/* For every table whose mroute socket is @sk: decrement the MC_FORWARDING
 * devconf counter and notify, clear mrt->mroute_sk, and flush the non-static
 * vifs and MFC entries. Takes and releases rtnl_lock itself; devices collected
 * on the kill list are unregistered in one batch before the lock is dropped.
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	LIST_HEAD(dev_kill_list);
	struct mr_table *mrt;

	rtnl_lock();

	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC,
					    &dev_kill_list);
		}
	}

	unregister_netdevice_many(&dev_kill_list);

	rtnl_unlock();
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
*/

/* ip_mroute_setsockopt - handle the MRT_* socket options.
 *
 * Only raw IGMP sockets are accepted (-EOPNOTSUPP otherwise). The table is
 * taken from raw_sk(sk)->ipmr_table, defaulting to RT_TABLE_DEFAULT (-ENOENT
 * if it does not exist). Every option except MRT_INIT requires that @sk is
 * the table's mroute socket or that the caller has CAP_NET_ADMIN in the
 * netns' user namespace (-EACCES otherwise).
 *
 * Option payloads must have exactly the expected size (-EINVAL) and be
 * readable (-EFAULT). Unknown options return -ENOPROTOOPT.
 *
 * The function runs under rtnl_lock; MRT_DONE is the one path that drops the
 * lock early (see below).
 */
int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	bool do_wrvifwhole;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		/* Only one mroute socket per table */
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			/* We need to unlock here because mrtsock_destruct takes
			 * care of rtnl itself and we can't change that due to
			 * the IP_ROUTER_ALERT setsockopt which runs without it.
			 */
			rtnl_unlock();
			ret = ip_ra_control(sk, 0, NULL);
			/* Lock already released: skip out_unlock */
			goto out;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		/* Non-proxy variants always use parent -1 */
		parent = -1;
		fallthrough;
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		/* Still 0 only for the *_PROXY options: take the parent from
		 * the request.
		 */
		if (parent == 0)
			parent = mfc.mfcc_parent;

		mutex_lock(&net->ipv4.mfc_mutex);

		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);

		mutex_unlock(&net->ipv4.mfc_mutex);
		break;
	case MRT_FLUSH: {
		LIST_HEAD(dev_kill_list);

		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		/* @val is passed through as the MRT_FLUSH_* flag mask */
		mroute_clean_tables(mrt, val, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		break;
	}
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		WRITE_ONCE(mrt->mroute_do_assert, val);
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		/* Remember the WRVIFWHOLE request before @val is reduced to 0/1 */
		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
		val = !!val;
		if (val != mrt->mroute_do_pim) {
			WRITE_ONCE(mrt->mroute_do_pim, val);
			WRITE_ONCE(mrt->mroute_do_assert, val);
			WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole);
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
			ret = -EFAULT;
			break;
		}

		/* A socket that already is an mroute socket cannot switch tables */
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
out:
	return ret;
}

/* Execute if this ioctl is a special mroute ioctl */
/* For SIOCGETVIFCNT and SIOCGETSGCNT the result of sock_ioctl_inout() on a
 * request-sized stack buffer is returned; any other @cmd returns 1.
 */
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	switch (cmd) {
	/* These userspace buffers will be consumed by ipmr_ioctl() */
	case SIOCGETVIFCNT: {
		struct sioc_vif_req buffer;

		return sock_ioctl_inout(sk, cmd, arg, &buffer,
					sizeof(buffer));
	}
	case SIOCGETSGCNT: {
		struct sioc_sg_req buffer;

		return sock_ioctl_inout(sk, cmd, arg, &buffer,
					sizeof(buffer));
	}
	}
	/* return code > 0 means that the ioctl was not executed */
	return 1;
}

/* Getsock opt support for the multicast routing system. */
/* Supports MRT_VERSION (constant 0x0305), MRT_PIM and MRT_ASSERT; any other
 * option returns -ENOPROTOOPT. Only raw IGMP sockets are accepted
 * (-EOPNOTSUPP), and the socket's table must exist (-ENOENT).
 *
 * The caller-supplied length is read from @optlen, rejected if negative
 * (-EINVAL), clamped to sizeof(int) and written back; that many bytes of the
 * value are then copied to @optval. Copy failures return -EFAULT.
 */
int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			 sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = READ_ONCE(mrt->mroute_do_pim);
		break;
	case MRT_ASSERT:
		val = READ_ONCE(mrt->mroute_do_assert);
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;
	if (olr < 0)
		return -EINVAL;

	/* Never copy out more than the value itself */
	olr = min_t(unsigned int, olr, sizeof(int));

	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/* The IP multicast ioctl support routines.
*/
/* ipmr_ioctl - serve SIOCGETVIFCNT / SIOCGETSGCNT on a request buffer.
 *
 * @arg is dereferenced directly as struct sioc_vif_req / struct sioc_sg_req
 * and filled in place. Returns 0 on success, -ENOENT if the socket's table
 * does not exist, -EINVAL for a vif index >= mrt->maxvif, -EADDRNOTAVAIL if
 * the vif or cache entry does not exist, -ENOIOCTLCMD for any other @cmd.
 */
int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct sioc_vif_req *vr;
	struct sioc_sg_req *sr;
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		vr = (struct sioc_vif_req *)arg;
		if (vr->vifi >= mrt->maxvif)
			return -EINVAL;
		/* Clamp the index under speculation before using it */
		vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr->vifi];
		if (VIF_EXISTS(mrt, vr->vifi)) {
			vr->icount = READ_ONCE(vif->pkt_in);
			vr->ocount = READ_ONCE(vif->pkt_out);
			vr->ibytes = READ_ONCE(vif->bytes_in);
			vr->obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		sr = (struct sioc_sg_req *)arg;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr);
		if (c) {
			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
/* Compat layouts of the two ioctl requests: counters are compat_ulong_t */
struct compat_sioc_sg_req {
	struct in_addr src;
	struct in_addr grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
	vifi_t vifi;		/* Which iface */
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/* ipmr_compat_ioctl - compat variant of the SIOCGETVIFCNT / SIOCGETSGCNT
 * handling. Unlike ipmr_ioctl(), the request is copied from and back to the
 * user pointer @arg here (-EFAULT on copy failure); the other return values
 * match ipmr_ioctl().
 */
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req sr;
	struct compat_sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = READ_ONCE(vif->pkt_in);
			vr.ocount = READ_ONCE(vif->pkt_out);
			vr.ibytes = READ_ONCE(vif->bytes_in);
			vr.obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			/* Copy out only after leaving the RCU read section */
			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

/* Netdevice notifier: on NETDEV_UNREGISTER, delete every vif (in every table
 * of the device's netns) that is bound to the device. All other events are
 * ignored. Always returns NOTIFY_DONE.
 */
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				vif_delete(mrt, ct, 1, NULL);
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
/* The new outer header copies tos and ttl from the inner header, uses
 * @saddr/@daddr, and has its checksum computed. The old network header
 * becomes the transport header. IP options in the skb control block are
 * cleared.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	const struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->tos = old_iph->tos;
	iph->ttl = old_iph->ttl;
	iph->frag_off = 0;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;
	iph->tot_len = htons(skb->len);
	ip_select_ident(net, skb, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset_ct(skb);
}

/* Final step after the NF_INET_FORWARD hook: bump the forwarded-datagram
 * counter, process IP options if any are present, and hand the packet to
 * dst_output().
 */
static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
				      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(net, sk, skb);
}

#ifdef CONFIG_NET_SWITCHDEV
/* Returns true when @skb carries offload_l3_fwd_mark and both the input and
 * output vif have a non-empty parent id that compare equal; false otherwise.
 */
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
	struct vif_device *in_vif = &mrt->vif_table[in_vifi];

	if (!skb->offload_l3_fwd_mark)
		return false;
	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
		return false;
	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
					&in_vif->dev_parent_id);
}
#else
/* Without CONFIG_NET_SWITCHDEV nothing is ever treated as offloaded */
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	return false;
}
#endif

/* Processing handlers for ipmr_forward, under rcu_read_lock() */

/* ipmr_prepare_xmit - get @skb ready to be sent out of vif @vifi.
 *
 * Returns 0 when @skb has a route attached, its TTL decremented and, for
 * tunnel vifs, an IPIP header prepended. Returns -1 when the packet must not
 * be sent; @skb is NOT freed here, the callers in this file free it.
 *
 * For VIFF_REGISTER vifs the packet is counted, reported to the daemon as
 * IGMPMSG_WHOLEPKT and -1 is returned.
 */
static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
			     struct sk_buff *skb, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct rtable *rt;
	struct flowi4 fl4;
	int encap = 0;

	vif_dev = vif_dev_read(vif);
	if (!vif_dev)
		return -1;

	if (vif->flags & VIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		return -1;
	}

	if (vif->flags & VIFF_TUNNEL) {
		/* Route towards the tunnel endpoint */
		rt = ip_route_output_ports(net, &fl4, NULL,
					   vif->remote, vif->local,
					   0, 0,
					   IPPROTO_IPIP,
					   iph->tos & INET_DSCP_MASK, vif->link);
		if (IS_ERR(rt))
			return -1;
		encap = sizeof(struct iphdr);
	} else {
		/* Route towards the packet's own destination via vif->link */
		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
					   0, 0,
					   IPPROTO_IPIP,
					   iph->tos & INET_DSCP_MASK, vif->link);
		if (IS_ERR(rt))
			return -1;
	}

	if (skb->len+encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		return -1;
	}

	/* Headroom needed: optional IPIP header + link layer + dst header */
	encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		return -1;
	}

	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* From here on the route reference belongs to the skb */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		DEV_STATS_INC(vif_dev, tx_packets);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
	}

	return 0;
}

/* Forward-path transmit of @skb on vif @vifi. Consumes @skb: it is freed if
 * forwarding was already offloaded or preparation fails, otherwise it is
 * marked IPSKB_FORWARDED and passed through the NF_INET_FORWARD hook to
 * ipmr_forward_finish().
 */
static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
				int in_vifi, struct sk_buff *skb, int vifi)
{
	struct rtable *rt;

	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
		goto out_free;

	if (ipmr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	rt = skb_rtable(skb);

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst),
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

/* Output-path transmit of @skb on vif @vifi. Consumes @skb: it is handed to
 * ip_mc_output() after successful preparation and freed otherwise.
 */
static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt,
				   struct sk_buff *skb, int vifi)
{
	if (ipmr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	ip_mc_output(net, NULL, skb);
	return;

out_free:
	kfree_skb(skb);
}

/* Called with mrt_lock or rcu_read_lock() */
/* Returns the index of the highest-numbered vif bound to @dev, or -1 if no
 * vif uses it (the loop counts down and falls out with ct == -1).
 */
static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
{
	int ct;
	/* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */
	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
/* Called under rcu_read_lock() */
/* ip_mr_forward - forward @skb according to resolved cache entry @c.
 *
 * skb ownership: when @local is zero, @skb is consumed here (transmitted on
 * the last selected vif or freed). When @local is non-zero only clones are
 * transmitted and @skb is left untouched for the caller.
 */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *c, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	/* vif selected for the next transmit; -1 while none is pending */
	int psend = -1;
	int vif, ct;

	vif = c->_c.mfc_parent;
	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * send through wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);

		if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (READ_ONCE(mrt->mroute_do_pim) ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			/* Rate limit: at most one report per MFC_ASSERT_THRESH */
			c->_c.mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
			if (READ_ONCE(mrt->mroute_do_wrvifwhole))
				ipmr_cache_report(mrt, skb, true_vifi,
						  IGMPMSG_WRVIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
		   mrt->vif_table[vif].pkt_in + 1);
	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
		   mrt->vif_table[vif].bytes_in + skb->len);

	/* Forward the frame */
	if (c->mfc_origin == htonl(INADDR_ANY) &&
	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ip_hdr(skb)->ttl >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* Each selected vif except the last gets a clone; the transmit for a
	 * vif is issued one iteration late so the last one can reuse @skb.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((c->mfc_origin != htonl(INADDR_ANY) ||
		     ct != true_vifi) &&
		    ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

				if (skb2)
					ipmr_queue_fwd_xmit(net, mrt, true_vifi,
							    skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		if (local) {
			/* Caller keeps @skb for local delivery: send a clone */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (skb2)
				ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2,
						    psend);
		} else {
			ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend);
			return;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
}

/* Select the multicast routing table for @skb via ipmr_fib_lookup(), keyed on
 * the packet's addresses, DSCP, mark and interface (for output routes the
 * device is used as oif and the loopback ifindex as iif). Returns the table
 * or an ERR_PTR() carrying the lookup error.
 */
static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph = ip_hdr(skb);
	struct flowi4 fl4 = {
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowi4_dscp = ip4h_dscp(iph),
		.flowi4_oif = (rt_is_output_route(rt) ?
			       skb->dev->ifindex : 0),
		.flowi4_iif = (rt_is_output_route(rt) ?
			       LOOPBACK_IFINDEX :
			       skb->dev->ifindex),
		.flowi4_mark = skb->mark,
	};
	struct mr_table *mrt;
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err)
		return ERR_PTR(err);
	return mrt;
}

/* Multicast packets for forwarding arrive here
 * Called with rcu_read_lock();
 */
/* @skb is always consumed. Returns 0, the result of ip_local_deliver() /
 * ipmr_cache_unresolved(), or a negative errno (-ENODEV, -ENOBUFS, or the
 * table lookup error).
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	struct net_device *dev;

	/* skb->dev passed in is the loX master dev for vrfs.
	 * As there are no vifs associated with loopback devices,
	 * get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	/* Packet is looped back after forward, it should not be
	 * forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt)) {
		kfree_skb(skb);
		return PTR_ERR(mrt);
	}
	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			 * Cisco IOS <= 11.2(8)) do not put router alert
			 * option to IGMP packets destined to routable
			 * groups. It is very bad, because it means
			 * that we can forward NO IGMP messages.
			 */
			struct sock *mroute_sk;

			mroute_sk = rcu_dereference(mrt->mroute_sk);
			if (mroute_sk) {
				nf_reset_ct(skb);
				raw_rcv(mroute_sk, skb);
				return 0;
			}
		}
	}

	/* already under rcu_read_lock() */
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
	if (!cache) {
		/* No (S,G) entry: fall back to a wildcard lookup on the group */
		int vif = ipmr_find_vif(mrt, dev);

		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
						    vif);
	}

	/* No usable cache entry */
	if (!cache) {
		int vif;

		if (local) {
			/* Deliver the original locally and queue a clone */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (!skb2)
				return -ENOBUFS;
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, dev);
		if (vif >= 0)
			return ipmr_cache_unresolved(mrt, vif, skb, dev);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, dev, skb, cache, local);

	/* With @local set ip_mr_forward() left @skb to us */
	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

/* Transmit @skb on the vifs selected by cache entry @c on the output path.
 * @skb is always consumed: clones go to all selected vifs but the last, the
 * original goes to the last one, and it is freed when no vif is selected.
 */
static void ip_mr_output_finish(struct net *net, struct mr_table *mrt,
				struct net_device *dev, struct sk_buff *skb,
				struct mfc_cache *c)
{
	int psend = -1;
	int ct;

	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	/* Forward the frame */
	if (c->mfc_origin == htonl(INADDR_ANY) &&
	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
		if (ip_hdr(skb)->ttl >
		    c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_xmit;
		}
		goto dont_xmit;
	}

	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2;

				skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_output_xmit(net, mrt,
							       skb2, psend);
			}
			psend = ct;
		}
	}

last_xmit:
	if (psend != -1) {
		ipmr_queue_output_xmit(net, mrt, skb, psend);
		return;
	}

dont_xmit:
	kfree_skb(skb);
}

/* Output-path entry point for multicast routing.
 * Takes the RCU read lock itself via guard(rcu)().
 *
 * Packets already marked IPSKB_FORWARDED, packets without IPSKB_MCROUTE,
 * packets with no usable table, cache entry or vif, and packets whose cache
 * entry's parent vif is not the output device all go straight to
 * ip_mc_output(). Otherwise the packet is either queued as unresolved or
 * transmitted via ip_mr_output_finish().
 */
int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct mfc_cache *cache;
	struct net_device *dev;
	struct mr_table *mrt;
	int vif;

	guard(rcu)();

	dev = dst_dev_rcu(&rt->dst);

	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto mc_output;
	if (!(IPCB(skb)->flags & IPSKB_MCROUTE))
		goto mc_output;

	skb->dev = dev;

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt))
		goto mc_output;

	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
	if (!cache) {
		vif = ipmr_find_vif(mrt, dev);
		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
						    vif);
	}

	/* No usable cache entry */
	if (!cache) {
		vif = ipmr_find_vif(mrt, dev);
		if (vif >= 0)
			return ipmr_cache_unresolved(mrt, vif, skb, dev);
		goto mc_output;
	}

	vif = cache->_c.mfc_parent;
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
		goto mc_output;

	ip_mr_output_finish(net, mrt, dev, skb, cache);
	return 0;

mc_output:
	return ip_mc_output(net, sk, skb);
}

#ifdef CONFIG_IP_PIMSM_V1
/* Handle
IGMP messages of PIMv1 */
/* pim_rcv_v1() needs the IGMP-sized header plus an inner struct iphdr to
 * be pullable, a successful table lookup, mroute_do_pim set on that table,
 * and group/code equal to PIM_V1_VERSION/PIM_V1_REGISTER.  The skb is freed
 * here when any of those checks fails and when __pim_rcv() returns
 * non-zero.  Always returns 0.
 */
int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt))
		goto drop;
	if (!READ_ONCE(mrt->mroute_do_pim) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
		/* The label lives inside the if body so the early gotos and a
		 * non-zero __pim_rcv() share one kfree_skb().
		 */
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
/* Receive handler registered for IPPROTO_PIM through pim_protocol.
 *
 * Accepts only PIM REGISTER messages of version PIM_VERSION that do not
 * carry PIM_NULL_REGISTER.  The skb is freed here when validation or the
 * table lookup fails and when __pim_rcv() returns non-zero.  Always
 * returns 0.
 */
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	/* The checksum test drops the packet only when both the checksum
	 * over the PIM header alone and the checksum over the whole skb are
	 * non-zero, so either form is accepted.
	 */
	if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt))
		goto drop;
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

/* Fill @rtm/@skb with the multicast route for (@saddr, @daddr) taken from
 * the RT_TABLE_DEFAULT table.
 *
 * Returns -ENOENT when that table does not exist.  When a cache entry is
 * found, the result of mr_fill_mroute() is returned.  When none is found,
 * a new skb obtained from skb_realloc_headroom() is given a struct iphdr
 * carrying @saddr/@daddr, tagged with @portid and handed to
 * ipmr_cache_unresolved(); that path returns -ENODEV if skb->dev is unset
 * or maps to no vif, -ENOMEM if the new skb cannot be allocated, and
 * otherwise whatever ipmr_cache_unresolved() returns.  The whole lookup
 * runs between rcu_read_lock() and rcu_read_unlock().
 */
int ipmr_get_route(struct net *net, struct sk_buff *skb,
		   __be32 saddr, __be32 daddr,
		   struct rtmsg *rtm, u32 portid)
{
	struct mfc_cache *cache;
	struct mr_table *mrt;
	int err;

	rcu_read_lock();
	mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt) {
		rcu_read_unlock();
		return -ENOENT;
	}

	cache = ipmr_cache_find(mrt, saddr, daddr);
	if (!cache && skb->dev) {
		int vif = ipmr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, daddr, vif);
	}
	if (!cache) {
		struct
sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif = -1;

		dev = skb->dev;
		if (dev)
			vif = ipmr_find_vif(mrt, dev);
		if (vif < 0) {
			rcu_read_unlock();
			return -ENODEV;
		}

		skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
		if (!skb2) {
			rcu_read_unlock();
			return -ENOMEM;
		}

		/* Prepend a minimal IP header to the new skb so it can be
		 * queued like an unresolved packet.
		 */
		NETLINK_CB(skb2).portid = portid;
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = saddr;
		iph->daddr = daddr;
		/* NOTE(review): 0 is not a real IP version; presumably it
		 * marks this pseudo-header for whoever dequeues the entry
		 * later - confirm against the resolve path.
		 */
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
		rcu_read_unlock();
		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	rcu_read_unlock();
	return err;
}

/* Append one route message describing cache entry @c to @skb.
 *
 * Writes an nlmsghdr of type @cmd followed by a struct rtmsg and the
 * RTA_TABLE, RTA_SRC and RTA_DST attributes, then lets mr_fill_mroute()
 * add the remainder.  rtm_protocol is RTPROT_STATIC for entries flagged
 * MFC_STATIC and RTPROT_MROUTED otherwise.
 *
 * Returns 0 on success.  Returns -EMSGSIZE when the header cannot be added,
 * or when an attribute or mr_fill_mroute() fails; in the latter cases the
 * partial message is cancelled first.  An -ENOENT result from
 * mr_fill_mroute() is tolerated and still yields 0.
 */
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
			    int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Adapter with a struct mr_mfc based signature: casts @c to
 * struct mfc_cache and forwards everything to ipmr_fill_mroute().
 */
static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
			     int flags)
{
	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
				cmd, flags);
}

/* Buffer size to reserve for one route message.
 *
 * Every message accounts for the rtmsg header plus RTA_TABLE, RTA_SRC and
 * RTA_DST.  Unless @unresolved is true, room is added for RTA_IIF, an
 * RTA_MULTIPATH nest holding MAXVIFS rtnexthop slots, and RTA_MFC_STATS.
 */
static size_t mroute_msgsize(bool unresolved)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(4)	/* RTA_SRC */
		+ nla_total_size(4)	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

/* Send a route notification of type @cmd for @mfc to the
 * RTNLGRP_IPV4_MROUTE group of the table's namespace.
 *
 * The message buffer is allocated with GFP_ATOMIC.  If allocation or
 * filling fails, the buffer is freed and the error is recorded on the
 * group through rtnl_set_sk_err().
 */
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	/* An mfc_parent at or beyond MAXVIFS selects the smaller,
	 * "unresolved" message size.
	 */
	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	/* skb is NULL here when nlmsg_new() failed */
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}

/* Buffer size to reserve for one cache-report message whose packet
 * attribute carries @payloadlen bytes.
 */
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IPMRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
		+ nla_total_size(4)	/* IPMRA_CREPORT_TABLE */
					/* IPMRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
;

	return len;
}

/* Mirror the cache report held in @pkt as an RTM_NEWCACHEREPORT message to
 * the RTNLGRP_IPV4_MROUTE_R group.
 *
 * The struct igmpmsg at the network header of @pkt supplies the message
 * type, vif id (im_vif combined with im_vif_hi as the high byte) and the
 * source/destination addresses; everything after that header is copied
 * into IPMRA_CREPORT_PKT.  On any failure the buffer is freed and -ENOBUFS
 * is recorded on the group through rtnl_set_sk_err().
 */
static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* assumes pkt holds at least a struct igmpmsg, otherwise payloadlen
	 * would go negative - TODO confirm against callers
	 */
	payloadlen = pkt->len - sizeof(struct igmpmsg);
	msg = (struct igmpmsg *)skb_network_header(pkt);

	skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
	if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
	    nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
			    msg->im_src.s_addr) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
			    msg->im_dst.s_addr) ||
	    nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id))
		goto nla_put_failure;

	/* Reserve the attribute first, then copy the bytes that follow the
	 * igmpmsg header straight into it.
	 */
	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
}

/* Validate an RTM_GETROUTE request and parse its attributes into @tb.
 *
 * Returns -EINVAL when the payload is too short for a struct rtmsg.
 * Without strict checking the attributes are parsed with
 * nlmsg_parse_deprecated() and its result is returned.  With strict
 * checking the header may only carry source/destination lengths of 0 or 32
 * and must leave tos, table, protocol, scope, type and flags at zero;
 * RTA_SRC/RTA_DST require the matching length to be non-zero; and only
 * RTA_SRC, RTA_DST and RTA_TABLE are accepted.  Returns 0 when the request
 * passes.
 */
static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	rtm = nlmsg_payload(nlh, sizeof(*rtm));
	if (!rtm) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
		return -EINVAL;
}

	/* Legacy (non-strict) requests skip all header checks below */
	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_ipv4_policy, extack);

	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_ipv4_policy, extack);
	if (err)
		return err;

	/* An address attribute needs its prefix length set in the header */
	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
		return -EINVAL;
	}

	/* Reject every attribute other than the three handled by the caller */
	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_SRC:
		case RTA_DST:
		case RTA_TABLE:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
			return -EINVAL;
		}
	}

	return 0;
}

/* RTM_GETROUTE doit handler: look up one (source, group) entry and unicast
 * it back to the requester as an RTM_NEWROUTE message.
 *
 * Missing RTA_SRC/RTA_DST default to 0; a missing or zero RTA_TABLE selects
 * RT_TABLE_DEFAULT.  The reply buffer is allocated with GFP_KERNEL before
 * rcu_read_lock() is taken for the lookup and fill.  Returns the validation
 * error, -ENOBUFS if the buffer cannot be allocated, -ENOENT if the table
 * or the entry does not exist, the error from ipmr_fill_mroute(), or the
 * result of rtnl_unicast().
 */
static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX + 1];
	struct mfc_cache *cache;
	struct mr_table *mrt;
	struct sk_buff *skb;
	__be32 src, grp;
	u32 tableid;
	int err;

	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	src = nla_get_in_addr_default(tb[RTA_SRC], 0);
	grp = nla_get_in_addr_default(tb[RTA_DST], 0);
	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);

	/* Sized for a resolved entry, the larger of the two layouts */
	skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	rcu_read_lock();

	mrt = __ipmr_get_table(net,
tableid ? tableid : RT_TABLE_DEFAULT);
	if (!mrt) {
		err = -ENOENT;
		goto errout_unlock;
	}

	cache = ipmr_cache_find(mrt, src, grp);
	if (!cache) {
		err = -ENOENT;
		goto errout_unlock;
	}

	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
			       nlh->nlmsg_seq, cache,
			       RTM_NEWROUTE, 0);
	if (err < 0)
		goto errout_unlock;

	rcu_read_unlock();

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_unlock:
	/* Failure after the reply buffer exists: leave the RCU section and
	 * drop the unsent buffer before returning the error.
	 */
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout;
}

/* RTM_GETROUTE dumpit handler.
 *
 * Runs entirely between rcu_read_lock() and rcu_read_unlock(), with
 * filter.rtnl_held set to false.  With strict checking the request is
 * validated by ip_valid_fib_dump_req(), which may fill in @filter.  When a
 * table id is requested only that table is dumped with mr_table_dump();
 * otherwise mr_rtm_dumproute() walks the tables.
 *
 * If the requested table does not exist, the handler returns skb->len for
 * requests whose family is not RTNL_FAMILY_IPMR and -ENOENT (with an
 * extack message) for RTNL_FAMILY_IPMR requests.
 */
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct fib_dump_filter filter = {
		.rtnl_held = false,
	};
	int err;

	rcu_read_lock();

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
					    &filter, cb);
		if (err < 0)
			goto out;
	}

	if (filter.table_id) {
		struct mr_table *mrt;

		mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id);
		if (!mrt) {
			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) {
				err = skb->len;
				goto out;
			}

			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
			err = -ENOENT;
			goto out;
		}

		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
				    &mfc_unres_lock, &filter);
		/* A non-empty skb takes precedence over the dump result */
		err = skb->len ?
: err;
		goto out;
	}

	err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
			       _ipmr_fill_mroute, &mfc_unres_lock, &filter);
out:
	rcu_read_unlock();

	return err;
}

/* Attribute policy applied by rtm_to_ipmr_mfcc() to RTM_NEWROUTE and
 * RTM_DELROUTE requests.
 */
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
	[RTA_SRC]	= { .type = NLA_U32 },
	[RTA_DST]	= { .type = NLA_U32 },
	[RTA_IIF]	= { .type = NLA_U32 },
	[RTA_TABLE]	= { .type = NLA_U32 },
	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
};

/* Return true only for RTPROT_STATIC and RTPROT_MROUTED, the two
 * rtm_protocol values this family accepts.
 */
static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
{
	switch (rtm_protocol) {
	case RTPROT_STATIC:
	case RTPROT_MROUTED:
		return true;
	}
	return false;
}

/* Copy the hop counts of an RTA_MULTIPATH attribute into mfcc->mfcc_ttls[].
 *
 * Nexthop i of the attribute becomes the TTL threshold of vif i; at most
 * MAXVIFS entries are read.  Returns the number of entries stored, or
 * -EINVAL when bytes remain after the walk stops.
 *
 * NOTE(review): when the loop stops because MAXVIFS entries were stored,
 * rtnh_next() has not been called for the last entry, so `remaining` still
 * appears to include it and the return below reports -EINVAL even for
 * exactly MAXVIFS well-formed entries - confirm this is intended.
 */
static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
{
	struct rtnexthop *rtnh = nla_data(nla);
	int remaining = nla_len(nla), vifi = 0;

	while (rtnh_ok(rtnh, remaining)) {
		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
		if (++vifi == MAXVIFS)
			break;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return remaining > 0 ?
-EINVAL : vifi;
}

/* Translate an RTM_NEWROUTE/RTM_DELROUTE request into a struct mfcctl.
 *
 * On success *mrtret is the table named by RTA_TABLE (RT_TABLE_DEFAULT when
 * absent), *mrtsock is 1 for RTPROT_MROUTED and 0 otherwise, and
 * mfcc->mfcc_parent is the vif of the RTA_IIF device, or -1 when no RTA_IIF
 * was given.
 *
 * returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY
 * (1 is selected by the presence of RTA_PREFSRC).  Errors are the
 * validation result, -EINVAL for an unacceptable header or RTA_MULTIPATH,
 * -ENOENT for an unknown table and -ENODEV for an unknown RTA_IIF index.
 */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
			    struct mfcctl *mfcc, int *mrtsock,
			    struct mr_table **mrtret,
			    struct netlink_ext_ack *extack)
{
	struct net_device *dev = NULL;
	u32 tblid = RT_TABLE_DEFAULT;
	int ret, rem, iif = 0;
	struct mr_table *mrt;
	struct nlattr *attr;
	struct rtmsg *rtm;

	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
					rtm_ipmr_policy, extack);
	if (ret < 0)
		goto out;
	rtm = nlmsg_data(nlh);

	ret = -EINVAL;
	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
	    rtm->rtm_type != RTN_MULTICAST ||
	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
		goto out;

	memset(mfcc, 0, sizeof(*mfcc));
	mfcc->mfcc_parent = -1;
	ret = 0;
	/* First pass only collects values; the table and the input device
	 * are resolved afterwards, under rcu_read_lock().
	 */
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
		switch (nla_type(attr)) {
		case RTA_SRC:
			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
			break;
		case RTA_DST:
			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
			break;
		case RTA_IIF:
			iif = nla_get_u32(attr);
			break;
		case RTA_MULTIPATH:
			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case RTA_PREFSRC:
			ret = 1;
			break;
		case RTA_TABLE:
			tblid = nla_get_u32(attr);
			break;
		}
	}

	rcu_read_lock();

	mrt = __ipmr_get_table(net, tblid);
	if (!mrt) {
		ret = -ENOENT;
		goto unlock;
	}

	/* An ifindex of 0 is treated the same as "no RTA_IIF given" */
	if (iif) {
		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			ret = -ENODEV;
			goto unlock;
		}

		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
	}

	*mrtret = mrt;
	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ?
1 : 0;

unlock:
	rcu_read_unlock();
out:
	return ret;
}

/* takes care of both newroute and delroute
 *
 * Parses the request with rtm_to_ipmr_mfcc(), then calls ipmr_mfc_add()
 * for RTM_NEWROUTE or ipmr_mfc_delete() otherwise while holding
 * net->ipv4.mfc_mutex.  The parent vif is passed on only when the parser
 * returned 1 (proxy form); otherwise -1 is used.
 *
 * NOTE(review): tbl was looked up inside the parser's RCU section and is
 * used here after that section has ended - confirm what keeps the table
 * alive until mfc_mutex is taken.
 */
static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	int ret, mrtsock = 0, parent;
	struct mr_table *tbl = NULL;
	struct mfcctl mfcc;

	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
	if (ret < 0)
		return ret;

	parent = ret ? mfcc.mfcc_parent : -1;

	mutex_lock(&net->ipv4.mfc_mutex);

	if (nlh->nlmsg_type == RTM_NEWROUTE)
		ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
	else
		ret = ipmr_mfc_delete(tbl, &mfcc, parent);

	mutex_unlock(&net->ipv4.mfc_mutex);

	return ret;
}

/* Add the per-table attributes (id, unresolved queue length, register vif
 * number and the do_assert/do_pim/do_wrvifwhole settings) to @skb.
 * Returns false as soon as one attribute does not fit, true otherwise.
 */
static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
{
	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);

	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
	    nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
			READ_ONCE(mrt->mroute_reg_vif_num)) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
		       READ_ONCE(mrt->mroute_do_assert)) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM,
		       READ_ONCE(mrt->mroute_do_pim)) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
		       READ_ONCE(mrt->mroute_do_wrvifwhole)))
		return false;

	return true;
}

/* Add one IPMRA_VIF nest describing vif @vifid of @mrt to @skb.
 *
 * A slot without a device is skipped and reported as success.  Returns
 * false when the nest cannot be started or an attribute does not fit (the
 * partial nest is cancelled), true otherwise.
 */
static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
{
	struct net_device *vif_dev;
	struct nlattr *vif_nest;
	struct vif_device *vif;

	vif = &mrt->vif_table[vifid];
	vif_dev = vif_dev_read(vif);
	/* if the VIF doesn't exist just continue */
	if (!vif_dev)
		return true;

	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
	if (!vif_nest)
		return false;

if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) ||
	    nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
	    nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in),
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out),
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in),
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out),
			      IPMRA_VIFA_PAD) ||
	    nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
	    nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
		/* Drop the half-written nest so the message stays well formed */
		nla_nest_cancel(skb, vif_nest);
		return false;
	}
	nla_nest_end(skb, vif_nest);

	return true;
}

/* Strict-mode validation of an ipmr link dump request.
 *
 * Returns -EINVAL (with an extack message) when the payload is too short
 * for a struct ifinfomsg, when any attribute data follows the header, or
 * when any of __ifi_pad, ifi_type, ifi_flags, ifi_change or ifi_index is
 * non-zero.  Returns 0 otherwise.
 */
static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm;

	ifm = nlmsg_payload(nlh, sizeof(*ifm));
	if (!ifm) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
		return -EINVAL;
	}

	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
	    ifm->ifi_change || ifm->ifi_index) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
		return -EINVAL;
	}

	return 0;
}

/* RTM_GETLINK dumpit handler: emit one RTM_NEWLINK message per table, each
 * carrying the table attributes and a nest of its vifs inside IFLA_AF_SPEC.
 *
 * cb->args[0] and cb->args[1] hold the table and vif positions at which a
 * previous call stopped; they are read here and written back before
 * returning.  Returns the strict-check error, or skb->len.
 */
static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlmsghdr *nlh = NULL;
	unsigned int t = 0, s_t;	/* current / starting table index */
	unsigned int e = 0, s_e;	/* current / starting vif index */
	struct mr_table *mrt;

	if (cb->strict_check) {
		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);

		if (err < 0)
			return err;
	}

	s_t = cb->args[0];
	s_e = cb->args[1];

3063 rcu_read_lock(); 3064 3065 ipmr_for_each_table(mrt, net) { 3066 struct nlattr *vifs, *af; 3067 struct ifinfomsg *hdr; 3068 u32 i; 3069 3070 if (t < s_t) 3071 goto skip_table; 3072 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 3073 cb->nlh->nlmsg_seq, RTM_NEWLINK, 3074 sizeof(*hdr), NLM_F_MULTI); 3075 if (!nlh) 3076 break; 3077 3078 hdr = nlmsg_data(nlh); 3079 memset(hdr, 0, sizeof(*hdr)); 3080 hdr->ifi_family = RTNL_FAMILY_IPMR; 3081 3082 af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); 3083 if (!af) { 3084 nlmsg_cancel(skb, nlh); 3085 goto out; 3086 } 3087 3088 if (!ipmr_fill_table(mrt, skb)) { 3089 nlmsg_cancel(skb, nlh); 3090 goto out; 3091 } 3092 3093 vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); 3094 if (!vifs) { 3095 nla_nest_end(skb, af); 3096 nlmsg_end(skb, nlh); 3097 goto out; 3098 } 3099 for (i = 0; i < READ_ONCE(mrt->maxvif); i++) { 3100 if (e < s_e) 3101 goto skip_entry; 3102 if (!ipmr_fill_vif(mrt, i, skb)) { 3103 nla_nest_end(skb, vifs); 3104 nla_nest_end(skb, af); 3105 nlmsg_end(skb, nlh); 3106 goto out; 3107 } 3108 skip_entry: 3109 e++; 3110 } 3111 s_e = 0; 3112 e = 0; 3113 nla_nest_end(skb, vifs); 3114 nla_nest_end(skb, af); 3115 nlmsg_end(skb, nlh); 3116 skip_table: 3117 t++; 3118 } 3119 3120 out: 3121 rcu_read_unlock(); 3122 3123 cb->args[1] = e; 3124 cb->args[0] = t; 3125 3126 return skb->len; 3127 } 3128 3129 #ifdef CONFIG_PROC_FS 3130 /* The /proc interfaces to multicast routing : 3131 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 3132 */ 3133 3134 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 3135 __acquires(RCU) 3136 { 3137 struct mr_vif_iter *iter = seq->private; 3138 struct net *net = seq_file_net(seq); 3139 struct mr_table *mrt; 3140 3141 rcu_read_lock(); 3142 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); 3143 if (!mrt) { 3144 rcu_read_unlock(); 3145 return ERR_PTR(-ENOENT); 3146 } 3147 3148 iter->mrt = mrt; 3149 3150 return mr_vif_seq_start(seq, pos); 3151 } 3152 3153 static void 
ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	/* seq_file ->stop for /proc/net/ip_mr_vif: unconditionally leaves
	 * the RCU read-side section.
	 */
	rcu_read_unlock();
}

/* seq_file ->show for /proc/net/ip_mr_vif: prints the column header for
 * SEQ_START_TOKEN and one line per vif otherwise.  A vif without a device
 * is shown with the name "none".  Always returns 0.
 *
 * NOTE(review): the column spacing inside the two strings below may have
 * been collapsed in this copy of the file - compare with the canonical
 * source before relying on the exact layout.
 */
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		const struct net_device *vif_dev;
		const char *name;

		vif_dev = vif_dev_read(vif);
		name = vif_dev ? vif_dev->name : "none";
		/* First column is the vif's index within mrt->vif_table */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

/* seq_file ->start for /proc/net/ip_mr_cache: looks up the
 * RT_TABLE_DEFAULT table and delegates to mr_mfc_seq_start() with
 * mfc_unres_lock.  Returns ERR_PTR(-ENOENT) when the table does not exist.
 *
 * NOTE(review): on that error return mr_mfc_seq_start() has not run, but
 * the ->stop callback (mr_mfc_seq_stop, see ipmr_mfc_seq_ops) is still
 * expected to be invoked by the seq_file core - verify it copes with an
 * iterator that was never set up.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* seq_file ->show for /proc/net/ip_mr_cache: prints the column header for
 * SEQ_START_TOKEN and one line per cache entry otherwise.  Resolved entries
 * show their packet, byte and wrong-interface counters followed by a
 * "vif:ttl" pair for each existing vif whose threshold is below 255;
 * entries taken from the unresolved queue show zeros.  Always returns 0.
 *
 * NOTE(review): the column spacing of the header string may have been
 * collapsed in this copy of the file - compare with the canonical source.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->_c.mfc_parent);

		/* it->cache tells which list the iterator is walking */
		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
atomic_long_read(&mfc->_c.mfc_un.res.pkt),
				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
/* Protocol handler registered for IPPROTO_PIM by ip_mr_init() */
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif

/* fib_seq_read callback: the namespace's ipmr_seq counter plus the value
 * reported by ipmr_rules_seq_read().
 */
static unsigned int ipmr_seq_read(const struct net *net)
{
	return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
}

/* fib_dump callback: forwards to mr_dump() for RTNL_FAMILY_IPMR with this
 * file's rules-dump and table-iterator helpers.
 */
static int ipmr_dump(struct net *net, struct notifier_block *nb,
		     struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
		       ipmr_mr_table_iter, extack);
}

/* Template handed to fib_notifier_ops_register() for each namespace */
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.fib_seq_read	= ipmr_seq_read,
	.fib_dump	= ipmr_dump,
	.owner		= THIS_MODULE,
};

/* Reset the namespace's ipmr_seq counter and register its notifier ops.
 * Returns 0, or the error encoded in the pointer returned by
 * fib_notifier_ops_register().
 */
static int __net_init ipmr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	atomic_set(&net->ipv4.ipmr_seq, 0);

	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	net->ipv4.ipmr_notifier_ops = ops;

	return 0;
}

/* Unregister the namespace's notifier ops and clear the stored pointer */
static void __net_exit ipmr_notifier_exit(struct net *net)
{
fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
	net->ipv4.ipmr_notifier_ops = NULL;
}

/* Setup for IP multicast routing
 *
 * Per-namespace init: initialises mfc_mutex, registers the notifier ops,
 * sets up the rules/tables and, with CONFIG_PROC_FS, creates
 * /proc/net/ip_mr_vif and /proc/net/ip_mr_cache.  On failure the steps
 * already completed are undone in reverse order and the error is returned
 * (-ENOMEM for a proc entry that cannot be created).
 *
 * NOTE(review): dev_kill_list is filled by ipmr_rules_exit_rtnl() on the
 * proc failure path but is not consumed anywhere in this function - confirm
 * nothing can be queued on it this early.
 */
static int __net_init ipmr_net_init(struct net *net)
{
	LIST_HEAD(dev_kill_list);
	int err;

	mutex_init(&net->ipv4.mfc_mutex);

	err = ipmr_notifier_init(net);
	if (err)
		goto ipmr_notifier_fail;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto ipmr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
	ipmr_rules_exit_rtnl(net, &dev_kill_list);
	ipmr_rules_exit(net);
#endif
ipmr_rules_fail:
	ipmr_notifier_exit(net);
ipmr_notifier_fail:
	return err;
}

/* Per-namespace exit: removes the proc entries (with CONFIG_PROC_FS), then
 * tears down the rules/tables and the notifier ops.
 */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_mr_cache", net->proc_net);
	remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
	ipmr_rules_exit(net);
	ipmr_notifier_exit(net);
}

/* ->exit_rtnl hook: forwards to ipmr_rules_exit_rtnl() with the caller's
 * device kill list.
 */
static void __net_exit ipmr_net_exit_rtnl(struct net *net,
					  struct list_head *dev_kill_list)
{
	ipmr_rules_exit_rtnl(net, dev_kill_list);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
	.exit_rtnl = ipmr_net_exit_rtnl,
};

/* rtnetlink handlers registered for RTNL_FAMILY_IPMR by ip_mr_init() */
static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = {
	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK,
	 .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED},
	{.protocol =
RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE,
	 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE,
	 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE,
	 .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};

/* Boot-time initialisation of IPv4 multicast routing.
 *
 * Creates the mfc_cache slab cache, registers the per-namespace operations
 * and the netdevice notifier, adds the PIM protocol handler when
 * CONFIG_IP_PIMSM_V2 is set, and registers the rtnetlink handlers.
 * Returns 0 on success; on failure the steps already completed are undone
 * in reverse order and the error is returned (-EAGAIN when the PIM handler
 * cannot be added).
 *
 * NOTE(review): the result of rtnl_register_many() is not checked here -
 * confirm it cannot fail for these handlers.
 */
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register_many(ipmr_rtnl_msg_handlers);

	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}