1 /* 2 * IP multicast routing support for mrouted 3.6/3.8 3 * 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 5 * Linux Consultancy and Custom Driver Development 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Fixes: 13 * Michael Chastain : Incorrect size of copying. 14 * Alan Cox : Added the cache manager code 15 * Alan Cox : Fixed the clone/copy bug and device race. 16 * Mike McLagan : Routing by source 17 * Malcolm Beattie : Buffer handling fixes. 18 * Alexey Kuznetsov : Double buffer free and other fixes. 19 * SVR Anand : Fixed several multicast bugs and problems. 20 * Alexey Kuznetsov : Status, optimisations and more. 21 * Brad Parker : Better behaviour on mrouted upcall 22 * overflow. 23 * Carlos Picoto : PIMv1 Support 24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 25 * Relax this requirement to work with older peers. 
*/

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>

/* Policy rule for multicast routing; it carries nothing beyond the
 * generic fib_rule it wraps.
 */
struct ipmr_rule {
	struct fib_rule		common;
};

/* Result slot handed to the fib rules lookup: the rule action stores the
 * selected multicast routing table here.
 */
struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
96 */ 97 98 static struct kmem_cache *mrt_cachep __read_mostly; 99 100 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 101 static void ipmr_free_table(struct mr_table *mrt); 102 103 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 104 struct net_device *dev, struct sk_buff *skb, 105 struct mfc_cache *cache, int local); 106 static int ipmr_cache_report(struct mr_table *mrt, 107 struct sk_buff *pkt, vifi_t vifi, int assert); 108 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 109 struct mfc_cache *c, struct rtmsg *rtm); 110 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 111 int cmd); 112 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 113 static void mroute_clean_tables(struct mr_table *mrt, bool all); 114 static void ipmr_expire_process(unsigned long arg); 115 116 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 117 #define ipmr_for_each_table(mrt, net) \ 118 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 119 120 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 121 { 122 struct mr_table *mrt; 123 124 ipmr_for_each_table(mrt, net) { 125 if (mrt->id == id) 126 return mrt; 127 } 128 return NULL; 129 } 130 131 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 132 struct mr_table **mrt) 133 { 134 int err; 135 struct ipmr_result res; 136 struct fib_lookup_arg arg = { 137 .result = &res, 138 .flags = FIB_LOOKUP_NOREF, 139 }; 140 141 /* update flow if oif or iif point to device enslaved to l3mdev */ 142 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 143 144 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 145 flowi4_to_flowi(flp4), 0, &arg); 146 if (err < 0) 147 return err; 148 *mrt = res.mrt; 149 return 0; 150 } 151 152 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 153 int flags, struct fib_lookup_arg *arg) 154 { 155 struct ipmr_result *res = arg->result; 156 struct mr_table *mrt; 157 158 switch 
(rule->action) { 159 case FR_ACT_TO_TBL: 160 break; 161 case FR_ACT_UNREACHABLE: 162 return -ENETUNREACH; 163 case FR_ACT_PROHIBIT: 164 return -EACCES; 165 case FR_ACT_BLACKHOLE: 166 default: 167 return -EINVAL; 168 } 169 170 arg->table = fib_rule_get_table(rule, arg); 171 172 mrt = ipmr_get_table(rule->fr_net, arg->table); 173 if (!mrt) 174 return -EAGAIN; 175 res->mrt = mrt; 176 return 0; 177 } 178 179 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 180 { 181 return 1; 182 } 183 184 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 185 FRA_GENERIC_POLICY, 186 }; 187 188 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 189 struct fib_rule_hdr *frh, struct nlattr **tb) 190 { 191 return 0; 192 } 193 194 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 195 struct nlattr **tb) 196 { 197 return 1; 198 } 199 200 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 201 struct fib_rule_hdr *frh) 202 { 203 frh->dst_len = 0; 204 frh->src_len = 0; 205 frh->tos = 0; 206 return 0; 207 } 208 209 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 210 .family = RTNL_FAMILY_IPMR, 211 .rule_size = sizeof(struct ipmr_rule), 212 .addr_size = sizeof(u32), 213 .action = ipmr_rule_action, 214 .match = ipmr_rule_match, 215 .configure = ipmr_rule_configure, 216 .compare = ipmr_rule_compare, 217 .fill = ipmr_rule_fill, 218 .nlgroup = RTNLGRP_IPV4_RULE, 219 .policy = ipmr_rule_policy, 220 .owner = THIS_MODULE, 221 }; 222 223 static int __net_init ipmr_rules_init(struct net *net) 224 { 225 struct fib_rules_ops *ops; 226 struct mr_table *mrt; 227 int err; 228 229 ops = fib_rules_register(&ipmr_rules_ops_template, net); 230 if (IS_ERR(ops)) 231 return PTR_ERR(ops); 232 233 INIT_LIST_HEAD(&net->ipv4.mr_tables); 234 235 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 236 if (IS_ERR(mrt)) { 237 err = PTR_ERR(mrt); 238 goto err1; 239 } 240 241 err = 
fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 242 if (err < 0) 243 goto err2; 244 245 net->ipv4.mr_rules_ops = ops; 246 return 0; 247 248 err2: 249 ipmr_free_table(mrt); 250 err1: 251 fib_rules_unregister(ops); 252 return err; 253 } 254 255 static void __net_exit ipmr_rules_exit(struct net *net) 256 { 257 struct mr_table *mrt, *next; 258 259 rtnl_lock(); 260 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 261 list_del(&mrt->list); 262 ipmr_free_table(mrt); 263 } 264 fib_rules_unregister(net->ipv4.mr_rules_ops); 265 rtnl_unlock(); 266 } 267 #else 268 #define ipmr_for_each_table(mrt, net) \ 269 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 270 271 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 272 { 273 return net->ipv4.mrt; 274 } 275 276 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 277 struct mr_table **mrt) 278 { 279 *mrt = net->ipv4.mrt; 280 return 0; 281 } 282 283 static int __net_init ipmr_rules_init(struct net *net) 284 { 285 struct mr_table *mrt; 286 287 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 288 if (IS_ERR(mrt)) 289 return PTR_ERR(mrt); 290 net->ipv4.mrt = mrt; 291 return 0; 292 } 293 294 static void __net_exit ipmr_rules_exit(struct net *net) 295 { 296 rtnl_lock(); 297 ipmr_free_table(net->ipv4.mrt); 298 net->ipv4.mrt = NULL; 299 rtnl_unlock(); 300 } 301 #endif 302 303 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 304 const void *ptr) 305 { 306 const struct mfc_cache_cmp_arg *cmparg = arg->key; 307 struct mfc_cache *c = (struct mfc_cache *)ptr; 308 309 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 310 cmparg->mfc_origin != c->mfc_origin; 311 } 312 313 static const struct rhashtable_params ipmr_rht_params = { 314 .head_offset = offsetof(struct mfc_cache, mnode), 315 .key_offset = offsetof(struct mfc_cache, cmparg), 316 .key_len = sizeof(struct mfc_cache_cmp_arg), 317 .nelem_hint = 3, 318 .locks_mul = 1, 319 .obj_cmpfn = ipmr_hash_cmp, 320 .automatic_shrinking = true, 
321 }; 322 323 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 324 { 325 struct mr_table *mrt; 326 327 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 328 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 329 return ERR_PTR(-EINVAL); 330 331 mrt = ipmr_get_table(net, id); 332 if (mrt) 333 return mrt; 334 335 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); 336 if (!mrt) 337 return ERR_PTR(-ENOMEM); 338 write_pnet(&mrt->net, net); 339 mrt->id = id; 340 341 rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); 342 INIT_LIST_HEAD(&mrt->mfc_cache_list); 343 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 344 345 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 346 (unsigned long)mrt); 347 348 mrt->mroute_reg_vif_num = -1; 349 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 350 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 351 #endif 352 return mrt; 353 } 354 355 static void ipmr_free_table(struct mr_table *mrt) 356 { 357 del_timer_sync(&mrt->ipmr_expire_timer); 358 mroute_clean_tables(mrt, true); 359 rhltable_destroy(&mrt->mfc_hash); 360 kfree(mrt); 361 } 362 363 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 364 365 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 366 { 367 struct net *net = dev_net(dev); 368 369 dev_close(dev); 370 371 dev = __dev_get_by_name(net, "tunl0"); 372 if (dev) { 373 const struct net_device_ops *ops = dev->netdev_ops; 374 struct ifreq ifr; 375 struct ip_tunnel_parm p; 376 377 memset(&p, 0, sizeof(p)); 378 p.iph.daddr = v->vifc_rmt_addr.s_addr; 379 p.iph.saddr = v->vifc_lcl_addr.s_addr; 380 p.iph.version = 4; 381 p.iph.ihl = 5; 382 p.iph.protocol = IPPROTO_IPIP; 383 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 384 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 385 386 if (ops->ndo_do_ioctl) { 387 mm_segment_t oldfs = get_fs(); 388 389 set_fs(KERNEL_DS); 390 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 391 set_fs(oldfs); 392 } 393 } 394 } 395 396 /* Initialize ipmr pimreg/tunnel 
in_device */ 397 static bool ipmr_init_vif_indev(const struct net_device *dev) 398 { 399 struct in_device *in_dev; 400 401 ASSERT_RTNL(); 402 403 in_dev = __in_dev_get_rtnl(dev); 404 if (!in_dev) 405 return false; 406 ipv4_devconf_setall(in_dev); 407 neigh_parms_data_state_setall(in_dev->arp_parms); 408 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 409 410 return true; 411 } 412 413 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 414 { 415 struct net_device *dev; 416 417 dev = __dev_get_by_name(net, "tunl0"); 418 419 if (dev) { 420 const struct net_device_ops *ops = dev->netdev_ops; 421 int err; 422 struct ifreq ifr; 423 struct ip_tunnel_parm p; 424 425 memset(&p, 0, sizeof(p)); 426 p.iph.daddr = v->vifc_rmt_addr.s_addr; 427 p.iph.saddr = v->vifc_lcl_addr.s_addr; 428 p.iph.version = 4; 429 p.iph.ihl = 5; 430 p.iph.protocol = IPPROTO_IPIP; 431 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 432 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 433 434 if (ops->ndo_do_ioctl) { 435 mm_segment_t oldfs = get_fs(); 436 437 set_fs(KERNEL_DS); 438 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 439 set_fs(oldfs); 440 } else { 441 err = -EOPNOTSUPP; 442 } 443 dev = NULL; 444 445 if (err == 0 && 446 (dev = __dev_get_by_name(net, p.name)) != NULL) { 447 dev->flags |= IFF_MULTICAST; 448 if (!ipmr_init_vif_indev(dev)) 449 goto failure; 450 if (dev_open(dev)) 451 goto failure; 452 dev_hold(dev); 453 } 454 } 455 return dev; 456 457 failure: 458 unregister_netdevice(dev); 459 return NULL; 460 } 461 462 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 463 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 464 { 465 struct net *net = dev_net(dev); 466 struct mr_table *mrt; 467 struct flowi4 fl4 = { 468 .flowi4_oif = dev->ifindex, 469 .flowi4_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 470 .flowi4_mark = skb->mark, 471 }; 472 int err; 473 474 err = ipmr_fib_lookup(net, &fl4, &mrt); 475 if (err < 0) { 476 kfree_skb(skb); 477 return err; 478 } 479 480 read_lock(&mrt_lock); 481 dev->stats.tx_bytes += skb->len; 482 dev->stats.tx_packets++; 483 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); 484 read_unlock(&mrt_lock); 485 kfree_skb(skb); 486 return NETDEV_TX_OK; 487 } 488 489 static int reg_vif_get_iflink(const struct net_device *dev) 490 { 491 return 0; 492 } 493 494 static const struct net_device_ops reg_vif_netdev_ops = { 495 .ndo_start_xmit = reg_vif_xmit, 496 .ndo_get_iflink = reg_vif_get_iflink, 497 }; 498 499 static void reg_vif_setup(struct net_device *dev) 500 { 501 dev->type = ARPHRD_PIMREG; 502 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 503 dev->flags = IFF_NOARP; 504 dev->netdev_ops = ®_vif_netdev_ops; 505 dev->needs_free_netdev = true; 506 dev->features |= NETIF_F_NETNS_LOCAL; 507 } 508 509 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 510 { 511 struct net_device *dev; 512 char name[IFNAMSIZ]; 513 514 if (mrt->id == RT_TABLE_DEFAULT) 515 sprintf(name, "pimreg"); 516 else 517 sprintf(name, "pimreg%u", mrt->id); 518 519 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 520 521 if (!dev) 522 return NULL; 523 524 dev_net_set(dev, net); 525 526 if (register_netdevice(dev)) { 527 free_netdev(dev); 528 return NULL; 529 } 530 531 if (!ipmr_init_vif_indev(dev)) 532 goto failure; 533 if (dev_open(dev)) 534 goto failure; 535 536 dev_hold(dev); 537 538 return dev; 539 540 failure: 541 unregister_netdevice(dev); 542 return NULL; 543 } 544 545 /* called with rcu_read_lock() */ 546 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 547 unsigned int pimlen) 548 { 549 struct net_device *reg_dev = NULL; 550 struct iphdr *encap; 551 552 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 553 /* Check that: 554 * a. 
packet is really sent to a multicast group 555 * b. packet is not a NULL-REGISTER 556 * c. packet is not truncated 557 */ 558 if (!ipv4_is_multicast(encap->daddr) || 559 encap->tot_len == 0 || 560 ntohs(encap->tot_len) + pimlen > skb->len) 561 return 1; 562 563 read_lock(&mrt_lock); 564 if (mrt->mroute_reg_vif_num >= 0) 565 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 566 read_unlock(&mrt_lock); 567 568 if (!reg_dev) 569 return 1; 570 571 skb->mac_header = skb->network_header; 572 skb_pull(skb, (u8 *)encap - skb->data); 573 skb_reset_network_header(skb); 574 skb->protocol = htons(ETH_P_IP); 575 skb->ip_summed = CHECKSUM_NONE; 576 577 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 578 579 netif_rx(skb); 580 581 return NET_RX_SUCCESS; 582 } 583 #else 584 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 585 { 586 return NULL; 587 } 588 #endif 589 590 /** 591 * vif_delete - Delete a VIF entry 592 * @notify: Set to 1, if the caller is a notifier_call 593 */ 594 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 595 struct list_head *head) 596 { 597 struct vif_device *v; 598 struct net_device *dev; 599 struct in_device *in_dev; 600 601 if (vifi < 0 || vifi >= mrt->maxvif) 602 return -EADDRNOTAVAIL; 603 604 v = &mrt->vif_table[vifi]; 605 606 write_lock_bh(&mrt_lock); 607 dev = v->dev; 608 v->dev = NULL; 609 610 if (!dev) { 611 write_unlock_bh(&mrt_lock); 612 return -EADDRNOTAVAIL; 613 } 614 615 if (vifi == mrt->mroute_reg_vif_num) 616 mrt->mroute_reg_vif_num = -1; 617 618 if (vifi + 1 == mrt->maxvif) { 619 int tmp; 620 621 for (tmp = vifi - 1; tmp >= 0; tmp--) { 622 if (VIF_EXISTS(mrt, tmp)) 623 break; 624 } 625 mrt->maxvif = tmp+1; 626 } 627 628 write_unlock_bh(&mrt_lock); 629 630 dev_set_allmulti(dev, -1); 631 632 in_dev = __in_dev_get_rtnl(dev); 633 if (in_dev) { 634 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 635 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 636 NETCONFA_MC_FORWARDING, 637 dev->ifindex, 
&in_dev->cnf); 638 ip_rt_multicast_event(in_dev); 639 } 640 641 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 642 unregister_netdevice_queue(dev, head); 643 644 dev_put(dev); 645 return 0; 646 } 647 648 static void ipmr_cache_free_rcu(struct rcu_head *head) 649 { 650 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); 651 652 kmem_cache_free(mrt_cachep, c); 653 } 654 655 static inline void ipmr_cache_free(struct mfc_cache *c) 656 { 657 call_rcu(&c->rcu, ipmr_cache_free_rcu); 658 } 659 660 /* Destroy an unresolved cache entry, killing queued skbs 661 * and reporting error to netlink readers. 662 */ 663 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 664 { 665 struct net *net = read_pnet(&mrt->net); 666 struct sk_buff *skb; 667 struct nlmsgerr *e; 668 669 atomic_dec(&mrt->cache_resolve_queue_len); 670 671 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 672 if (ip_hdr(skb)->version == 0) { 673 struct nlmsghdr *nlh = skb_pull(skb, 674 sizeof(struct iphdr)); 675 nlh->nlmsg_type = NLMSG_ERROR; 676 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 677 skb_trim(skb, nlh->nlmsg_len); 678 e = nlmsg_data(nlh); 679 e->error = -ETIMEDOUT; 680 memset(&e->msg, 0, sizeof(e->msg)); 681 682 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 683 } else { 684 kfree_skb(skb); 685 } 686 } 687 688 ipmr_cache_free(c); 689 } 690 691 /* Timer process for the unresolved queue. 
*/
/* @arg is the struct mr_table whose queue is scanned.  Expired entries are
 * destroyed; the timer is re-armed for the earliest remaining expiry,
 * capped at 10 seconds.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	/* Lock is busy: do not spin in the timer, retry in HZ/10 instead */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* still alive: track the smallest remaining time */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mroute_netlink_event(mrt, c, RTM_DELROUTE);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
/* Resets every TTL threshold of @cache to 255, then copies from @ttls the
 * thresholds of existing vifs whose value is in 1..254, and records the
 * resulting [minvif, maxvif) range.
 */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

/* Add the virtual interface described by @vifc to @mrt.
 *
 * Depending on vifc->vifc_flags the backing device is a newly created
 * PIM register device, a newly created DVMRP tunnel, or an existing
 * device looked up by ifindex or by local address.  A vif added with
 * @mrtsock == 0 is marked VIFF_STATIC.
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): vifc->vifc_vifi indexes vif_table without a range check
 * here - presumably validated by the caller; confirm.
 */
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */

	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev_get_iflink(dev);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

/* called with rcu_read_lock() */
/* Return the first cache entry keyed by (@origin, @mcastgrp), or NULL. */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		return c;

	return NULL;
}

/* Look for a (*,*,oif) entry */
/* i.e. an entry keyed (INADDR_ANY, INADDR_ANY) whose TTL threshold for
 * @vifi is below 255; NULL if there is none.
 */
static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
						    int vifi)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = htonl(INADDR_ANY),
			.mfc_origin = htonl(INADDR_ANY)
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
/* An entry qualifies when @vifi is one of its outputs, or one of the
 * outputs of the (*,*) entry found for its parent vif.  Falls back to the
 * (*,*,@vifi) lookup when nothing qualifies or @mcastgrp is INADDR_ANY.
 */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = htonl(INADDR_ANY)
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c, *proxy;

	if (mcastgrp == htonl(INADDR_ANY))
		goto skip;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

		/* It's ok if the vifi is part of the static tree */
		proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
			return c;
	}

skip:
	return ipmr_cache_find_any_parent(mrt, vifi);
}

/* Look for a (S,G,iif) entry if parent != -1 */
/* With @parent == -1 the first (S,G) entry matches regardless of its
 * incoming vif.  Returns NULL when nothing matches.
 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin,
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (parent == -1 || parent == c->mfc_parent)
			return c;

	return NULL;
}

/* Allocate a multicast cache entry */
/* Uses GFP_KERNEL; returns NULL on allocation failure. */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		/* backdate last_assert by more than MFC_ASSERT_THRESH */
		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->mfc_un.res.minvif = MAXVIFS;
	}
	return c;
}

/* Allocate an unresolved cache entry (GFP_ATOMIC) with an empty skb queue
 * and an expiry 10 seconds from now; returns NULL on allocation failure.
 */
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->mfc_un.unres.unresolved);
		c->mfc_un.unres.expires = jiffies + 10*HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
/* Drains the skb queue of the unresolved entry @uc: queued netlink
 * requests are answered from the resolved entry @c, queued data packets
 * are forwarded through @c.
 */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		/* IP version 0 marks a queued netlink request */
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				/* could not fill the route: reply -EMSGSIZE */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 */
/* @pkt is not consumed.  For IGMPMSG_WHOLEPKT a reallocated copy of @pkt
 * with an extra prepended IP header is queued; for any other @assert value
 * only the IP header plus an IGMP header is queued.
 *
 * Returns -ENOBUFS if no skb could be allocated, -EINVAL if no mroute
 * socket is attached to @mrt, otherwise the result of
 * sock_queue_rcv_skb().
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		/* Add our header */
		igmp = skb_put(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry!
*/
/* Consumes @skb in every case: it is either appended to the unresolved
 * entry for its (source, group) pair or freed.  A new unresolved entry is
 * created (and reported to the daemon as IGMPMSG_NOCACHE) when none
 * exists, unless 10 entries are already pending.
 *
 * Returns 0 when the packet was queued, -ENOBUFS when a limit was hit or
 * allocation failed, or the error from ipmr_cache_report().
 */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		/* first pending entry: start the expiry timer */
		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

/* Remove the resolved entry matching @mfc (and @parent, unless it is -1)
 * from @mrt, notify netlink listeners and free the entry after an RCU
 * grace period.  Returns 0, or -ENOENT if no such entry exists.
 */
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct mfc_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
	list_del_rcu(&c->list);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	ipmr_cache_free(c);

	return 0;
}

/* Add the entry described by @mfc to @mrt, or update it in place if it
 * already exists.  An entry added with @mrtsock == 0 is marked
 * MFC_STATIC.  If the new entry resolves a pending unresolved entry, the
 * packets queued on the latter are played through the new entry.
 *
 * Returns 0 on success, -ENFILE if the parent vif index is out of range,
 * -EINVAL if the group is neither INADDR_ANY nor multicast, -ENOMEM on
 * allocation failure, or the hash insertion error.
 */
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* existing entry: update parent and thresholds in place */
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
/* With @all false, vifs flagged VIFF_STATIC and cache entries flagged
 * MFC_STATIC are kept; with @all true everything is removed.  Pending
 * unresolved entries are always destroyed.
 */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mfc_cache *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		vif_delete(mrt, i, 0, &list);
	}
	/* unregister in one batch the devices queued by vif_delete() */
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
		list_del_rcu(&c->list);
		mroute_netlink_event(mrt, c, RTM_DELROUTE);
		ipmr_cache_free(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			mroute_netlink_event(mrt, c, RTM_DELROUTE);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we dont need to call synchronize_rcu() here
 */
/* Detach @sk from every table it controls: decrement the global
 * MC_FORWARDING setting, announce the change, clear the table's socket
 * pointer and drop its non-static vifs and cache entries.
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	ASSERT_RTNL();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, false);
		}
	}
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ?
: RT_TABLE_DEFAULT); 1328 if (!mrt) { 1329 ret = -ENOENT; 1330 goto out_unlock; 1331 } 1332 if (optname != MRT_INIT) { 1333 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1334 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1335 ret = -EACCES; 1336 goto out_unlock; 1337 } 1338 } 1339 1340 switch (optname) { 1341 case MRT_INIT: 1342 if (optlen != sizeof(int)) { 1343 ret = -EINVAL; 1344 break; 1345 } 1346 if (rtnl_dereference(mrt->mroute_sk)) { 1347 ret = -EADDRINUSE; 1348 break; 1349 } 1350 1351 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1352 if (ret == 0) { 1353 rcu_assign_pointer(mrt->mroute_sk, sk); 1354 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1355 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1356 NETCONFA_MC_FORWARDING, 1357 NETCONFA_IFINDEX_ALL, 1358 net->ipv4.devconf_all); 1359 } 1360 break; 1361 case MRT_DONE: 1362 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1363 ret = -EACCES; 1364 } else { 1365 ret = ip_ra_control(sk, 0, NULL); 1366 goto out_unlock; 1367 } 1368 break; 1369 case MRT_ADD_VIF: 1370 case MRT_DEL_VIF: 1371 if (optlen != sizeof(vif)) { 1372 ret = -EINVAL; 1373 break; 1374 } 1375 if (copy_from_user(&vif, optval, sizeof(vif))) { 1376 ret = -EFAULT; 1377 break; 1378 } 1379 if (vif.vifc_vifi >= MAXVIFS) { 1380 ret = -ENFILE; 1381 break; 1382 } 1383 if (optname == MRT_ADD_VIF) { 1384 ret = vif_add(net, mrt, &vif, 1385 sk == rtnl_dereference(mrt->mroute_sk)); 1386 } else { 1387 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1388 } 1389 break; 1390 /* Manipulate the forwarding caches. These live 1391 * in a sort of kernel/user symbiosis. 
1392 */ 1393 case MRT_ADD_MFC: 1394 case MRT_DEL_MFC: 1395 parent = -1; 1396 case MRT_ADD_MFC_PROXY: 1397 case MRT_DEL_MFC_PROXY: 1398 if (optlen != sizeof(mfc)) { 1399 ret = -EINVAL; 1400 break; 1401 } 1402 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1403 ret = -EFAULT; 1404 break; 1405 } 1406 if (parent == 0) 1407 parent = mfc.mfcc_parent; 1408 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1409 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1410 else 1411 ret = ipmr_mfc_add(net, mrt, &mfc, 1412 sk == rtnl_dereference(mrt->mroute_sk), 1413 parent); 1414 break; 1415 /* Control PIM assert. */ 1416 case MRT_ASSERT: 1417 if (optlen != sizeof(val)) { 1418 ret = -EINVAL; 1419 break; 1420 } 1421 if (get_user(val, (int __user *)optval)) { 1422 ret = -EFAULT; 1423 break; 1424 } 1425 mrt->mroute_do_assert = val; 1426 break; 1427 case MRT_PIM: 1428 if (!ipmr_pimsm_enabled()) { 1429 ret = -ENOPROTOOPT; 1430 break; 1431 } 1432 if (optlen != sizeof(val)) { 1433 ret = -EINVAL; 1434 break; 1435 } 1436 if (get_user(val, (int __user *)optval)) { 1437 ret = -EFAULT; 1438 break; 1439 } 1440 1441 val = !!val; 1442 if (val != mrt->mroute_do_pim) { 1443 mrt->mroute_do_pim = val; 1444 mrt->mroute_do_assert = val; 1445 } 1446 break; 1447 case MRT_TABLE: 1448 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1449 ret = -ENOPROTOOPT; 1450 break; 1451 } 1452 if (optlen != sizeof(uval)) { 1453 ret = -EINVAL; 1454 break; 1455 } 1456 if (get_user(uval, (u32 __user *)optval)) { 1457 ret = -EFAULT; 1458 break; 1459 } 1460 1461 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1462 ret = -EBUSY; 1463 } else { 1464 mrt = ipmr_new_table(net, uval); 1465 if (IS_ERR(mrt)) 1466 ret = PTR_ERR(mrt); 1467 else 1468 raw_sk(sk)->ipmr_table = uval; 1469 } 1470 break; 1471 /* Spurious command, or MRT_VERSION which you cannot set. 
*/ 1472 default: 1473 ret = -ENOPROTOOPT; 1474 } 1475 out_unlock: 1476 rtnl_unlock(); 1477 return ret; 1478 } 1479 1480 /* Getsock opt support for the multicast routing system. */ 1481 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1482 { 1483 int olr; 1484 int val; 1485 struct net *net = sock_net(sk); 1486 struct mr_table *mrt; 1487 1488 if (sk->sk_type != SOCK_RAW || 1489 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1490 return -EOPNOTSUPP; 1491 1492 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1493 if (!mrt) 1494 return -ENOENT; 1495 1496 switch (optname) { 1497 case MRT_VERSION: 1498 val = 0x0305; 1499 break; 1500 case MRT_PIM: 1501 if (!ipmr_pimsm_enabled()) 1502 return -ENOPROTOOPT; 1503 val = mrt->mroute_do_pim; 1504 break; 1505 case MRT_ASSERT: 1506 val = mrt->mroute_do_assert; 1507 break; 1508 default: 1509 return -ENOPROTOOPT; 1510 } 1511 1512 if (get_user(olr, optlen)) 1513 return -EFAULT; 1514 olr = min_t(unsigned int, olr, sizeof(int)); 1515 if (olr < 0) 1516 return -EINVAL; 1517 if (put_user(olr, optlen)) 1518 return -EFAULT; 1519 if (copy_to_user(optval, &val, olr)) 1520 return -EFAULT; 1521 return 0; 1522 } 1523 1524 /* The IP multicast ioctl support routines. */ 1525 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1526 { 1527 struct sioc_sg_req sr; 1528 struct sioc_vif_req vr; 1529 struct vif_device *vif; 1530 struct mfc_cache *c; 1531 struct net *net = sock_net(sk); 1532 struct mr_table *mrt; 1533 1534 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1535 if (!mrt) 1536 return -ENOENT; 1537 1538 switch (cmd) { 1539 case SIOCGETVIFCNT: 1540 if (copy_from_user(&vr, arg, sizeof(vr))) 1541 return -EFAULT; 1542 if (vr.vifi >= mrt->maxvif) 1543 return -EINVAL; 1544 read_lock(&mrt_lock); 1545 vif = &mrt->vif_table[vr.vifi]; 1546 if (VIF_EXISTS(mrt, vr.vifi)) { 1547 vr.icount = vif->pkt_in; 1548 vr.ocount = vif->pkt_out; 1549 vr.ibytes = vif->bytes_in; 1550 vr.obytes = vif->bytes_out; 1551 read_unlock(&mrt_lock); 1552 1553 if (copy_to_user(arg, &vr, sizeof(vr))) 1554 return -EFAULT; 1555 return 0; 1556 } 1557 read_unlock(&mrt_lock); 1558 return -EADDRNOTAVAIL; 1559 case SIOCGETSGCNT: 1560 if (copy_from_user(&sr, arg, sizeof(sr))) 1561 return -EFAULT; 1562 1563 rcu_read_lock(); 1564 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1565 if (c) { 1566 sr.pktcnt = c->mfc_un.res.pkt; 1567 sr.bytecnt = c->mfc_un.res.bytes; 1568 sr.wrong_if = c->mfc_un.res.wrong_if; 1569 rcu_read_unlock(); 1570 1571 if (copy_to_user(arg, &sr, sizeof(sr))) 1572 return -EFAULT; 1573 return 0; 1574 } 1575 rcu_read_unlock(); 1576 return -EADDRNOTAVAIL; 1577 default: 1578 return -ENOIOCTLCMD; 1579 } 1580 } 1581 1582 #ifdef CONFIG_COMPAT 1583 struct compat_sioc_sg_req { 1584 struct in_addr src; 1585 struct in_addr grp; 1586 compat_ulong_t pktcnt; 1587 compat_ulong_t bytecnt; 1588 compat_ulong_t wrong_if; 1589 }; 1590 1591 struct compat_sioc_vif_req { 1592 vifi_t vifi; /* Which iface */ 1593 compat_ulong_t icount; 1594 compat_ulong_t ocount; 1595 compat_ulong_t ibytes; 1596 compat_ulong_t obytes; 1597 }; 1598 1599 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1600 { 1601 struct compat_sioc_sg_req sr; 1602 struct compat_sioc_vif_req vr; 1603 struct vif_device *vif; 1604 struct mfc_cache *c; 1605 struct net *net = sock_net(sk); 1606 struct mr_table *mrt; 1607 1608 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1609 if (!mrt) 1610 return -ENOENT; 1611 1612 switch (cmd) { 1613 case SIOCGETVIFCNT: 1614 if (copy_from_user(&vr, arg, sizeof(vr))) 1615 return -EFAULT; 1616 if (vr.vifi >= mrt->maxvif) 1617 return -EINVAL; 1618 read_lock(&mrt_lock); 1619 vif = &mrt->vif_table[vr.vifi]; 1620 if (VIF_EXISTS(mrt, vr.vifi)) { 1621 vr.icount = vif->pkt_in; 1622 vr.ocount = vif->pkt_out; 1623 vr.ibytes = vif->bytes_in; 1624 vr.obytes = vif->bytes_out; 1625 read_unlock(&mrt_lock); 1626 1627 if (copy_to_user(arg, &vr, sizeof(vr))) 1628 return -EFAULT; 1629 return 0; 1630 } 1631 read_unlock(&mrt_lock); 1632 return -EADDRNOTAVAIL; 1633 case SIOCGETSGCNT: 1634 if (copy_from_user(&sr, arg, sizeof(sr))) 1635 return -EFAULT; 1636 1637 rcu_read_lock(); 1638 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1639 if (c) { 1640 sr.pktcnt = c->mfc_un.res.pkt; 1641 sr.bytecnt = c->mfc_un.res.bytes; 1642 sr.wrong_if = c->mfc_un.res.wrong_if; 1643 rcu_read_unlock(); 1644 1645 if (copy_to_user(arg, &sr, sizeof(sr))) 1646 return -EFAULT; 1647 return 0; 1648 } 1649 rcu_read_unlock(); 1650 return -EADDRNOTAVAIL; 1651 default: 1652 return -ENOIOCTLCMD; 1653 } 1654 } 1655 #endif 1656 1657 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1658 { 1659 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1660 struct net *net = dev_net(dev); 1661 struct mr_table *mrt; 1662 struct vif_device *v; 1663 int ct; 1664 1665 if (event != NETDEV_UNREGISTER) 1666 return NOTIFY_DONE; 1667 1668 ipmr_for_each_table(mrt, net) { 1669 v = &mrt->vif_table[0]; 1670 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1671 if (v->dev == dev) 1672 vif_delete(mrt, ct, 1, NULL); 1673 } 1674 } 1675 return NOTIFY_DONE; 1676 } 1677 1678 static struct notifier_block ip_mr_notifier = { 1679 .notifier_call = ipmr_device_event, 1680 }; 1681 1682 /* Encapsulate a packet by attaching a valid IPIP header to it. 
1683 * This avoids tunnel drivers and other mess and gives us the speed so 1684 * important for multicast video. 1685 */ 1686 static void ip_encap(struct net *net, struct sk_buff *skb, 1687 __be32 saddr, __be32 daddr) 1688 { 1689 struct iphdr *iph; 1690 const struct iphdr *old_iph = ip_hdr(skb); 1691 1692 skb_push(skb, sizeof(struct iphdr)); 1693 skb->transport_header = skb->network_header; 1694 skb_reset_network_header(skb); 1695 iph = ip_hdr(skb); 1696 1697 iph->version = 4; 1698 iph->tos = old_iph->tos; 1699 iph->ttl = old_iph->ttl; 1700 iph->frag_off = 0; 1701 iph->daddr = daddr; 1702 iph->saddr = saddr; 1703 iph->protocol = IPPROTO_IPIP; 1704 iph->ihl = 5; 1705 iph->tot_len = htons(skb->len); 1706 ip_select_ident(net, skb, NULL); 1707 ip_send_check(iph); 1708 1709 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1710 nf_reset(skb); 1711 } 1712 1713 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1714 struct sk_buff *skb) 1715 { 1716 struct ip_options *opt = &(IPCB(skb)->opt); 1717 1718 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1719 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1720 1721 if (unlikely(opt->optlen)) 1722 ip_forward_options(skb); 1723 1724 return dst_output(net, sk, skb); 1725 } 1726 1727 /* Processing handlers for ipmr_forward */ 1728 1729 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1730 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1731 { 1732 const struct iphdr *iph = ip_hdr(skb); 1733 struct vif_device *vif = &mrt->vif_table[vifi]; 1734 struct net_device *dev; 1735 struct rtable *rt; 1736 struct flowi4 fl4; 1737 int encap = 0; 1738 1739 if (!vif->dev) 1740 goto out_free; 1741 1742 if (vif->flags & VIFF_REGISTER) { 1743 vif->pkt_out++; 1744 vif->bytes_out += skb->len; 1745 vif->dev->stats.tx_bytes += skb->len; 1746 vif->dev->stats.tx_packets++; 1747 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1748 goto out_free; 1749 } 1750 1751 if (vif->flags & VIFF_TUNNEL) { 1752 
rt = ip_route_output_ports(net, &fl4, NULL, 1753 vif->remote, vif->local, 1754 0, 0, 1755 IPPROTO_IPIP, 1756 RT_TOS(iph->tos), vif->link); 1757 if (IS_ERR(rt)) 1758 goto out_free; 1759 encap = sizeof(struct iphdr); 1760 } else { 1761 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1762 0, 0, 1763 IPPROTO_IPIP, 1764 RT_TOS(iph->tos), vif->link); 1765 if (IS_ERR(rt)) 1766 goto out_free; 1767 } 1768 1769 dev = rt->dst.dev; 1770 1771 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1772 /* Do not fragment multicasts. Alas, IPv4 does not 1773 * allow to send ICMP, so that packets will disappear 1774 * to blackhole. 1775 */ 1776 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1777 ip_rt_put(rt); 1778 goto out_free; 1779 } 1780 1781 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; 1782 1783 if (skb_cow(skb, encap)) { 1784 ip_rt_put(rt); 1785 goto out_free; 1786 } 1787 1788 vif->pkt_out++; 1789 vif->bytes_out += skb->len; 1790 1791 skb_dst_drop(skb); 1792 skb_dst_set(skb, &rt->dst); 1793 ip_decrease_ttl(ip_hdr(skb)); 1794 1795 /* FIXME: forward and output firewalls used to be called here. 1796 * What do we do with netfilter? -- RR 1797 */ 1798 if (vif->flags & VIFF_TUNNEL) { 1799 ip_encap(net, skb, vif->local, vif->remote); 1800 /* FIXME: extra output firewall step used to be here. --RR */ 1801 vif->dev->stats.tx_packets++; 1802 vif->dev->stats.tx_bytes += skb->len; 1803 } 1804 1805 IPCB(skb)->flags |= IPSKB_FORWARDED; 1806 1807 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1808 * not only before forwarding, but after forwarding on all output 1809 * interfaces. It is clear, if mrouter runs a multicasting 1810 * program, it should receive packets not depending to what interface 1811 * program is joined. 1812 * If we will not make it, the program will have to join on all 1813 * interfaces. 
On the other hand, multihoming host (or router, but 1814 * not mrouter) cannot join to more than one interface - it will 1815 * result in receiving multiple packets. 1816 */ 1817 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, 1818 net, NULL, skb, skb->dev, dev, 1819 ipmr_forward_finish); 1820 return; 1821 1822 out_free: 1823 kfree_skb(skb); 1824 } 1825 1826 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) 1827 { 1828 int ct; 1829 1830 for (ct = mrt->maxvif-1; ct >= 0; ct--) { 1831 if (mrt->vif_table[ct].dev == dev) 1832 break; 1833 } 1834 return ct; 1835 } 1836 1837 /* "local" means that we should preserve one skb (for local delivery) */ 1838 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 1839 struct net_device *dev, struct sk_buff *skb, 1840 struct mfc_cache *cache, int local) 1841 { 1842 int true_vifi = ipmr_find_vif(mrt, dev); 1843 int psend = -1; 1844 int vif, ct; 1845 1846 vif = cache->mfc_parent; 1847 cache->mfc_un.res.pkt++; 1848 cache->mfc_un.res.bytes += skb->len; 1849 cache->mfc_un.res.lastuse = jiffies; 1850 1851 if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { 1852 struct mfc_cache *cache_proxy; 1853 1854 /* For an (*,G) entry, we only check that the incomming 1855 * interface is part of the static tree. 1856 */ 1857 cache_proxy = ipmr_cache_find_any_parent(mrt, vif); 1858 if (cache_proxy && 1859 cache_proxy->mfc_un.res.ttls[true_vifi] < 255) 1860 goto forward; 1861 } 1862 1863 /* Wrong interface: drop packet and (maybe) send PIM assert. */ 1864 if (mrt->vif_table[vif].dev != dev) { 1865 if (rt_is_output_route(skb_rtable(skb))) { 1866 /* It is our own packet, looped back. 1867 * Very complicated situation... 1868 * 1869 * The best workaround until routing daemons will be 1870 * fixed is not to redistribute packet, if it was 1871 * send through wrong interface. It means, that 1872 * multicast applications WILL NOT work for 1873 * (S,G), which have default multicast route pointing 1874 * to wrong oif. 
In any case, it is not a good 1875 * idea to use multicasting applications on router. 1876 */ 1877 goto dont_forward; 1878 } 1879 1880 cache->mfc_un.res.wrong_if++; 1881 1882 if (true_vifi >= 0 && mrt->mroute_do_assert && 1883 /* pimsm uses asserts, when switching from RPT to SPT, 1884 * so that we cannot check that packet arrived on an oif. 1885 * It is bad, but otherwise we would need to move pretty 1886 * large chunk of pimd to kernel. Ough... --ANK 1887 */ 1888 (mrt->mroute_do_pim || 1889 cache->mfc_un.res.ttls[true_vifi] < 255) && 1890 time_after(jiffies, 1891 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1892 cache->mfc_un.res.last_assert = jiffies; 1893 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 1894 } 1895 goto dont_forward; 1896 } 1897 1898 forward: 1899 mrt->vif_table[vif].pkt_in++; 1900 mrt->vif_table[vif].bytes_in += skb->len; 1901 1902 /* Forward the frame */ 1903 if (cache->mfc_origin == htonl(INADDR_ANY) && 1904 cache->mfc_mcastgrp == htonl(INADDR_ANY)) { 1905 if (true_vifi >= 0 && 1906 true_vifi != cache->mfc_parent && 1907 ip_hdr(skb)->ttl > 1908 cache->mfc_un.res.ttls[cache->mfc_parent]) { 1909 /* It's an (*,*) entry and the packet is not coming from 1910 * the upstream: forward the packet to the upstream 1911 * only. 
1912 */ 1913 psend = cache->mfc_parent; 1914 goto last_forward; 1915 } 1916 goto dont_forward; 1917 } 1918 for (ct = cache->mfc_un.res.maxvif - 1; 1919 ct >= cache->mfc_un.res.minvif; ct--) { 1920 /* For (*,G) entry, don't forward to the incoming interface */ 1921 if ((cache->mfc_origin != htonl(INADDR_ANY) || 1922 ct != true_vifi) && 1923 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1924 if (psend != -1) { 1925 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1926 1927 if (skb2) 1928 ipmr_queue_xmit(net, mrt, skb2, cache, 1929 psend); 1930 } 1931 psend = ct; 1932 } 1933 } 1934 last_forward: 1935 if (psend != -1) { 1936 if (local) { 1937 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1938 1939 if (skb2) 1940 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1941 } else { 1942 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1943 return; 1944 } 1945 } 1946 1947 dont_forward: 1948 if (!local) 1949 kfree_skb(skb); 1950 } 1951 1952 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 1953 { 1954 struct rtable *rt = skb_rtable(skb); 1955 struct iphdr *iph = ip_hdr(skb); 1956 struct flowi4 fl4 = { 1957 .daddr = iph->daddr, 1958 .saddr = iph->saddr, 1959 .flowi4_tos = RT_TOS(iph->tos), 1960 .flowi4_oif = (rt_is_output_route(rt) ? 1961 skb->dev->ifindex : 0), 1962 .flowi4_iif = (rt_is_output_route(rt) ? 1963 LOOPBACK_IFINDEX : 1964 skb->dev->ifindex), 1965 .flowi4_mark = skb->mark, 1966 }; 1967 struct mr_table *mrt; 1968 int err; 1969 1970 err = ipmr_fib_lookup(net, &fl4, &mrt); 1971 if (err) 1972 return ERR_PTR(err); 1973 return mrt; 1974 } 1975 1976 /* Multicast packets for forwarding arrive here 1977 * Called with rcu_read_lock(); 1978 */ 1979 int ip_mr_input(struct sk_buff *skb) 1980 { 1981 struct mfc_cache *cache; 1982 struct net *net = dev_net(skb->dev); 1983 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1984 struct mr_table *mrt; 1985 struct net_device *dev; 1986 1987 /* skb->dev passed in is the loX master dev for vrfs. 
1988 * As there are no vifs associated with loopback devices, 1989 * get the proper interface that does have a vif associated with it. 1990 */ 1991 dev = skb->dev; 1992 if (netif_is_l3_master(skb->dev)) { 1993 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 1994 if (!dev) { 1995 kfree_skb(skb); 1996 return -ENODEV; 1997 } 1998 } 1999 2000 /* Packet is looped back after forward, it should not be 2001 * forwarded second time, but still can be delivered locally. 2002 */ 2003 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2004 goto dont_forward; 2005 2006 mrt = ipmr_rt_fib_lookup(net, skb); 2007 if (IS_ERR(mrt)) { 2008 kfree_skb(skb); 2009 return PTR_ERR(mrt); 2010 } 2011 if (!local) { 2012 if (IPCB(skb)->opt.router_alert) { 2013 if (ip_call_ra_chain(skb)) 2014 return 0; 2015 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2016 /* IGMPv1 (and broken IGMPv2 implementations sort of 2017 * Cisco IOS <= 11.2(8)) do not put router alert 2018 * option to IGMP packets destined to routable 2019 * groups. It is very bad, because it means 2020 * that we can forward NO IGMP messages. 
2021 */ 2022 struct sock *mroute_sk; 2023 2024 mroute_sk = rcu_dereference(mrt->mroute_sk); 2025 if (mroute_sk) { 2026 nf_reset(skb); 2027 raw_rcv(mroute_sk, skb); 2028 return 0; 2029 } 2030 } 2031 } 2032 2033 /* already under rcu_read_lock() */ 2034 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2035 if (!cache) { 2036 int vif = ipmr_find_vif(mrt, dev); 2037 2038 if (vif >= 0) 2039 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2040 vif); 2041 } 2042 2043 /* No usable cache entry */ 2044 if (!cache) { 2045 int vif; 2046 2047 if (local) { 2048 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2049 ip_local_deliver(skb); 2050 if (!skb2) 2051 return -ENOBUFS; 2052 skb = skb2; 2053 } 2054 2055 read_lock(&mrt_lock); 2056 vif = ipmr_find_vif(mrt, dev); 2057 if (vif >= 0) { 2058 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2059 read_unlock(&mrt_lock); 2060 2061 return err2; 2062 } 2063 read_unlock(&mrt_lock); 2064 kfree_skb(skb); 2065 return -ENODEV; 2066 } 2067 2068 read_lock(&mrt_lock); 2069 ip_mr_forward(net, mrt, dev, skb, cache, local); 2070 read_unlock(&mrt_lock); 2071 2072 if (local) 2073 return ip_local_deliver(skb); 2074 2075 return 0; 2076 2077 dont_forward: 2078 if (local) 2079 return ip_local_deliver(skb); 2080 kfree_skb(skb); 2081 return 0; 2082 } 2083 2084 #ifdef CONFIG_IP_PIMSM_V1 2085 /* Handle IGMP messages of PIMv1 */ 2086 int pim_rcv_v1(struct sk_buff *skb) 2087 { 2088 struct igmphdr *pim; 2089 struct net *net = dev_net(skb->dev); 2090 struct mr_table *mrt; 2091 2092 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2093 goto drop; 2094 2095 pim = igmp_hdr(skb); 2096 2097 mrt = ipmr_rt_fib_lookup(net, skb); 2098 if (IS_ERR(mrt)) 2099 goto drop; 2100 if (!mrt->mroute_do_pim || 2101 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2102 goto drop; 2103 2104 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2105 drop: 2106 kfree_skb(skb); 2107 } 2108 return 0; 2109 } 2110 #endif 2111 2112 #ifdef 
CONFIG_IP_PIMSM_V2 2113 static int pim_rcv(struct sk_buff *skb) 2114 { 2115 struct pimreghdr *pim; 2116 struct net *net = dev_net(skb->dev); 2117 struct mr_table *mrt; 2118 2119 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2120 goto drop; 2121 2122 pim = (struct pimreghdr *)skb_transport_header(skb); 2123 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2124 (pim->flags & PIM_NULL_REGISTER) || 2125 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2126 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2127 goto drop; 2128 2129 mrt = ipmr_rt_fib_lookup(net, skb); 2130 if (IS_ERR(mrt)) 2131 goto drop; 2132 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2133 drop: 2134 kfree_skb(skb); 2135 } 2136 return 0; 2137 } 2138 #endif 2139 2140 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2141 struct mfc_cache *c, struct rtmsg *rtm) 2142 { 2143 struct rta_mfc_stats mfcs; 2144 struct nlattr *mp_attr; 2145 struct rtnexthop *nhp; 2146 unsigned long lastuse; 2147 int ct; 2148 2149 /* If cache is unresolved, don't try to parse IIF and OIF */ 2150 if (c->mfc_parent >= MAXVIFS) { 2151 rtm->rtm_flags |= RTNH_F_UNRESOLVED; 2152 return -ENOENT; 2153 } 2154 2155 if (VIF_EXISTS(mrt, c->mfc_parent) && 2156 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2157 return -EMSGSIZE; 2158 2159 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2160 return -EMSGSIZE; 2161 2162 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2163 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2164 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { 2165 nla_nest_cancel(skb, mp_attr); 2166 return -EMSGSIZE; 2167 } 2168 2169 nhp->rtnh_flags = 0; 2170 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2171 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2172 nhp->rtnh_len = sizeof(*nhp); 2173 } 2174 } 2175 2176 nla_nest_end(skb, mp_attr); 2177 2178 lastuse = READ_ONCE(c->mfc_un.res.lastuse); 2179 lastuse = 
time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; 2180 2181 mfcs.mfcs_packets = c->mfc_un.res.pkt; 2182 mfcs.mfcs_bytes = c->mfc_un.res.bytes; 2183 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; 2184 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || 2185 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), 2186 RTA_PAD)) 2187 return -EMSGSIZE; 2188 2189 rtm->rtm_type = RTN_MULTICAST; 2190 return 1; 2191 } 2192 2193 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2194 __be32 saddr, __be32 daddr, 2195 struct rtmsg *rtm, u32 portid) 2196 { 2197 struct mfc_cache *cache; 2198 struct mr_table *mrt; 2199 int err; 2200 2201 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2202 if (!mrt) 2203 return -ENOENT; 2204 2205 rcu_read_lock(); 2206 cache = ipmr_cache_find(mrt, saddr, daddr); 2207 if (!cache && skb->dev) { 2208 int vif = ipmr_find_vif(mrt, skb->dev); 2209 2210 if (vif >= 0) 2211 cache = ipmr_cache_find_any(mrt, daddr, vif); 2212 } 2213 if (!cache) { 2214 struct sk_buff *skb2; 2215 struct iphdr *iph; 2216 struct net_device *dev; 2217 int vif = -1; 2218 2219 dev = skb->dev; 2220 read_lock(&mrt_lock); 2221 if (dev) 2222 vif = ipmr_find_vif(mrt, dev); 2223 if (vif < 0) { 2224 read_unlock(&mrt_lock); 2225 rcu_read_unlock(); 2226 return -ENODEV; 2227 } 2228 skb2 = skb_clone(skb, GFP_ATOMIC); 2229 if (!skb2) { 2230 read_unlock(&mrt_lock); 2231 rcu_read_unlock(); 2232 return -ENOMEM; 2233 } 2234 2235 NETLINK_CB(skb2).portid = portid; 2236 skb_push(skb2, sizeof(struct iphdr)); 2237 skb_reset_network_header(skb2); 2238 iph = ip_hdr(skb2); 2239 iph->ihl = sizeof(struct iphdr) >> 2; 2240 iph->saddr = saddr; 2241 iph->daddr = daddr; 2242 iph->version = 0; 2243 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2244 read_unlock(&mrt_lock); 2245 rcu_read_unlock(); 2246 return err; 2247 } 2248 2249 read_lock(&mrt_lock); 2250 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2251 read_unlock(&mrt_lock); 2252 rcu_read_unlock(); 2253 return err; 
2254 } 2255 2256 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2257 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2258 int flags) 2259 { 2260 struct nlmsghdr *nlh; 2261 struct rtmsg *rtm; 2262 int err; 2263 2264 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2265 if (!nlh) 2266 return -EMSGSIZE; 2267 2268 rtm = nlmsg_data(nlh); 2269 rtm->rtm_family = RTNL_FAMILY_IPMR; 2270 rtm->rtm_dst_len = 32; 2271 rtm->rtm_src_len = 32; 2272 rtm->rtm_tos = 0; 2273 rtm->rtm_table = mrt->id; 2274 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2275 goto nla_put_failure; 2276 rtm->rtm_type = RTN_MULTICAST; 2277 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2278 if (c->mfc_flags & MFC_STATIC) 2279 rtm->rtm_protocol = RTPROT_STATIC; 2280 else 2281 rtm->rtm_protocol = RTPROT_MROUTED; 2282 rtm->rtm_flags = 0; 2283 2284 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2285 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2286 goto nla_put_failure; 2287 err = __ipmr_fill_mroute(mrt, skb, c, rtm); 2288 /* do not break the dump if cache is unresolved */ 2289 if (err < 0 && err != -ENOENT) 2290 goto nla_put_failure; 2291 2292 nlmsg_end(skb, nlh); 2293 return 0; 2294 2295 nla_put_failure: 2296 nlmsg_cancel(skb, nlh); 2297 return -EMSGSIZE; 2298 } 2299 2300 static size_t mroute_msgsize(bool unresolved, int maxvif) 2301 { 2302 size_t len = 2303 NLMSG_ALIGN(sizeof(struct rtmsg)) 2304 + nla_total_size(4) /* RTA_TABLE */ 2305 + nla_total_size(4) /* RTA_SRC */ 2306 + nla_total_size(4) /* RTA_DST */ 2307 ; 2308 2309 if (!unresolved) 2310 len = len 2311 + nla_total_size(4) /* RTA_IIF */ 2312 + nla_total_size(0) /* RTA_MULTIPATH */ 2313 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2314 /* RTA_MFC_STATS */ 2315 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2316 ; 2317 2318 return len; 2319 } 2320 2321 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2322 int cmd) 2323 { 2324 struct net *net = read_pnet(&mrt->net); 2325 struct sk_buff 
*skb; 2326 int err = -ENOBUFS; 2327 2328 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), 2329 GFP_ATOMIC); 2330 if (!skb) 2331 goto errout; 2332 2333 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2334 if (err < 0) 2335 goto errout; 2336 2337 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2338 return; 2339 2340 errout: 2341 kfree_skb(skb); 2342 if (err < 0) 2343 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2344 } 2345 2346 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2347 { 2348 size_t len = 2349 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2350 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2351 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2352 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2353 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2354 /* IPMRA_CREPORT_PKT */ 2355 + nla_total_size(payloadlen) 2356 ; 2357 2358 return len; 2359 } 2360 2361 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2362 { 2363 struct net *net = read_pnet(&mrt->net); 2364 struct nlmsghdr *nlh; 2365 struct rtgenmsg *rtgenm; 2366 struct igmpmsg *msg; 2367 struct sk_buff *skb; 2368 struct nlattr *nla; 2369 int payloadlen; 2370 2371 payloadlen = pkt->len - sizeof(struct igmpmsg); 2372 msg = (struct igmpmsg *)skb_network_header(pkt); 2373 2374 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2375 if (!skb) 2376 goto errout; 2377 2378 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2379 sizeof(struct rtgenmsg), 0); 2380 if (!nlh) 2381 goto errout; 2382 rtgenm = nlmsg_data(nlh); 2383 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2384 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2385 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2386 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2387 msg->im_src.s_addr) || 2388 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2389 msg->im_dst.s_addr)) 2390 goto nla_put_failure; 2391 2392 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, 
payloadlen); 2393 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2394 nla_data(nla), payloadlen)) 2395 goto nla_put_failure; 2396 2397 nlmsg_end(skb, nlh); 2398 2399 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2400 return; 2401 2402 nla_put_failure: 2403 nlmsg_cancel(skb, nlh); 2404 errout: 2405 kfree_skb(skb); 2406 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2407 } 2408 2409 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2410 struct netlink_ext_ack *extack) 2411 { 2412 struct net *net = sock_net(in_skb->sk); 2413 struct nlattr *tb[RTA_MAX + 1]; 2414 struct sk_buff *skb = NULL; 2415 struct mfc_cache *cache; 2416 struct mr_table *mrt; 2417 struct rtmsg *rtm; 2418 __be32 src, grp; 2419 u32 tableid; 2420 int err; 2421 2422 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2423 rtm_ipv4_policy, extack); 2424 if (err < 0) 2425 goto errout; 2426 2427 rtm = nlmsg_data(nlh); 2428 2429 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2430 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2431 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2432 2433 mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); 2434 if (!mrt) { 2435 err = -ENOENT; 2436 goto errout_free; 2437 } 2438 2439 /* entries are added/deleted only under RTNL */ 2440 rcu_read_lock(); 2441 cache = ipmr_cache_find(mrt, src, grp); 2442 rcu_read_unlock(); 2443 if (!cache) { 2444 err = -ENOENT; 2445 goto errout_free; 2446 } 2447 2448 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2449 if (!skb) { 2450 err = -ENOBUFS; 2451 goto errout_free; 2452 } 2453 2454 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2455 nlh->nlmsg_seq, cache, 2456 RTM_NEWROUTE, 0); 2457 if (err < 0) 2458 goto errout_free; 2459 2460 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2461 2462 errout: 2463 return err; 2464 2465 errout_free: 2466 kfree_skb(skb); 2467 goto errout; 2468 } 2469 2470 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2471 { 2472 struct net *net = sock_net(skb->sk); 2473 struct mr_table *mrt; 2474 struct mfc_cache *mfc; 2475 unsigned int t = 0, s_t; 2476 unsigned int e = 0, s_e; 2477 2478 s_t = cb->args[0]; 2479 s_e = cb->args[1]; 2480 2481 rcu_read_lock(); 2482 ipmr_for_each_table(mrt, net) { 2483 if (t < s_t) 2484 goto next_table; 2485 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 2486 if (e < s_e) 2487 goto next_entry; 2488 if (ipmr_fill_mroute(mrt, skb, 2489 NETLINK_CB(cb->skb).portid, 2490 cb->nlh->nlmsg_seq, 2491 mfc, RTM_NEWROUTE, 2492 NLM_F_MULTI) < 0) 2493 goto done; 2494 next_entry: 2495 e++; 2496 } 2497 e = 0; 2498 s_e = 0; 2499 2500 spin_lock_bh(&mfc_unres_lock); 2501 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 2502 if (e < s_e) 2503 goto next_entry2; 2504 if (ipmr_fill_mroute(mrt, skb, 2505 NETLINK_CB(cb->skb).portid, 2506 cb->nlh->nlmsg_seq, 2507 mfc, RTM_NEWROUTE, 2508 NLM_F_MULTI) < 0) { 2509 spin_unlock_bh(&mfc_unres_lock); 2510 goto done; 2511 } 2512 next_entry2: 2513 e++; 2514 } 2515 spin_unlock_bh(&mfc_unres_lock); 2516 e = 0; 2517 s_e = 0; 2518 next_table: 2519 t++; 2520 
} 2521 done: 2522 rcu_read_unlock(); 2523 2524 cb->args[1] = e; 2525 cb->args[0] = t; 2526 2527 return skb->len; 2528 } 2529 2530 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2531 [RTA_SRC] = { .type = NLA_U32 }, 2532 [RTA_DST] = { .type = NLA_U32 }, 2533 [RTA_IIF] = { .type = NLA_U32 }, 2534 [RTA_TABLE] = { .type = NLA_U32 }, 2535 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2536 }; 2537 2538 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2539 { 2540 switch (rtm_protocol) { 2541 case RTPROT_STATIC: 2542 case RTPROT_MROUTED: 2543 return true; 2544 } 2545 return false; 2546 } 2547 2548 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2549 { 2550 struct rtnexthop *rtnh = nla_data(nla); 2551 int remaining = nla_len(nla), vifi = 0; 2552 2553 while (rtnh_ok(rtnh, remaining)) { 2554 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2555 if (++vifi == MAXVIFS) 2556 break; 2557 rtnh = rtnh_next(rtnh, &remaining); 2558 } 2559 2560 return remaining > 0 ? 
-EINVAL : vifi; 2561 } 2562 2563 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2564 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2565 struct mfcctl *mfcc, int *mrtsock, 2566 struct mr_table **mrtret, 2567 struct netlink_ext_ack *extack) 2568 { 2569 struct net_device *dev = NULL; 2570 u32 tblid = RT_TABLE_DEFAULT; 2571 struct mr_table *mrt; 2572 struct nlattr *attr; 2573 struct rtmsg *rtm; 2574 int ret, rem; 2575 2576 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, 2577 extack); 2578 if (ret < 0) 2579 goto out; 2580 rtm = nlmsg_data(nlh); 2581 2582 ret = -EINVAL; 2583 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2584 rtm->rtm_type != RTN_MULTICAST || 2585 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2586 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2587 goto out; 2588 2589 memset(mfcc, 0, sizeof(*mfcc)); 2590 mfcc->mfcc_parent = -1; 2591 ret = 0; 2592 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2593 switch (nla_type(attr)) { 2594 case RTA_SRC: 2595 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2596 break; 2597 case RTA_DST: 2598 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2599 break; 2600 case RTA_IIF: 2601 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2602 if (!dev) { 2603 ret = -ENODEV; 2604 goto out; 2605 } 2606 break; 2607 case RTA_MULTIPATH: 2608 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2609 ret = -EINVAL; 2610 goto out; 2611 } 2612 break; 2613 case RTA_PREFSRC: 2614 ret = 1; 2615 break; 2616 case RTA_TABLE: 2617 tblid = nla_get_u32(attr); 2618 break; 2619 } 2620 } 2621 mrt = ipmr_get_table(net, tblid); 2622 if (!mrt) { 2623 ret = -ENOENT; 2624 goto out; 2625 } 2626 *mrtret = mrt; 2627 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 
1 : 0; 2628 if (dev) 2629 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2630 2631 out: 2632 return ret; 2633 } 2634 2635 /* takes care of both newroute and delroute */ 2636 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2637 struct netlink_ext_ack *extack) 2638 { 2639 struct net *net = sock_net(skb->sk); 2640 int ret, mrtsock, parent; 2641 struct mr_table *tbl; 2642 struct mfcctl mfcc; 2643 2644 mrtsock = 0; 2645 tbl = NULL; 2646 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2647 if (ret < 0) 2648 return ret; 2649 2650 parent = ret ? mfcc.mfcc_parent : -1; 2651 if (nlh->nlmsg_type == RTM_NEWROUTE) 2652 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2653 else 2654 return ipmr_mfc_delete(tbl, &mfcc, parent); 2655 } 2656 2657 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2658 { 2659 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2660 2661 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2662 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2663 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2664 mrt->mroute_reg_vif_num) || 2665 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2666 mrt->mroute_do_assert) || 2667 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) 2668 return false; 2669 2670 return true; 2671 } 2672 2673 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2674 { 2675 struct nlattr *vif_nest; 2676 struct vif_device *vif; 2677 2678 /* if the VIF doesn't exist just continue */ 2679 if (!VIF_EXISTS(mrt, vifid)) 2680 return true; 2681 2682 vif = &mrt->vif_table[vifid]; 2683 vif_nest = nla_nest_start(skb, IPMRA_VIF); 2684 if (!vif_nest) 2685 return false; 2686 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2687 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2688 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2689 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2690 IPMRA_VIFA_PAD) || 2691 
nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2692 IPMRA_VIFA_PAD) || 2693 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2694 IPMRA_VIFA_PAD) || 2695 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2696 IPMRA_VIFA_PAD) || 2697 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2698 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2699 nla_nest_cancel(skb, vif_nest); 2700 return false; 2701 } 2702 nla_nest_end(skb, vif_nest); 2703 2704 return true; 2705 } 2706 2707 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2708 { 2709 struct net *net = sock_net(skb->sk); 2710 struct nlmsghdr *nlh = NULL; 2711 unsigned int t = 0, s_t; 2712 unsigned int e = 0, s_e; 2713 struct mr_table *mrt; 2714 2715 s_t = cb->args[0]; 2716 s_e = cb->args[1]; 2717 2718 ipmr_for_each_table(mrt, net) { 2719 struct nlattr *vifs, *af; 2720 struct ifinfomsg *hdr; 2721 u32 i; 2722 2723 if (t < s_t) 2724 goto skip_table; 2725 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2726 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2727 sizeof(*hdr), NLM_F_MULTI); 2728 if (!nlh) 2729 break; 2730 2731 hdr = nlmsg_data(nlh); 2732 memset(hdr, 0, sizeof(*hdr)); 2733 hdr->ifi_family = RTNL_FAMILY_IPMR; 2734 2735 af = nla_nest_start(skb, IFLA_AF_SPEC); 2736 if (!af) { 2737 nlmsg_cancel(skb, nlh); 2738 goto out; 2739 } 2740 2741 if (!ipmr_fill_table(mrt, skb)) { 2742 nlmsg_cancel(skb, nlh); 2743 goto out; 2744 } 2745 2746 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); 2747 if (!vifs) { 2748 nla_nest_end(skb, af); 2749 nlmsg_end(skb, nlh); 2750 goto out; 2751 } 2752 for (i = 0; i < mrt->maxvif; i++) { 2753 if (e < s_e) 2754 goto skip_entry; 2755 if (!ipmr_fill_vif(mrt, i, skb)) { 2756 nla_nest_end(skb, vifs); 2757 nla_nest_end(skb, af); 2758 nlmsg_end(skb, nlh); 2759 goto out; 2760 } 2761 skip_entry: 2762 e++; 2763 } 2764 s_e = 0; 2765 e = 0; 2766 nla_nest_end(skb, vifs); 2767 nla_nest_end(skb, af); 2768 nlmsg_end(skb, nlh); 2769 skip_table: 2770 
t++; 2771 } 2772 2773 out: 2774 cb->args[1] = e; 2775 cb->args[0] = t; 2776 2777 return skb->len; 2778 } 2779 2780 #ifdef CONFIG_PROC_FS 2781 /* The /proc interfaces to multicast routing : 2782 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2783 */ 2784 struct ipmr_vif_iter { 2785 struct seq_net_private p; 2786 struct mr_table *mrt; 2787 int ct; 2788 }; 2789 2790 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2791 struct ipmr_vif_iter *iter, 2792 loff_t pos) 2793 { 2794 struct mr_table *mrt = iter->mrt; 2795 2796 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2797 if (!VIF_EXISTS(mrt, iter->ct)) 2798 continue; 2799 if (pos-- == 0) 2800 return &mrt->vif_table[iter->ct]; 2801 } 2802 return NULL; 2803 } 2804 2805 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2806 __acquires(mrt_lock) 2807 { 2808 struct ipmr_vif_iter *iter = seq->private; 2809 struct net *net = seq_file_net(seq); 2810 struct mr_table *mrt; 2811 2812 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2813 if (!mrt) 2814 return ERR_PTR(-ENOENT); 2815 2816 iter->mrt = mrt; 2817 2818 read_lock(&mrt_lock); 2819 return *pos ? 
ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2820 : SEQ_START_TOKEN; 2821 } 2822 2823 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2824 { 2825 struct ipmr_vif_iter *iter = seq->private; 2826 struct net *net = seq_file_net(seq); 2827 struct mr_table *mrt = iter->mrt; 2828 2829 ++*pos; 2830 if (v == SEQ_START_TOKEN) 2831 return ipmr_vif_seq_idx(net, iter, 0); 2832 2833 while (++iter->ct < mrt->maxvif) { 2834 if (!VIF_EXISTS(mrt, iter->ct)) 2835 continue; 2836 return &mrt->vif_table[iter->ct]; 2837 } 2838 return NULL; 2839 } 2840 2841 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 2842 __releases(mrt_lock) 2843 { 2844 read_unlock(&mrt_lock); 2845 } 2846 2847 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2848 { 2849 struct ipmr_vif_iter *iter = seq->private; 2850 struct mr_table *mrt = iter->mrt; 2851 2852 if (v == SEQ_START_TOKEN) { 2853 seq_puts(seq, 2854 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 2855 } else { 2856 const struct vif_device *vif = v; 2857 const char *name = vif->dev ? 
vif->dev->name : "none"; 2858 2859 seq_printf(seq, 2860 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2861 vif - mrt->vif_table, 2862 name, vif->bytes_in, vif->pkt_in, 2863 vif->bytes_out, vif->pkt_out, 2864 vif->flags, vif->local, vif->remote); 2865 } 2866 return 0; 2867 } 2868 2869 static const struct seq_operations ipmr_vif_seq_ops = { 2870 .start = ipmr_vif_seq_start, 2871 .next = ipmr_vif_seq_next, 2872 .stop = ipmr_vif_seq_stop, 2873 .show = ipmr_vif_seq_show, 2874 }; 2875 2876 static int ipmr_vif_open(struct inode *inode, struct file *file) 2877 { 2878 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 2879 sizeof(struct ipmr_vif_iter)); 2880 } 2881 2882 static const struct file_operations ipmr_vif_fops = { 2883 .owner = THIS_MODULE, 2884 .open = ipmr_vif_open, 2885 .read = seq_read, 2886 .llseek = seq_lseek, 2887 .release = seq_release_net, 2888 }; 2889 2890 struct ipmr_mfc_iter { 2891 struct seq_net_private p; 2892 struct mr_table *mrt; 2893 struct list_head *cache; 2894 }; 2895 2896 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2897 struct ipmr_mfc_iter *it, loff_t pos) 2898 { 2899 struct mr_table *mrt = it->mrt; 2900 struct mfc_cache *mfc; 2901 2902 rcu_read_lock(); 2903 it->cache = &mrt->mfc_cache_list; 2904 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 2905 if (pos-- == 0) 2906 return mfc; 2907 rcu_read_unlock(); 2908 2909 spin_lock_bh(&mfc_unres_lock); 2910 it->cache = &mrt->mfc_unres_queue; 2911 list_for_each_entry(mfc, it->cache, list) 2912 if (pos-- == 0) 2913 return mfc; 2914 spin_unlock_bh(&mfc_unres_lock); 2915 2916 it->cache = NULL; 2917 return NULL; 2918 } 2919 2920 2921 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 2922 { 2923 struct ipmr_mfc_iter *it = seq->private; 2924 struct net *net = seq_file_net(seq); 2925 struct mr_table *mrt; 2926 2927 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2928 if (!mrt) 2929 return ERR_PTR(-ENOENT); 2930 2931 it->mrt = mrt; 2932 it->cache = NULL; 2933 return 
*pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2934 : SEQ_START_TOKEN; 2935 } 2936 2937 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2938 { 2939 struct ipmr_mfc_iter *it = seq->private; 2940 struct net *net = seq_file_net(seq); 2941 struct mr_table *mrt = it->mrt; 2942 struct mfc_cache *mfc = v; 2943 2944 ++*pos; 2945 2946 if (v == SEQ_START_TOKEN) 2947 return ipmr_mfc_seq_idx(net, seq->private, 0); 2948 2949 if (mfc->list.next != it->cache) 2950 return list_entry(mfc->list.next, struct mfc_cache, list); 2951 2952 if (it->cache == &mrt->mfc_unres_queue) 2953 goto end_of_list; 2954 2955 /* exhausted cache_array, show unresolved */ 2956 rcu_read_unlock(); 2957 it->cache = &mrt->mfc_unres_queue; 2958 2959 spin_lock_bh(&mfc_unres_lock); 2960 if (!list_empty(it->cache)) 2961 return list_first_entry(it->cache, struct mfc_cache, list); 2962 2963 end_of_list: 2964 spin_unlock_bh(&mfc_unres_lock); 2965 it->cache = NULL; 2966 2967 return NULL; 2968 } 2969 2970 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2971 { 2972 struct ipmr_mfc_iter *it = seq->private; 2973 struct mr_table *mrt = it->mrt; 2974 2975 if (it->cache == &mrt->mfc_unres_queue) 2976 spin_unlock_bh(&mfc_unres_lock); 2977 else if (it->cache == &mrt->mfc_cache_list) 2978 rcu_read_unlock(); 2979 } 2980 2981 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2982 { 2983 int n; 2984 2985 if (v == SEQ_START_TOKEN) { 2986 seq_puts(seq, 2987 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 2988 } else { 2989 const struct mfc_cache *mfc = v; 2990 const struct ipmr_mfc_iter *it = seq->private; 2991 const struct mr_table *mrt = it->mrt; 2992 2993 seq_printf(seq, "%08X %08X %-3hd", 2994 (__force u32) mfc->mfc_mcastgrp, 2995 (__force u32) mfc->mfc_origin, 2996 mfc->mfc_parent); 2997 2998 if (it->cache != &mrt->mfc_unres_queue) { 2999 seq_printf(seq, " %8lu %8lu %8lu", 3000 mfc->mfc_un.res.pkt, 3001 mfc->mfc_un.res.bytes, 3002 mfc->mfc_un.res.wrong_if); 3003 for (n = 
mfc->mfc_un.res.minvif; 3004 n < mfc->mfc_un.res.maxvif; n++) { 3005 if (VIF_EXISTS(mrt, n) && 3006 mfc->mfc_un.res.ttls[n] < 255) 3007 seq_printf(seq, 3008 " %2d:%-3d", 3009 n, mfc->mfc_un.res.ttls[n]); 3010 } 3011 } else { 3012 /* unresolved mfc_caches don't contain 3013 * pkt, bytes and wrong_if values 3014 */ 3015 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3016 } 3017 seq_putc(seq, '\n'); 3018 } 3019 return 0; 3020 } 3021 3022 static const struct seq_operations ipmr_mfc_seq_ops = { 3023 .start = ipmr_mfc_seq_start, 3024 .next = ipmr_mfc_seq_next, 3025 .stop = ipmr_mfc_seq_stop, 3026 .show = ipmr_mfc_seq_show, 3027 }; 3028 3029 static int ipmr_mfc_open(struct inode *inode, struct file *file) 3030 { 3031 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 3032 sizeof(struct ipmr_mfc_iter)); 3033 } 3034 3035 static const struct file_operations ipmr_mfc_fops = { 3036 .owner = THIS_MODULE, 3037 .open = ipmr_mfc_open, 3038 .read = seq_read, 3039 .llseek = seq_lseek, 3040 .release = seq_release_net, 3041 }; 3042 #endif 3043 3044 #ifdef CONFIG_IP_PIMSM_V2 3045 static const struct net_protocol pim_protocol = { 3046 .handler = pim_rcv, 3047 .netns_ok = 1, 3048 }; 3049 #endif 3050 3051 /* Setup for IP multicast routing */ 3052 static int __net_init ipmr_net_init(struct net *net) 3053 { 3054 int err; 3055 3056 err = ipmr_rules_init(net); 3057 if (err < 0) 3058 goto fail; 3059 3060 #ifdef CONFIG_PROC_FS 3061 err = -ENOMEM; 3062 if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) 3063 goto proc_vif_fail; 3064 if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) 3065 goto proc_cache_fail; 3066 #endif 3067 return 0; 3068 3069 #ifdef CONFIG_PROC_FS 3070 proc_cache_fail: 3071 remove_proc_entry("ip_mr_vif", net->proc_net); 3072 proc_vif_fail: 3073 ipmr_rules_exit(net); 3074 #endif 3075 fail: 3076 return err; 3077 } 3078 3079 static void __net_exit ipmr_net_exit(struct net *net) 3080 { 3081 #ifdef CONFIG_PROC_FS 3082 
remove_proc_entry("ip_mr_cache", net->proc_net); 3083 remove_proc_entry("ip_mr_vif", net->proc_net); 3084 #endif 3085 ipmr_rules_exit(net); 3086 } 3087 3088 static struct pernet_operations ipmr_net_ops = { 3089 .init = ipmr_net_init, 3090 .exit = ipmr_net_exit, 3091 }; 3092 3093 int __init ip_mr_init(void) 3094 { 3095 int err; 3096 3097 mrt_cachep = kmem_cache_create("ip_mrt_cache", 3098 sizeof(struct mfc_cache), 3099 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 3100 NULL); 3101 3102 err = register_pernet_subsys(&ipmr_net_ops); 3103 if (err) 3104 goto reg_pernet_fail; 3105 3106 err = register_netdevice_notifier(&ip_mr_notifier); 3107 if (err) 3108 goto reg_notif_fail; 3109 #ifdef CONFIG_IP_PIMSM_V2 3110 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3111 pr_err("%s: can't add PIM protocol\n", __func__); 3112 err = -EAGAIN; 3113 goto add_proto_fail; 3114 } 3115 #endif 3116 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 3117 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); 3118 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, 3119 ipmr_rtm_route, NULL, 0); 3120 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, 3121 ipmr_rtm_route, NULL, 0); 3122 3123 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, 3124 NULL, ipmr_rtm_dumplink, 0); 3125 return 0; 3126 3127 #ifdef CONFIG_IP_PIMSM_V2 3128 add_proto_fail: 3129 unregister_netdevice_notifier(&ip_mr_notifier); 3130 #endif 3131 reg_notif_fail: 3132 unregister_pernet_subsys(&ipmr_net_ops); 3133 reg_pernet_fail: 3134 kmem_cache_destroy(mrt_cachep); 3135 return err; 3136 } 3137