/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>

struct mr6_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is free of exclusive locks at all.
 */
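
/*
 * A minimal sketch of the locking discipline described above, using the
 * same primitives this file uses (the entry c, table mrt and hash line
 * are placeholders for illustration):
 *
 *	read_lock(&mrt_lock);			// data path (reader)
 *	c = ip6mr_cache_find(mrt, &origin, &mcastgrp);
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);		// process context (writer)
 *	list_add(&c->list, &mrt->mfc6_cache_array[line]);
 *	write_unlock_bh(&mrt_lock);
 *
 * Unresolved entries never take mrt_lock; they are covered by
 * mfc_unres_lock alone.
 */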

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	struct ip6mr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
}
#endif

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}
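
/*
 * With CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, a daemon can ask for its own
 * table before binding the socket; a hedged userspace sketch (table id
 * 42 and the fd variable are illustrative, not part of this file):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	uint32_t table = 42;
 *	int one = 1;
 *
 *	// MRT6_TABLE must precede MRT6_INIT: once the socket owns a
 *	// table, the MRT6_TABLE case below returns -EBUSY.
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 * ip6mr_new_table() then creates table 42 on demand, and an
 * RTNL_FAMILY_IP6MR fib rule can steer lookups to it.
 */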

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing
 *	/proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
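
/*
 * Illustrative /proc/net/ip6_mr_vif output produced by the format
 * strings above (interface name and counters are made up):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      12     19080      24 00000
 *
 * Flags prints vif->flags in hex (MIFF_REGISTER, VIFF_STATIC, ...).
 */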

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif
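
/*
 * An illustrative /proc/net/ip6_mr_cache line (values invented; %pI6
 * actually prints all eight 16-bit groups, abbreviated here for width):
 *
 *	Group      Origin       Iif  Pkts  Bytes  Wrong  Oifs
 *	ff3e::1    2001:db8::1  0      10   9000      0  1:1 2:1
 *
 * Each "Oifs" element is mif:ttl-threshold, emitted only for MIFs whose
 * threshold is below 255.
 */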

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	= pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
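
/*
 * The MTU chosen above accounts for the PIM register encapsulation this
 * vif carries: an outer IPv6 header plus the 8-byte register header
 * handled in pim6_rcv(). On a standard 1500-byte link that leaves
 * 1500 - 40 - 8 = 1452 bytes for the inner packet.
 */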

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
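
/*
 * A hedged userspace sketch of driving mif6_add() (fd is the socket
 * that issued MRT6_INIT; ifindex is a placeholder):
 *
 *	struct mif6ctl mc = {
 *		.mif6c_mifi	 = 0,		// MIF slot to fill
 *		.mif6c_flags	 = 0,		// plain device, not MIFF_REGISTER
 *		.vifc_threshold	 = 1,		// minimum hop limit to forward
 *		.mif6c_pifi	 = ifindex,	// physical interface index
 *		.vifc_rate_limit = 0,
 *	};
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 */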

static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone from queued into a resolved state
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this
 *	but pim6sd expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
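
/*
 * pim6sd learns about packets with no matching route by reading the
 * mroute socket; a hedged sketch of consuming the MRT6MSG_NOCACHE
 * report queued above (buffer size is illustrative):
 *
 *	char buf[2048];
 *	struct mrt6msg *msg = (struct mrt6msg *)buf;
 *	ssize_t len = read(fd, buf, sizeof(buf));
 *
 *	if (len >= (ssize_t)sizeof(*msg) &&
 *	    msg->im6_msgtype == MRT6MSG_NOCACHE) {
 *		// msg->im6_src/im6_dst name the (S,G) flow and
 *		// msg->im6_mif the arrival interface; the daemon
 *		// replies with MRT6_ADD_MFC (see ip6mr_mfc_add() below).
 *	}
 */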

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued entry. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
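
/*
 * The userspace half of the symbiosis: answering a NOCACHE report by
 * installing the route ip6mr_mfc_add() above parses (a hedged sketch;
 * parent MIF 0 and output MIF 1 are illustrative):
 *
 *	struct mf6cctl mf;
 *
 *	memset(&mf, 0, sizeof(mf));
 *	mf.mf6cc_origin.sin6_addr   = msg->im6_src;
 *	mf.mf6cc_mcastgrp.sin6_addr = msg->im6_dst;
 *	mf.mf6cc_parent = 0;			// expected input MIF
 *	IF_SET(1, &mf.mf6cc_ifset);		// forward out of MIF 1
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mf, sizeof(mf));
 */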

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(mrt, &mfc);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
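
/*
 * Reading back an option, e.g. the API version reported above, is
 * symmetric (hedged sketch):
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *
 *	getsockopt(fd, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 *	// ver is 0x0305 for this implementation
 */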

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
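
/*
 * Per-flow counters can be fetched through the ioctl above; a hedged
 * sketch (the source and group addresses are placeholders):
 *
 *	struct sioc_sg_req6 sg;
 *
 *	memset(&sg, 0, sizeof(sg));
 *	inet_pton(AF_INET6, "2001:db8::1", &sg.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::1", &sg.grp.sin6_addr);
 *	if (ioctl(fd, SIOCGETSGCNT_IN6, &sg) == 0)
 *		printf("%lu pkts %lu bytes\n", sg.pktcnt, sg.bytecnt);
 */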

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it joined on.
	 * If we do not do this, the program will have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
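
/*
 * A worked example of the threshold test in ip6_mr_forward() above:
 * with res.ttls[2] = 1 and res.ttls[3] = 64 (values illustrative), a
 * packet arriving with hop_limit 10 goes out MIF 2 (10 > 1) but not
 * MIF 3 (10 <= 64); MIFs left at the default 255 never match.
 */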

static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
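
/*
 * Shape of the answer assembled above for a resolved entry with parent
 * MIF 1 and two forwarding MIFs (illustrative):
 *
 *	RTA_IIF        ifindex of vif6_table[1].dev
 *	RTA_MULTIPATH  two struct rtnexthop, each carrying the MIF's
 *	               ifindex in rtnh_ifindex and its TTL threshold
 *	               in rtnh_hops
 *
 * Both ip6mr_get_route() and the RTM_GETROUTE dump below reuse this
 * helper, so userspace sees a single encoding for (S,G) state.
 */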

int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 pid, u32 seq, struct mfc6_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);

	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).pid,
						      cb->nlh->nlmsg_seq,
						      mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}