1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <linux/slab.h> 44 #include <net/net_namespace.h> 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) 
do { ; } while (0) 73 #endif 74 75 #define CLONE_OFFLINK_ROUTE 0 76 77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 79 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 80 static void ip6_dst_destroy(struct dst_entry *); 81 static void ip6_dst_ifdown(struct dst_entry *, 82 struct net_device *dev, int how); 83 static int ip6_dst_gc(struct dst_ops *ops); 84 85 static int ip6_pkt_discard(struct sk_buff *skb); 86 static int ip6_pkt_discard_out(struct sk_buff *skb); 87 static void ip6_link_failure(struct sk_buff *skb); 88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 89 90 #ifdef CONFIG_IPV6_ROUTE_INFO 91 static struct rt6_info *rt6_add_route_info(struct net *net, 92 struct in6_addr *prefix, int prefixlen, 93 struct in6_addr *gwaddr, int ifindex, 94 unsigned pref); 95 static struct rt6_info *rt6_get_route_info(struct net *net, 96 struct in6_addr *prefix, int prefixlen, 97 struct in6_addr *gwaddr, int ifindex); 98 #endif 99 100 static struct dst_ops ip6_dst_ops_template = { 101 .family = AF_INET6, 102 .protocol = cpu_to_be16(ETH_P_IPV6), 103 .gc = ip6_dst_gc, 104 .gc_thresh = 1024, 105 .check = ip6_dst_check, 106 .destroy = ip6_dst_destroy, 107 .ifdown = ip6_dst_ifdown, 108 .negative_advice = ip6_negative_advice, 109 .link_failure = ip6_link_failure, 110 .update_pmtu = ip6_rt_update_pmtu, 111 .local_out = __ip6_local_out, 112 }; 113 114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 115 { 116 } 117 118 static struct dst_ops ip6_dst_blackhole_ops = { 119 .family = AF_INET6, 120 .protocol = cpu_to_be16(ETH_P_IPV6), 121 .destroy = ip6_dst_destroy, 122 .check = ip6_dst_check, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 124 }; 125 126 static struct rt6_info ip6_null_entry_template = { 127 .dst = { 128 .__refcnt = ATOMIC_INIT(1), 129 .__use = 1, 130 .obsolete = -1, 131 .error = -ENETUNREACH, 132 .metrics = { 
[RTAX_HOPLIMIT - 1] = 255, }, 133 .input = ip6_pkt_discard, 134 .output = ip6_pkt_discard_out, 135 }, 136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 137 .rt6i_protocol = RTPROT_KERNEL, 138 .rt6i_metric = ~(u32) 0, 139 .rt6i_ref = ATOMIC_INIT(1), 140 }; 141 142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 143 144 static int ip6_pkt_prohibit(struct sk_buff *skb); 145 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 146 147 static struct rt6_info ip6_prohibit_entry_template = { 148 .dst = { 149 .__refcnt = ATOMIC_INIT(1), 150 .__use = 1, 151 .obsolete = -1, 152 .error = -EACCES, 153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 154 .input = ip6_pkt_prohibit, 155 .output = ip6_pkt_prohibit_out, 156 }, 157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 158 .rt6i_protocol = RTPROT_KERNEL, 159 .rt6i_metric = ~(u32) 0, 160 .rt6i_ref = ATOMIC_INIT(1), 161 }; 162 163 static struct rt6_info ip6_blk_hole_entry_template = { 164 .dst = { 165 .__refcnt = ATOMIC_INIT(1), 166 .__use = 1, 167 .obsolete = -1, 168 .error = -EINVAL, 169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 170 .input = dst_discard, 171 .output = dst_discard, 172 }, 173 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 174 .rt6i_protocol = RTPROT_KERNEL, 175 .rt6i_metric = ~(u32) 0, 176 .rt6i_ref = ATOMIC_INIT(1), 177 }; 178 179 #endif 180 181 /* allocate dst with ip6_dst_ops */ 182 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 183 { 184 return (struct rt6_info *)dst_alloc(ops); 185 } 186 187 static void ip6_dst_destroy(struct dst_entry *dst) 188 { 189 struct rt6_info *rt = (struct rt6_info *)dst; 190 struct inet6_dev *idev = rt->rt6i_idev; 191 192 if (idev != NULL) { 193 rt->rt6i_idev = NULL; 194 in6_dev_put(idev); 195 } 196 } 197 198 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 199 int how) 200 { 201 struct rt6_info *rt = (struct rt6_info *)dst; 202 struct inet6_dev *idev = rt->rt6i_idev; 203 struct net_device *loopback_dev = 204 dev_net(dev)->loopback_dev; 205 206 if (dev != 
loopback_dev && idev != NULL && idev->dev == dev) { 207 struct inet6_dev *loopback_idev = 208 in6_dev_get(loopback_dev); 209 if (loopback_idev != NULL) { 210 rt->rt6i_idev = loopback_idev; 211 in6_dev_put(idev); 212 } 213 } 214 } 215 216 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 217 { 218 return (rt->rt6i_flags & RTF_EXPIRES) && 219 time_after(jiffies, rt->rt6i_expires); 220 } 221 222 static inline int rt6_need_strict(struct in6_addr *daddr) 223 { 224 return ipv6_addr_type(daddr) & 225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 226 } 227 228 /* 229 * Route lookup. Any table->tb6_lock is implied. 230 */ 231 232 static inline struct rt6_info *rt6_device_match(struct net *net, 233 struct rt6_info *rt, 234 struct in6_addr *saddr, 235 int oif, 236 int flags) 237 { 238 struct rt6_info *local = NULL; 239 struct rt6_info *sprt; 240 241 if (!oif && ipv6_addr_any(saddr)) 242 goto out; 243 244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 245 struct net_device *dev = sprt->rt6i_dev; 246 247 if (oif) { 248 if (dev->ifindex == oif) 249 return sprt; 250 if (dev->flags & IFF_LOOPBACK) { 251 if (sprt->rt6i_idev == NULL || 252 sprt->rt6i_idev->dev->ifindex != oif) { 253 if (flags & RT6_LOOKUP_F_IFACE && oif) 254 continue; 255 if (local && (!oif || 256 local->rt6i_idev->dev->ifindex == oif)) 257 continue; 258 } 259 local = sprt; 260 } 261 } else { 262 if (ipv6_chk_addr(net, saddr, dev, 263 flags & RT6_LOOKUP_F_IFACE)) 264 return sprt; 265 } 266 } 267 268 if (oif) { 269 if (local) 270 return local; 271 272 if (flags & RT6_LOOKUP_F_IFACE) 273 return net->ipv6.ip6_null_entry; 274 } 275 out: 276 return rt; 277 } 278 279 #ifdef CONFIG_IPV6_ROUTER_PREF 280 static void rt6_probe(struct rt6_info *rt) 281 { 282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 283 /* 284 * Okay, this does not seem to be appropriate 285 * for now, however, we need to check if it 286 * is really so; aka Router Reachability Probing. 
287 * 288 * Router Reachability Probe MUST be rate-limited 289 * to no more than one per minute. 290 */ 291 if (!neigh || (neigh->nud_state & NUD_VALID)) 292 return; 293 read_lock_bh(&neigh->lock); 294 if (!(neigh->nud_state & NUD_VALID) && 295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 296 struct in6_addr mcaddr; 297 struct in6_addr *target; 298 299 neigh->updated = jiffies; 300 read_unlock_bh(&neigh->lock); 301 302 target = (struct in6_addr *)&neigh->primary_key; 303 addrconf_addr_solict_mult(target, &mcaddr); 304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 305 } else 306 read_unlock_bh(&neigh->lock); 307 } 308 #else 309 static inline void rt6_probe(struct rt6_info *rt) 310 { 311 } 312 #endif 313 314 /* 315 * Default Router Selection (RFC 2461 6.3.6) 316 */ 317 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 318 { 319 struct net_device *dev = rt->rt6i_dev; 320 if (!oif || dev->ifindex == oif) 321 return 2; 322 if ((dev->flags & IFF_LOOPBACK) && 323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 324 return 1; 325 return 0; 326 } 327 328 static inline int rt6_check_neigh(struct rt6_info *rt) 329 { 330 struct neighbour *neigh = rt->rt6i_nexthop; 331 int m; 332 if (rt->rt6i_flags & RTF_NONEXTHOP || 333 !(rt->rt6i_flags & RTF_GATEWAY)) 334 m = 1; 335 else if (neigh) { 336 read_lock_bh(&neigh->lock); 337 if (neigh->nud_state & NUD_VALID) 338 m = 2; 339 #ifdef CONFIG_IPV6_ROUTER_PREF 340 else if (neigh->nud_state & NUD_FAILED) 341 m = 0; 342 #endif 343 else 344 m = 1; 345 read_unlock_bh(&neigh->lock); 346 } else 347 m = 0; 348 return m; 349 } 350 351 static int rt6_score_route(struct rt6_info *rt, int oif, 352 int strict) 353 { 354 int m, n; 355 356 m = rt6_check_dev(rt, oif); 357 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 358 return -1; 359 #ifdef CONFIG_IPV6_ROUTER_PREF 360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 361 #endif 362 n = rt6_check_neigh(rt); 363 if (!n && (strict & 
RT6_LOOKUP_F_REACHABLE)) 364 return -1; 365 return m; 366 } 367 368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 369 int *mpri, struct rt6_info *match) 370 { 371 int m; 372 373 if (rt6_check_expired(rt)) 374 goto out; 375 376 m = rt6_score_route(rt, oif, strict); 377 if (m < 0) 378 goto out; 379 380 if (m > *mpri) { 381 if (strict & RT6_LOOKUP_F_REACHABLE) 382 rt6_probe(match); 383 *mpri = m; 384 match = rt; 385 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 386 rt6_probe(rt); 387 } 388 389 out: 390 return match; 391 } 392 393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 394 struct rt6_info *rr_head, 395 u32 metric, int oif, int strict) 396 { 397 struct rt6_info *rt, *match; 398 int mpri = -1; 399 400 match = NULL; 401 for (rt = rr_head; rt && rt->rt6i_metric == metric; 402 rt = rt->dst.rt6_next) 403 match = find_match(rt, oif, strict, &mpri, match); 404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 405 rt = rt->dst.rt6_next) 406 match = find_match(rt, oif, strict, &mpri, match); 407 408 return match; 409 } 410 411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 412 { 413 struct rt6_info *match, *rt0; 414 struct net *net; 415 416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 417 __func__, fn->leaf, oif); 418 419 rt0 = fn->rr_ptr; 420 if (!rt0) 421 fn->rr_ptr = rt0 = fn->leaf; 422 423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 424 425 if (!match && 426 (strict & RT6_LOOKUP_F_REACHABLE)) { 427 struct rt6_info *next = rt0->dst.rt6_next; 428 429 /* no entries matched; do round-robin */ 430 if (!next || next->rt6i_metric != rt0->rt6i_metric) 431 next = fn->leaf; 432 433 if (next != rt0) 434 fn->rr_ptr = next; 435 } 436 437 RT6_TRACE("%s() => %p\n", 438 __func__, match); 439 440 net = dev_net(rt0->rt6i_dev); 441 return match ? 
match : net->ipv6.ip6_null_entry; 442 } 443 444 #ifdef CONFIG_IPV6_ROUTE_INFO 445 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 446 struct in6_addr *gwaddr) 447 { 448 struct net *net = dev_net(dev); 449 struct route_info *rinfo = (struct route_info *) opt; 450 struct in6_addr prefix_buf, *prefix; 451 unsigned int pref; 452 unsigned long lifetime; 453 struct rt6_info *rt; 454 455 if (len < sizeof(struct route_info)) { 456 return -EINVAL; 457 } 458 459 /* Sanity check for prefix_len and length */ 460 if (rinfo->length > 3) { 461 return -EINVAL; 462 } else if (rinfo->prefix_len > 128) { 463 return -EINVAL; 464 } else if (rinfo->prefix_len > 64) { 465 if (rinfo->length < 2) { 466 return -EINVAL; 467 } 468 } else if (rinfo->prefix_len > 0) { 469 if (rinfo->length < 1) { 470 return -EINVAL; 471 } 472 } 473 474 pref = rinfo->route_pref; 475 if (pref == ICMPV6_ROUTER_PREF_INVALID) 476 return -EINVAL; 477 478 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 479 480 if (rinfo->length == 3) 481 prefix = (struct in6_addr *)rinfo->prefix; 482 else { 483 /* this function is safe */ 484 ipv6_addr_prefix(&prefix_buf, 485 (struct in6_addr *)rinfo->prefix, 486 rinfo->prefix_len); 487 prefix = &prefix_buf; 488 } 489 490 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 491 dev->ifindex); 492 493 if (rt && !lifetime) { 494 ip6_del_rt(rt); 495 rt = NULL; 496 } 497 498 if (!rt && lifetime) 499 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 500 pref); 501 else if (rt) 502 rt->rt6i_flags = RTF_ROUTEINFO | 503 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 504 505 if (rt) { 506 if (!addrconf_finite_timeout(lifetime)) { 507 rt->rt6i_flags &= ~RTF_EXPIRES; 508 } else { 509 rt->rt6i_expires = jiffies + HZ * lifetime; 510 rt->rt6i_flags |= RTF_EXPIRES; 511 } 512 dst_release(&rt->dst); 513 } 514 return 0; 515 } 516 #endif 517 518 #define BACKTRACK(__net, saddr) \ 519 do { \ 520 if (rt == __net->ipv6.ip6_null_entry) { 
\ 521 struct fib6_node *pn; \ 522 while (1) { \ 523 if (fn->fn_flags & RTN_TL_ROOT) \ 524 goto out; \ 525 pn = fn->parent; \ 526 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 527 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 528 else \ 529 fn = pn; \ 530 if (fn->fn_flags & RTN_RTINFO) \ 531 goto restart; \ 532 } \ 533 } \ 534 } while(0) 535 536 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 537 struct fib6_table *table, 538 struct flowi *fl, int flags) 539 { 540 struct fib6_node *fn; 541 struct rt6_info *rt; 542 543 read_lock_bh(&table->tb6_lock); 544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 545 restart: 546 rt = fn->leaf; 547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 548 BACKTRACK(net, &fl->fl6_src); 549 out: 550 dst_use(&rt->dst, jiffies); 551 read_unlock_bh(&table->tb6_lock); 552 return rt; 553 554 } 555 556 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 557 const struct in6_addr *saddr, int oif, int strict) 558 { 559 struct flowi fl = { 560 .oif = oif, 561 .nl_u = { 562 .ip6_u = { 563 .daddr = *daddr, 564 }, 565 }, 566 }; 567 struct dst_entry *dst; 568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 569 570 if (saddr) { 571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 572 flags |= RT6_LOOKUP_F_HAS_SADDR; 573 } 574 575 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 576 if (dst->error == 0) 577 return (struct rt6_info *) dst; 578 579 dst_release(dst); 580 581 return NULL; 582 } 583 584 EXPORT_SYMBOL(rt6_lookup); 585 586 /* ip6_ins_rt is called with FREE table->tb6_lock. 587 It takes new route entry, the addition fails by any reason the 588 route is freed. In any case, if caller does not hold it, it may 589 be destroyed. 
590 */ 591 592 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 593 { 594 int err; 595 struct fib6_table *table; 596 597 table = rt->rt6i_table; 598 write_lock_bh(&table->tb6_lock); 599 err = fib6_add(&table->tb6_root, rt, info); 600 write_unlock_bh(&table->tb6_lock); 601 602 return err; 603 } 604 605 int ip6_ins_rt(struct rt6_info *rt) 606 { 607 struct nl_info info = { 608 .nl_net = dev_net(rt->rt6i_dev), 609 }; 610 return __ip6_ins_rt(rt, &info); 611 } 612 613 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 614 struct in6_addr *saddr) 615 { 616 struct rt6_info *rt; 617 618 /* 619 * Clone the route. 620 */ 621 622 rt = ip6_rt_copy(ort); 623 624 if (rt) { 625 struct neighbour *neigh; 626 int attempts = !in_softirq(); 627 628 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 629 if (rt->rt6i_dst.plen != 128 && 630 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 631 rt->rt6i_flags |= RTF_ANYCAST; 632 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 633 } 634 635 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 636 rt->rt6i_dst.plen = 128; 637 rt->rt6i_flags |= RTF_CACHE; 638 rt->dst.flags |= DST_HOST; 639 640 #ifdef CONFIG_IPV6_SUBTREES 641 if (rt->rt6i_src.plen && saddr) { 642 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 643 rt->rt6i_src.plen = 128; 644 } 645 #endif 646 647 retry: 648 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 649 if (IS_ERR(neigh)) { 650 struct net *net = dev_net(rt->rt6i_dev); 651 int saved_rt_min_interval = 652 net->ipv6.sysctl.ip6_rt_gc_min_interval; 653 int saved_rt_elasticity = 654 net->ipv6.sysctl.ip6_rt_gc_elasticity; 655 656 if (attempts-- > 0) { 657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 658 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 659 660 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 661 662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 663 saved_rt_elasticity; 664 net->ipv6.sysctl.ip6_rt_gc_min_interval = 665 saved_rt_min_interval; 666 goto retry; 667 } 668 669 if (net_ratelimit()) 670 printk(KERN_WARNING 671 
"ipv6: Neighbour table overflow.\n"); 672 dst_free(&rt->dst); 673 return NULL; 674 } 675 rt->rt6i_nexthop = neigh; 676 677 } 678 679 return rt; 680 } 681 682 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 683 { 684 struct rt6_info *rt = ip6_rt_copy(ort); 685 if (rt) { 686 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 687 rt->rt6i_dst.plen = 128; 688 rt->rt6i_flags |= RTF_CACHE; 689 rt->dst.flags |= DST_HOST; 690 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 691 } 692 return rt; 693 } 694 695 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 696 struct flowi *fl, int flags) 697 { 698 struct fib6_node *fn; 699 struct rt6_info *rt, *nrt; 700 int strict = 0; 701 int attempts = 3; 702 int err; 703 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 704 705 strict |= flags & RT6_LOOKUP_F_IFACE; 706 707 relookup: 708 read_lock_bh(&table->tb6_lock); 709 710 restart_2: 711 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 712 713 restart: 714 rt = rt6_select(fn, oif, strict | reachable); 715 716 BACKTRACK(net, &fl->fl6_src); 717 if (rt == net->ipv6.ip6_null_entry || 718 rt->rt6i_flags & RTF_CACHE) 719 goto out; 720 721 dst_hold(&rt->dst); 722 read_unlock_bh(&table->tb6_lock); 723 724 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 725 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 726 else { 727 #if CLONE_OFFLINK_ROUTE 728 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 729 #else 730 goto out2; 731 #endif 732 } 733 734 dst_release(&rt->dst); 735 rt = nrt ? : net->ipv6.ip6_null_entry; 736 737 dst_hold(&rt->dst); 738 if (nrt) { 739 err = ip6_ins_rt(nrt); 740 if (!err) 741 goto out2; 742 } 743 744 if (--attempts <= 0) 745 goto out2; 746 747 /* 748 * Race condition! In the gap, when table->tb6_lock was 749 * released someone could insert this route. Relookup. 
750 */ 751 dst_release(&rt->dst); 752 goto relookup; 753 754 out: 755 if (reachable) { 756 reachable = 0; 757 goto restart_2; 758 } 759 dst_hold(&rt->dst); 760 read_unlock_bh(&table->tb6_lock); 761 out2: 762 rt->dst.lastuse = jiffies; 763 rt->dst.__use++; 764 765 return rt; 766 } 767 768 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 769 struct flowi *fl, int flags) 770 { 771 return ip6_pol_route(net, table, fl->iif, fl, flags); 772 } 773 774 void ip6_route_input(struct sk_buff *skb) 775 { 776 struct ipv6hdr *iph = ipv6_hdr(skb); 777 struct net *net = dev_net(skb->dev); 778 int flags = RT6_LOOKUP_F_HAS_SADDR; 779 struct flowi fl = { 780 .iif = skb->dev->ifindex, 781 .nl_u = { 782 .ip6_u = { 783 .daddr = iph->daddr, 784 .saddr = iph->saddr, 785 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 786 }, 787 }, 788 .mark = skb->mark, 789 .proto = iph->nexthdr, 790 }; 791 792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 793 flags |= RT6_LOOKUP_F_IFACE; 794 795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 796 } 797 798 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 799 struct flowi *fl, int flags) 800 { 801 return ip6_pol_route(net, table, fl->oif, fl, flags); 802 } 803 804 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 805 struct flowi *fl) 806 { 807 int flags = 0; 808 809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) 810 flags |= RT6_LOOKUP_F_IFACE; 811 812 if (!ipv6_addr_any(&fl->fl6_src)) 813 flags |= RT6_LOOKUP_F_HAS_SADDR; 814 else if (sk) 815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 816 817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 818 } 819 820 EXPORT_SYMBOL(ip6_route_output); 821 822 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 823 { 824 struct rt6_info *ort = (struct rt6_info *) *dstp; 825 struct rt6_info *rt = 
(struct rt6_info *) 826 dst_alloc(&ip6_dst_blackhole_ops); 827 struct dst_entry *new = NULL; 828 829 if (rt) { 830 new = &rt->dst; 831 832 atomic_set(&new->__refcnt, 1); 833 new->__use = 1; 834 new->input = dst_discard; 835 new->output = dst_discard; 836 837 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); 838 new->dev = ort->dst.dev; 839 if (new->dev) 840 dev_hold(new->dev); 841 rt->rt6i_idev = ort->rt6i_idev; 842 if (rt->rt6i_idev) 843 in6_dev_hold(rt->rt6i_idev); 844 rt->rt6i_expires = 0; 845 846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 848 rt->rt6i_metric = 0; 849 850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 851 #ifdef CONFIG_IPV6_SUBTREES 852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 853 #endif 854 855 dst_free(new); 856 } 857 858 dst_release(*dstp); 859 *dstp = new; 860 return new ? 0 : -ENOMEM; 861 } 862 EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 863 864 /* 865 * Destination cache support functions 866 */ 867 868 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 869 { 870 struct rt6_info *rt; 871 872 rt = (struct rt6_info *) dst; 873 874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 875 return dst; 876 877 return NULL; 878 } 879 880 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 881 { 882 struct rt6_info *rt = (struct rt6_info *) dst; 883 884 if (rt) { 885 if (rt->rt6i_flags & RTF_CACHE) { 886 if (rt6_check_expired(rt)) { 887 ip6_del_rt(rt); 888 dst = NULL; 889 } 890 } else { 891 dst_release(dst); 892 dst = NULL; 893 } 894 } 895 return dst; 896 } 897 898 static void ip6_link_failure(struct sk_buff *skb) 899 { 900 struct rt6_info *rt; 901 902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 903 904 rt = (struct rt6_info *) skb_dst(skb); 905 if (rt) { 906 if (rt->rt6i_flags&RTF_CACHE) { 907 dst_set_expires(&rt->dst, 0); 908 rt->rt6i_flags |= RTF_EXPIRES; 909 } else if 
(rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 910 rt->rt6i_node->fn_sernum = -1; 911 } 912 } 913 914 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 915 { 916 struct rt6_info *rt6 = (struct rt6_info*)dst; 917 918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 919 rt6->rt6i_flags |= RTF_MODIFIED; 920 if (mtu < IPV6_MIN_MTU) { 921 mtu = IPV6_MIN_MTU; 922 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 923 } 924 dst->metrics[RTAX_MTU-1] = mtu; 925 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 926 } 927 } 928 929 static int ipv6_get_mtu(struct net_device *dev); 930 931 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 932 { 933 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 934 935 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 936 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 937 938 /* 939 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 940 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 941 * IPV6_MAXPLEN is also valid and means: "any MSS, 942 * rely only on pmtu discovery" 943 */ 944 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 945 mtu = IPV6_MAXPLEN; 946 return mtu; 947 } 948 949 static struct dst_entry *icmp6_dst_gc_list; 950 static DEFINE_SPINLOCK(icmp6_dst_lock); 951 952 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 953 struct neighbour *neigh, 954 const struct in6_addr *addr) 955 { 956 struct rt6_info *rt; 957 struct inet6_dev *idev = in6_dev_get(dev); 958 struct net *net = dev_net(dev); 959 960 if (unlikely(idev == NULL)) 961 return NULL; 962 963 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 964 if (unlikely(rt == NULL)) { 965 in6_dev_put(idev); 966 goto out; 967 } 968 969 dev_hold(dev); 970 if (neigh) 971 neigh_hold(neigh); 972 else { 973 neigh = ndisc_get_neigh(dev, addr); 974 if (IS_ERR(neigh)) 975 neigh = NULL; 976 } 977 978 rt->rt6i_dev = dev; 979 rt->rt6i_idev = idev; 980 rt->rt6i_nexthop = neigh; 981 atomic_set(&rt->dst.__refcnt, 1); 982 
rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; 983 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 984 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); 985 rt->dst.output = ip6_output; 986 987 #if 0 /* there's no chance to use these for ndisc */ 988 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 989 ? DST_HOST 990 : 0; 991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 992 rt->rt6i_dst.plen = 128; 993 #endif 994 995 spin_lock_bh(&icmp6_dst_lock); 996 rt->dst.next = icmp6_dst_gc_list; 997 icmp6_dst_gc_list = &rt->dst; 998 spin_unlock_bh(&icmp6_dst_lock); 999 1000 fib6_force_start_gc(net); 1001 1002 out: 1003 return &rt->dst; 1004 } 1005 1006 int icmp6_dst_gc(void) 1007 { 1008 struct dst_entry *dst, *next, **pprev; 1009 int more = 0; 1010 1011 next = NULL; 1012 1013 spin_lock_bh(&icmp6_dst_lock); 1014 pprev = &icmp6_dst_gc_list; 1015 1016 while ((dst = *pprev) != NULL) { 1017 if (!atomic_read(&dst->__refcnt)) { 1018 *pprev = dst->next; 1019 dst_free(dst); 1020 } else { 1021 pprev = &dst->next; 1022 ++more; 1023 } 1024 } 1025 1026 spin_unlock_bh(&icmp6_dst_lock); 1027 1028 return more; 1029 } 1030 1031 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1032 void *arg) 1033 { 1034 struct dst_entry *dst, **pprev; 1035 1036 spin_lock_bh(&icmp6_dst_lock); 1037 pprev = &icmp6_dst_gc_list; 1038 while ((dst = *pprev) != NULL) { 1039 struct rt6_info *rt = (struct rt6_info *) dst; 1040 if (func(rt, arg)) { 1041 *pprev = dst->next; 1042 dst_free(dst); 1043 } else { 1044 pprev = &dst->next; 1045 } 1046 } 1047 spin_unlock_bh(&icmp6_dst_lock); 1048 } 1049 1050 static int ip6_dst_gc(struct dst_ops *ops) 1051 { 1052 unsigned long now = jiffies; 1053 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1054 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1055 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1057 int rt_gc_timeout = 
net->ipv6.sysctl.ip6_rt_gc_timeout; 1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1059 int entries; 1060 1061 entries = dst_entries_get_fast(ops); 1062 if (time_after(rt_last_gc + rt_min_interval, now) && 1063 entries <= rt_max_size) 1064 goto out; 1065 1066 net->ipv6.ip6_rt_gc_expire++; 1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1068 net->ipv6.ip6_rt_last_gc = now; 1069 entries = dst_entries_get_slow(ops); 1070 if (entries < ops->gc_thresh) 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1072 out: 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1074 return entries > rt_max_size; 1075 } 1076 1077 /* Clean host part of a prefix. Not necessary in radix tree, 1078 but results in cleaner routing tables. 1079 1080 Remove it only when all the things will work! 1081 */ 1082 1083 static int ipv6_get_mtu(struct net_device *dev) 1084 { 1085 int mtu = IPV6_MIN_MTU; 1086 struct inet6_dev *idev; 1087 1088 rcu_read_lock(); 1089 idev = __in6_dev_get(dev); 1090 if (idev) 1091 mtu = idev->cnf.mtu6; 1092 rcu_read_unlock(); 1093 return mtu; 1094 } 1095 1096 int ip6_dst_hoplimit(struct dst_entry *dst) 1097 { 1098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1099 if (hoplimit < 0) { 1100 struct net_device *dev = dst->dev; 1101 struct inet6_dev *idev; 1102 1103 rcu_read_lock(); 1104 idev = __in6_dev_get(dev); 1105 if (idev) 1106 hoplimit = idev->cnf.hop_limit; 1107 else 1108 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1109 rcu_read_unlock(); 1110 } 1111 return hoplimit; 1112 } 1113 1114 /* 1115 * 1116 */ 1117 1118 int ip6_route_add(struct fib6_config *cfg) 1119 { 1120 int err; 1121 struct net *net = cfg->fc_nlinfo.nl_net; 1122 struct rt6_info *rt = NULL; 1123 struct net_device *dev = NULL; 1124 struct inet6_dev *idev = NULL; 1125 struct fib6_table *table; 1126 int addr_type; 1127 1128 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1129 return -EINVAL; 1130 #ifndef CONFIG_IPV6_SUBTREES 1131 if (cfg->fc_src_len) 1132 
return -EINVAL; 1133 #endif 1134 if (cfg->fc_ifindex) { 1135 err = -ENODEV; 1136 dev = dev_get_by_index(net, cfg->fc_ifindex); 1137 if (!dev) 1138 goto out; 1139 idev = in6_dev_get(dev); 1140 if (!idev) 1141 goto out; 1142 } 1143 1144 if (cfg->fc_metric == 0) 1145 cfg->fc_metric = IP6_RT_PRIO_USER; 1146 1147 table = fib6_new_table(net, cfg->fc_table); 1148 if (table == NULL) { 1149 err = -ENOBUFS; 1150 goto out; 1151 } 1152 1153 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1154 1155 if (rt == NULL) { 1156 err = -ENOMEM; 1157 goto out; 1158 } 1159 1160 rt->dst.obsolete = -1; 1161 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1162 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1163 0; 1164 1165 if (cfg->fc_protocol == RTPROT_UNSPEC) 1166 cfg->fc_protocol = RTPROT_BOOT; 1167 rt->rt6i_protocol = cfg->fc_protocol; 1168 1169 addr_type = ipv6_addr_type(&cfg->fc_dst); 1170 1171 if (addr_type & IPV6_ADDR_MULTICAST) 1172 rt->dst.input = ip6_mc_input; 1173 else if (cfg->fc_flags & RTF_LOCAL) 1174 rt->dst.input = ip6_input; 1175 else 1176 rt->dst.input = ip6_forward; 1177 1178 rt->dst.output = ip6_output; 1179 1180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1181 rt->rt6i_dst.plen = cfg->fc_dst_len; 1182 if (rt->rt6i_dst.plen == 128) 1183 rt->dst.flags = DST_HOST; 1184 1185 #ifdef CONFIG_IPV6_SUBTREES 1186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1187 rt->rt6i_src.plen = cfg->fc_src_len; 1188 #endif 1189 1190 rt->rt6i_metric = cfg->fc_metric; 1191 1192 /* We cannot add true routes via loopback here, 1193 they would result in kernel looping; promote them to reject routes 1194 */ 1195 if ((cfg->fc_flags & RTF_REJECT) || 1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) 1197 && !(cfg->fc_flags&RTF_LOCAL))) { 1198 /* hold loopback dev/idev if we haven't done so. 
*/ 1199 if (dev != net->loopback_dev) { 1200 if (dev) { 1201 dev_put(dev); 1202 in6_dev_put(idev); 1203 } 1204 dev = net->loopback_dev; 1205 dev_hold(dev); 1206 idev = in6_dev_get(dev); 1207 if (!idev) { 1208 err = -ENODEV; 1209 goto out; 1210 } 1211 } 1212 rt->dst.output = ip6_pkt_discard_out; 1213 rt->dst.input = ip6_pkt_discard; 1214 rt->dst.error = -ENETUNREACH; 1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1216 goto install_route; 1217 } 1218 1219 if (cfg->fc_flags & RTF_GATEWAY) { 1220 struct in6_addr *gw_addr; 1221 int gwa_type; 1222 1223 gw_addr = &cfg->fc_gateway; 1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1225 gwa_type = ipv6_addr_type(gw_addr); 1226 1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1228 struct rt6_info *grt; 1229 1230 /* IPv6 strictly inhibits using not link-local 1231 addresses as nexthop address. 1232 Otherwise, router will not able to send redirects. 1233 It is very good, but in some (rare!) circumstances 1234 (SIT, PtP, NBMA NOARP links) it is handy to allow 1235 some exceptions. 
--ANK 1236 */ 1237 err = -EINVAL; 1238 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1239 goto out; 1240 1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1242 1243 err = -EHOSTUNREACH; 1244 if (grt == NULL) 1245 goto out; 1246 if (dev) { 1247 if (dev != grt->rt6i_dev) { 1248 dst_release(&grt->dst); 1249 goto out; 1250 } 1251 } else { 1252 dev = grt->rt6i_dev; 1253 idev = grt->rt6i_idev; 1254 dev_hold(dev); 1255 in6_dev_hold(grt->rt6i_idev); 1256 } 1257 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1258 err = 0; 1259 dst_release(&grt->dst); 1260 1261 if (err) 1262 goto out; 1263 } 1264 err = -EINVAL; 1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1266 goto out; 1267 } 1268 1269 err = -ENODEV; 1270 if (dev == NULL) 1271 goto out; 1272 1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1275 if (IS_ERR(rt->rt6i_nexthop)) { 1276 err = PTR_ERR(rt->rt6i_nexthop); 1277 rt->rt6i_nexthop = NULL; 1278 goto out; 1279 } 1280 } 1281 1282 rt->rt6i_flags = cfg->fc_flags; 1283 1284 install_route: 1285 if (cfg->fc_mx) { 1286 struct nlattr *nla; 1287 int remaining; 1288 1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1290 int type = nla_type(nla); 1291 1292 if (type) { 1293 if (type > RTAX_MAX) { 1294 err = -EINVAL; 1295 goto out; 1296 } 1297 1298 rt->dst.metrics[type - 1] = nla_get_u32(nla); 1299 } 1300 } 1301 } 1302 1303 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) 1304 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; 1305 if (!dst_mtu(&rt->dst)) 1306 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1307 if (!dst_metric(&rt->dst, RTAX_ADVMSS)) 1308 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); 1309 rt->dst.dev = dev; 1310 rt->rt6i_idev = idev; 1311 rt->rt6i_table = table; 1312 1313 cfg->fc_nlinfo.nl_net = dev_net(dev); 1314 1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1316 1317 out: 1318 if (dev) 1319 dev_put(dev); 1320 if (idev) 1321 in6_dev_put(idev); 1322 if 
(rt) 1323 dst_free(&rt->dst); 1324 return err; 1325 } 1326 1327 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1328 { 1329 int err; 1330 struct fib6_table *table; 1331 struct net *net = dev_net(rt->rt6i_dev); 1332 1333 if (rt == net->ipv6.ip6_null_entry) 1334 return -ENOENT; 1335 1336 table = rt->rt6i_table; 1337 write_lock_bh(&table->tb6_lock); 1338 1339 err = fib6_del(rt, info); 1340 dst_release(&rt->dst); 1341 1342 write_unlock_bh(&table->tb6_lock); 1343 1344 return err; 1345 } 1346 1347 int ip6_del_rt(struct rt6_info *rt) 1348 { 1349 struct nl_info info = { 1350 .nl_net = dev_net(rt->rt6i_dev), 1351 }; 1352 return __ip6_del_rt(rt, &info); 1353 } 1354 1355 static int ip6_route_del(struct fib6_config *cfg) 1356 { 1357 struct fib6_table *table; 1358 struct fib6_node *fn; 1359 struct rt6_info *rt; 1360 int err = -ESRCH; 1361 1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1363 if (table == NULL) 1364 return err; 1365 1366 read_lock_bh(&table->tb6_lock); 1367 1368 fn = fib6_locate(&table->tb6_root, 1369 &cfg->fc_dst, cfg->fc_dst_len, 1370 &cfg->fc_src, cfg->fc_src_len); 1371 1372 if (fn) { 1373 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1374 if (cfg->fc_ifindex && 1375 (rt->rt6i_dev == NULL || 1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1377 continue; 1378 if (cfg->fc_flags & RTF_GATEWAY && 1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1380 continue; 1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1382 continue; 1383 dst_hold(&rt->dst); 1384 read_unlock_bh(&table->tb6_lock); 1385 1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1387 } 1388 } 1389 read_unlock_bh(&table->tb6_lock); 1390 1391 return err; 1392 } 1393 1394 /* 1395 * Handle redirects 1396 */ 1397 struct ip6rd_flowi { 1398 struct flowi fl; 1399 struct in6_addr gateway; 1400 }; 1401 1402 static struct rt6_info *__ip6_route_redirect(struct net *net, 1403 struct fib6_table *table, 1404 struct flowi *fl, 1405 int flags) 1406 { 1407 
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1408 struct rt6_info *rt; 1409 struct fib6_node *fn; 1410 1411 /* 1412 * Get the "current" route for this destination and 1413 * check if the redirect has come from approriate router. 1414 * 1415 * RFC 2461 specifies that redirects should only be 1416 * accepted if they come from the nexthop to the target. 1417 * Due to the way the routes are chosen, this notion 1418 * is a bit fuzzy and one might need to check all possible 1419 * routes. 1420 */ 1421 1422 read_lock_bh(&table->tb6_lock); 1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1424 restart: 1425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1426 /* 1427 * Current route is on-link; redirect is always invalid. 1428 * 1429 * Seems, previous statement is not true. It could 1430 * be node, which looks for us as on-link (f.e. proxy ndisc) 1431 * But then router serving it might decide, that we should 1432 * know truth 8)8) --ANK (980726). 1433 */ 1434 if (rt6_check_expired(rt)) 1435 continue; 1436 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1437 continue; 1438 if (fl->oif != rt->rt6i_dev->ifindex) 1439 continue; 1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1441 continue; 1442 break; 1443 } 1444 1445 if (!rt) 1446 rt = net->ipv6.ip6_null_entry; 1447 BACKTRACK(net, &fl->fl6_src); 1448 out: 1449 dst_hold(&rt->dst); 1450 1451 read_unlock_bh(&table->tb6_lock); 1452 1453 return rt; 1454 }; 1455 1456 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1457 struct in6_addr *src, 1458 struct in6_addr *gateway, 1459 struct net_device *dev) 1460 { 1461 int flags = RT6_LOOKUP_F_HAS_SADDR; 1462 struct net *net = dev_net(dev); 1463 struct ip6rd_flowi rdfl = { 1464 .fl = { 1465 .oif = dev->ifindex, 1466 .nl_u = { 1467 .ip6_u = { 1468 .daddr = *dest, 1469 .saddr = *src, 1470 }, 1471 }, 1472 }, 1473 }; 1474 1475 ipv6_addr_copy(&rdfl.gateway, gateway); 1476 1477 if (rt6_need_strict(dest)) 1478 flags |= RT6_LOOKUP_F_IFACE; 1479 1480 
return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
						   flags, __ip6_route_redirect);
}

/*
 * Apply a received redirect for @dest.
 *
 * @saddr is handed to ip6_route_redirect() as the gateway, i.e. the
 * router that sent the redirect; @neigh is the neighbour entry of the
 * new nexthop and @lladdr its link-layer address.  A non-zero @on_link
 * clears RTF_GATEWAY on the new route.
 *
 * If the sender is not the current nexthop the redirect is logged
 * (rate limited) and ignored.  Otherwise a /128 RTF_CACHE copy of the
 * current route pointing at @neigh is inserted, and the old route is
 * deleted if it was itself a cache entry.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
							dst_mtu(&nrt->dst));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/* No dst_release() on this path: we return right after the delete. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}

/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */

/*
 * Record a reduced path MTU @pmtu towards @daddr for the route selected
 * with output interface @ifindex (0 meaning no interface constraint is
 * passed to rt6_lookup()).  Values not below the route's current MTU
 * are ignored.
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

		/* According to RFC 1981, a PMTU increase should not be
		 * probed for within 5 mins; the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu expires
		 * and detection of a PMTU increase happens automatically.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
* Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}

/*
 *	Misc support functions
 */

/*
 * Allocate a new route initialised from @ort: handlers, metrics, error,
 * device and idev (each with a reference taken), gateway, keys and
 * table are copied.  The copy has RTF_EXPIRES cleared, no expiry and
 * metric 0.  Returns NULL if the allocation fails.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
		rt->dst.error = ort->dst.error;
		rt->dst.dev = ort->dst.dev;
		if (rt->dst.dev)
			dev_hold(rt->dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find the RTF_ROUTEINFO|RTF_GATEWAY route for @prefix/@prefixlen via
 * @gwaddr on interface @ifindex in RT6_TABLE_INFO.  Returns the route
 * with a reference held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Add a route-information route and return it via a fresh lookup.
 * The result of ip6_route_add() is not checked; a failed add shows up
 * as whatever rt6_get_route_info() then finds (possibly NULL).
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

/*
 * Find the autoconfigured default route (RTF_ADDRCONF and RTF_DEFAULT
 * both set) via gateway @addr on @dev in RT6_TABLE_DFLT.  Returns it
 * with a reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Add an expiring default route via @gwaddr on @dev and return it via
 * rt6_get_dflt_router().  The result of ip6_route_add() is not checked.
 */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

/*
 * Delete matching routes from the root leaf list of RT6_TABLE_DFLT.
 * The read lock must be dropped before ip6_del_rt(), so the walk is
 * restarted from the head after every deletion.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * NOTE(review): this matches routes with EITHER flag set,
		 * while rt6_get_dflt_router() requires both -- confirm
		 * that the difference is intended.
		 */
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

/*
 * Translate a legacy ioctl route message into a zero-initialised
 * fib6_config targeting RT6_TABLE_MAIN in namespace @net.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}

/*
 * SIOCADDRT / SIOCDELRT handler.
 *
 * Requires CAP_NET_ADMIN (-EPERM otherwise), copies a struct in6_rtmsg
 * from @arg (-EFAULT on failure) and adds or deletes the route under
 * the RTNL lock.  Any other @cmd yields -EINVAL.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

/*
 * Count the drop, send an ICMPv6 destination-unreachable with @code and
 * free @skb.  @ipstats_mib_noroutes selects the counter; on the input
 * side a destination of type IPV6_ADDR_ANY is counted as an address
 * error instead.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

/* Input handler: drop with ICMPV6_NOROUTE. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

/* Output handler: point skb->dev at the dst device, then drop with ICMPV6_NOROUTE. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Input handler: drop with ICMPV6_ADM_PROHIBITED. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

/* Output handler: point skb->dev at the dst device, then drop with ICMPV6_ADM_PROHIBITED. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif

/*
 *	Allocate a dst for local (unicast / anycast) address.
*
 *	The route is a /128 host route for @addr on the namespace's
 *	loopback device, bound to @idev and to RT6_TABLE_LOCAL, flagged
 *	RTF_ANYCAST when @anycast is non-zero and RTF_LOCAL otherwise.
 *	It is returned with its reference count set to 1.  On failure an
 *	ERR_PTR() value is returned.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->dst.flags = DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		/* We are casting this because that is the return
		 * value type.  But an errno encoded pointer is the
		 * same regardless of the underlying pointer type,
		 * and that's what we are returning.  So this is OK.
		 */
		return (struct rt6_info *) neigh;
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}

/* Argument bundle for fib6_ifdown(): device going down and its namespace. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

/*
 * Route-walk callback used by rt6_ifdown().  Returns -1 for every
 * route on the given device (or on any device when dev is NULL),
 * except the namespace null entry; returns 0 otherwise.
 * NOTE(review): -1 presumably tells the walker to delete the route
 * (see the trace message) -- confirm against fib6_clean_all().
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
	struct net *net = ((struct arg_dev_net *)arg)->net;

	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
	    rt != net->ipv6.ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

/* Run fib6_ifdown() over all FIB tables of @net and over the icmp6 dst list. */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

/* Argument bundle for rt6_mtu_change_route(): device and its new MTU. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;
	struct net *net = dev_net(arg->dev);

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e.
jumbo frame) 2046 */ 2047 /* 2048 If new MTU is less than route PMTU, this new MTU will be the 2049 lowest MTU in the path, update the route PMTU to reflect PMTU 2050 decreases; if new MTU is greater than route PMTU, and the 2051 old MTU is the lowest MTU in the path, update the route PMTU 2052 to reflect the increase. In this case if the other nodes' MTU 2053 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2054 PMTU discouvery. 2055 */ 2056 if (rt->rt6i_dev == arg->dev && 2057 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2058 (dst_mtu(&rt->dst) >= arg->mtu || 2059 (dst_mtu(&rt->dst) < arg->mtu && 2060 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2061 rt->dst.metrics[RTAX_MTU-1] = arg->mtu; 2062 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2063 } 2064 return 0; 2065 } 2066 2067 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2068 { 2069 struct rt6_mtu_change_arg arg = { 2070 .dev = dev, 2071 .mtu = mtu, 2072 }; 2073 2074 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2075 } 2076 2077 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2078 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2079 [RTA_OIF] = { .type = NLA_U32 }, 2080 [RTA_IIF] = { .type = NLA_U32 }, 2081 [RTA_PRIORITY] = { .type = NLA_U32 }, 2082 [RTA_METRICS] = { .type = NLA_NESTED }, 2083 }; 2084 2085 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2086 struct fib6_config *cfg) 2087 { 2088 struct rtmsg *rtm; 2089 struct nlattr *tb[RTA_MAX+1]; 2090 int err; 2091 2092 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2093 if (err < 0) 2094 goto errout; 2095 2096 err = -EINVAL; 2097 rtm = nlmsg_data(nlh); 2098 memset(cfg, 0, sizeof(*cfg)); 2099 2100 cfg->fc_table = rtm->rtm_table; 2101 cfg->fc_dst_len = rtm->rtm_dst_len; 2102 cfg->fc_src_len = rtm->rtm_src_len; 2103 cfg->fc_flags = RTF_UP; 2104 cfg->fc_protocol = rtm->rtm_protocol; 2105 2106 if (rtm->rtm_type == RTN_UNREACHABLE) 2107 cfg->fc_flags |= 
RTF_REJECT; 2108 2109 if (rtm->rtm_type == RTN_LOCAL) 2110 cfg->fc_flags |= RTF_LOCAL; 2111 2112 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2113 cfg->fc_nlinfo.nlh = nlh; 2114 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2115 2116 if (tb[RTA_GATEWAY]) { 2117 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2118 cfg->fc_flags |= RTF_GATEWAY; 2119 } 2120 2121 if (tb[RTA_DST]) { 2122 int plen = (rtm->rtm_dst_len + 7) >> 3; 2123 2124 if (nla_len(tb[RTA_DST]) < plen) 2125 goto errout; 2126 2127 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2128 } 2129 2130 if (tb[RTA_SRC]) { 2131 int plen = (rtm->rtm_src_len + 7) >> 3; 2132 2133 if (nla_len(tb[RTA_SRC]) < plen) 2134 goto errout; 2135 2136 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2137 } 2138 2139 if (tb[RTA_OIF]) 2140 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2141 2142 if (tb[RTA_PRIORITY]) 2143 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2144 2145 if (tb[RTA_METRICS]) { 2146 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2147 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2148 } 2149 2150 if (tb[RTA_TABLE]) 2151 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2152 2153 err = 0; 2154 errout: 2155 return err; 2156 } 2157 2158 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2159 { 2160 struct fib6_config cfg; 2161 int err; 2162 2163 err = rtm_to_fib6_config(skb, nlh, &cfg); 2164 if (err < 0) 2165 return err; 2166 2167 return ip6_route_del(&cfg); 2168 } 2169 2170 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2171 { 2172 struct fib6_config cfg; 2173 int err; 2174 2175 err = rtm_to_fib6_config(skb, nlh, &cfg); 2176 if (err < 0) 2177 return err; 2178 2179 return ip6_route_add(&cfg); 2180 } 2181 2182 static inline size_t rt6_nlmsg_size(void) 2183 { 2184 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2185 + nla_total_size(16) /* RTA_SRC */ 2186 + nla_total_size(16) /* RTA_DST */ 2187 + nla_total_size(16) /* RTA_GATEWAY */ 2188 + nla_total_size(16) /* RTA_PREFSRC */ 
2189 + nla_total_size(4) /* RTA_TABLE */ 2190 + nla_total_size(4) /* RTA_IIF */ 2191 + nla_total_size(4) /* RTA_OIF */ 2192 + nla_total_size(4) /* RTA_PRIORITY */ 2193 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2194 + nla_total_size(sizeof(struct rta_cacheinfo)); 2195 } 2196 2197 static int rt6_fill_node(struct net *net, 2198 struct sk_buff *skb, struct rt6_info *rt, 2199 struct in6_addr *dst, struct in6_addr *src, 2200 int iif, int type, u32 pid, u32 seq, 2201 int prefix, int nowait, unsigned int flags) 2202 { 2203 struct rtmsg *rtm; 2204 struct nlmsghdr *nlh; 2205 long expires; 2206 u32 table; 2207 2208 if (prefix) { /* user wants prefix routes only */ 2209 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2210 /* success since this is not a prefix route */ 2211 return 1; 2212 } 2213 } 2214 2215 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2216 if (nlh == NULL) 2217 return -EMSGSIZE; 2218 2219 rtm = nlmsg_data(nlh); 2220 rtm->rtm_family = AF_INET6; 2221 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2222 rtm->rtm_src_len = rt->rt6i_src.plen; 2223 rtm->rtm_tos = 0; 2224 if (rt->rt6i_table) 2225 table = rt->rt6i_table->tb6_id; 2226 else 2227 table = RT6_TABLE_UNSPEC; 2228 rtm->rtm_table = table; 2229 NLA_PUT_U32(skb, RTA_TABLE, table); 2230 if (rt->rt6i_flags&RTF_REJECT) 2231 rtm->rtm_type = RTN_UNREACHABLE; 2232 else if (rt->rt6i_flags&RTF_LOCAL) 2233 rtm->rtm_type = RTN_LOCAL; 2234 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2235 rtm->rtm_type = RTN_LOCAL; 2236 else 2237 rtm->rtm_type = RTN_UNICAST; 2238 rtm->rtm_flags = 0; 2239 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2240 rtm->rtm_protocol = rt->rt6i_protocol; 2241 if (rt->rt6i_flags&RTF_DYNAMIC) 2242 rtm->rtm_protocol = RTPROT_REDIRECT; 2243 else if (rt->rt6i_flags & RTF_ADDRCONF) 2244 rtm->rtm_protocol = RTPROT_KERNEL; 2245 else if (rt->rt6i_flags&RTF_DEFAULT) 2246 rtm->rtm_protocol = RTPROT_RA; 2247 2248 if (rt->rt6i_flags&RTF_CACHE) 2249 rtm->rtm_flags |= RTM_F_CLONED; 2250 2251 if (dst) { 
NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Output lookup: report the source address that would be chosen. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Remaining lifetime in jiffies, clamped to INT_MAX; 0 if the route does not expire. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* NOTE(review): the NLA_PUT*() macros presumably jump here on failure -- confirm. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Dump callback: emit @rt as an RTM_NEWROUTE multipart message.  Only
 * prefix routes are emitted when the request header carries
 * RTM_F_PREFIX.
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

/*
 * rtnetlink RTM_GETROUTE handler: resolve the flow described by the
 * request (RTA_SRC, RTA_DST, RTA_OIF) with ip6_route_output() and
 * unicast the resulting route back to the requester.
 *
 * Returns -EINVAL for short address attributes, -ENODEV when RTA_IIF
 * names an unknown interface, -ENOBUFS when no skb can be allocated,
 * otherwise the result of rt6_fill_node() / rtnl_unicast().
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi fl;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl, 0, sizeof(fl));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl.oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Only the existence of the input interface is checked here. */
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}

/*
 * Notify RTNLGRP_IPV6_ROUTE listeners of @event on @rt.  If the message
 * cannot be allocated or built, the error is recorded on the group via
 * rtnl_set_sk_err() instead.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (skb == NULL)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct net *net = dev_net(dev);

	/* When the loopback device registers, bind the special route entries to it. */
	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}
2455 return NOTIFY_OK; 2456 } 2457 2458 /* 2459 * /proc 2460 */ 2461 2462 #ifdef CONFIG_PROC_FS 2463 2464 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2465 2466 struct rt6_proc_arg 2467 { 2468 char *buffer; 2469 int offset; 2470 int length; 2471 int skip; 2472 int len; 2473 }; 2474 2475 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2476 { 2477 struct seq_file *m = p_arg; 2478 2479 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2480 2481 #ifdef CONFIG_IPV6_SUBTREES 2482 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2483 #else 2484 seq_puts(m, "00000000000000000000000000000000 00 "); 2485 #endif 2486 2487 if (rt->rt6i_nexthop) { 2488 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); 2489 } else { 2490 seq_puts(m, "00000000000000000000000000000000"); 2491 } 2492 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2493 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2494 rt->dst.__use, rt->rt6i_flags, 2495 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2496 return 0; 2497 } 2498 2499 static int ipv6_route_show(struct seq_file *m, void *v) 2500 { 2501 struct net *net = (struct net *)m->private; 2502 fib6_clean_all(net, rt6_info_route, 0, m); 2503 return 0; 2504 } 2505 2506 static int ipv6_route_open(struct inode *inode, struct file *file) 2507 { 2508 return single_open_net(inode, file, ipv6_route_show); 2509 } 2510 2511 static const struct file_operations ipv6_route_proc_fops = { 2512 .owner = THIS_MODULE, 2513 .open = ipv6_route_open, 2514 .read = seq_read, 2515 .llseek = seq_lseek, 2516 .release = single_release_net, 2517 }; 2518 2519 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2520 { 2521 struct net *net = (struct net *)seq->private; 2522 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2523 net->ipv6.rt6_stats->fib_nodes, 2524 net->ipv6.rt6_stats->fib_route_nodes, 2525 net->ipv6.rt6_stats->fib_rt_alloc, 2526 net->ipv6.rt6_stats->fib_rt_entries, 2527 net->ipv6.rt6_stats->fib_rt_cache, 
2528 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2529 net->ipv6.rt6_stats->fib_discarded_routes); 2530 2531 return 0; 2532 } 2533 2534 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2535 { 2536 return single_open_net(inode, file, rt6_stats_seq_show); 2537 } 2538 2539 static const struct file_operations rt6_stats_seq_fops = { 2540 .owner = THIS_MODULE, 2541 .open = rt6_stats_seq_open, 2542 .read = seq_read, 2543 .llseek = seq_lseek, 2544 .release = single_release_net, 2545 }; 2546 #endif /* CONFIG_PROC_FS */ 2547 2548 #ifdef CONFIG_SYSCTL 2549 2550 static 2551 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2552 void __user *buffer, size_t *lenp, loff_t *ppos) 2553 { 2554 struct net *net = current->nsproxy->net_ns; 2555 int delay = net->ipv6.sysctl.flush_delay; 2556 if (write) { 2557 proc_dointvec(ctl, write, buffer, lenp, ppos); 2558 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2559 return 0; 2560 } else 2561 return -EINVAL; 2562 } 2563 2564 ctl_table ipv6_route_table_template[] = { 2565 { 2566 .procname = "flush", 2567 .data = &init_net.ipv6.sysctl.flush_delay, 2568 .maxlen = sizeof(int), 2569 .mode = 0200, 2570 .proc_handler = ipv6_sysctl_rtcache_flush 2571 }, 2572 { 2573 .procname = "gc_thresh", 2574 .data = &ip6_dst_ops_template.gc_thresh, 2575 .maxlen = sizeof(int), 2576 .mode = 0644, 2577 .proc_handler = proc_dointvec, 2578 }, 2579 { 2580 .procname = "max_size", 2581 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2582 .maxlen = sizeof(int), 2583 .mode = 0644, 2584 .proc_handler = proc_dointvec, 2585 }, 2586 { 2587 .procname = "gc_min_interval", 2588 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2589 .maxlen = sizeof(int), 2590 .mode = 0644, 2591 .proc_handler = proc_dointvec_jiffies, 2592 }, 2593 { 2594 .procname = "gc_timeout", 2595 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2596 .maxlen = sizeof(int), 2597 .mode = 0644, 2598 .proc_handler = proc_dointvec_jiffies, 2599 }, 2600 { 2601 .procname = 
"gc_interval", 2602 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2603 .maxlen = sizeof(int), 2604 .mode = 0644, 2605 .proc_handler = proc_dointvec_jiffies, 2606 }, 2607 { 2608 .procname = "gc_elasticity", 2609 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2610 .maxlen = sizeof(int), 2611 .mode = 0644, 2612 .proc_handler = proc_dointvec, 2613 }, 2614 { 2615 .procname = "mtu_expires", 2616 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2617 .maxlen = sizeof(int), 2618 .mode = 0644, 2619 .proc_handler = proc_dointvec_jiffies, 2620 }, 2621 { 2622 .procname = "min_adv_mss", 2623 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2624 .maxlen = sizeof(int), 2625 .mode = 0644, 2626 .proc_handler = proc_dointvec, 2627 }, 2628 { 2629 .procname = "gc_min_interval_ms", 2630 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2631 .maxlen = sizeof(int), 2632 .mode = 0644, 2633 .proc_handler = proc_dointvec_ms_jiffies, 2634 }, 2635 { } 2636 }; 2637 2638 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2639 { 2640 struct ctl_table *table; 2641 2642 table = kmemdup(ipv6_route_table_template, 2643 sizeof(ipv6_route_table_template), 2644 GFP_KERNEL); 2645 2646 if (table) { 2647 table[0].data = &net->ipv6.sysctl.flush_delay; 2648 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2649 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2650 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2651 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2652 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2653 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2654 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2655 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2656 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2657 } 2658 2659 return table; 2660 } 2661 #endif 2662 2663 static int __net_init ip6_route_net_init(struct net *net) 2664 { 2665 int ret = -ENOMEM; 2666 2667 memcpy(&net->ipv6.ip6_dst_ops, 
&ip6_dst_ops_template, 2668 sizeof(net->ipv6.ip6_dst_ops)); 2669 2670 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2671 goto out_ip6_dst_ops; 2672 2673 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2674 sizeof(*net->ipv6.ip6_null_entry), 2675 GFP_KERNEL); 2676 if (!net->ipv6.ip6_null_entry) 2677 goto out_ip6_dst_entries; 2678 net->ipv6.ip6_null_entry->dst.path = 2679 (struct dst_entry *)net->ipv6.ip6_null_entry; 2680 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2681 2682 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2683 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2684 sizeof(*net->ipv6.ip6_prohibit_entry), 2685 GFP_KERNEL); 2686 if (!net->ipv6.ip6_prohibit_entry) 2687 goto out_ip6_null_entry; 2688 net->ipv6.ip6_prohibit_entry->dst.path = 2689 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2690 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2691 2692 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2693 sizeof(*net->ipv6.ip6_blk_hole_entry), 2694 GFP_KERNEL); 2695 if (!net->ipv6.ip6_blk_hole_entry) 2696 goto out_ip6_prohibit_entry; 2697 net->ipv6.ip6_blk_hole_entry->dst.path = 2698 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2699 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2700 #endif 2701 2702 net->ipv6.sysctl.flush_delay = 0; 2703 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2704 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2705 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2706 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2707 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2708 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2709 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2710 2711 #ifdef CONFIG_PROC_FS 2712 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2713 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2714 #endif 2715 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2716 2717 ret = 0; 2718 out: 2719 
return ret; 2720 2721 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2722 out_ip6_prohibit_entry: 2723 kfree(net->ipv6.ip6_prohibit_entry); 2724 out_ip6_null_entry: 2725 kfree(net->ipv6.ip6_null_entry); 2726 #endif 2727 out_ip6_dst_entries: 2728 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2729 out_ip6_dst_ops: 2730 goto out; 2731 } 2732 2733 static void __net_exit ip6_route_net_exit(struct net *net) 2734 { 2735 #ifdef CONFIG_PROC_FS 2736 proc_net_remove(net, "ipv6_route"); 2737 proc_net_remove(net, "rt6_stats"); 2738 #endif 2739 kfree(net->ipv6.ip6_null_entry); 2740 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2741 kfree(net->ipv6.ip6_prohibit_entry); 2742 kfree(net->ipv6.ip6_blk_hole_entry); 2743 #endif 2744 } 2745 2746 static struct pernet_operations ip6_route_net_ops = { 2747 .init = ip6_route_net_init, 2748 .exit = ip6_route_net_exit, 2749 }; 2750 2751 static struct notifier_block ip6_route_dev_notifier = { 2752 .notifier_call = ip6_route_dev_notify, 2753 .priority = 0, 2754 }; 2755 2756 int __init ip6_route_init(void) 2757 { 2758 int ret; 2759 2760 ret = -ENOMEM; 2761 ip6_dst_ops_template.kmem_cachep = 2762 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2763 SLAB_HWCACHE_ALIGN, NULL); 2764 if (!ip6_dst_ops_template.kmem_cachep) 2765 goto out; 2766 2767 ret = dst_entries_init(&ip6_dst_blackhole_ops); 2768 if (ret) 2769 goto out_kmem_cache; 2770 2771 ret = register_pernet_subsys(&ip6_route_net_ops); 2772 if (ret) 2773 goto out_dst_entries; 2774 2775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2776 2777 /* Registering of the loopback is done before this portion of code, 2778 * the loopback reference in rt6_info will not be taken, do it 2779 * manually for init_net */ 2780 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 2781 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2782 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2783 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 2784 
init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2785 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 2786 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2787 #endif 2788 ret = fib6_init(); 2789 if (ret) 2790 goto out_register_subsys; 2791 2792 ret = xfrm6_init(); 2793 if (ret) 2794 goto out_fib6_init; 2795 2796 ret = fib6_rules_init(); 2797 if (ret) 2798 goto xfrm6_init; 2799 2800 ret = -ENOBUFS; 2801 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2802 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2803 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2804 goto fib6_rules_init; 2805 2806 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2807 if (ret) 2808 goto fib6_rules_init; 2809 2810 out: 2811 return ret; 2812 2813 fib6_rules_init: 2814 fib6_rules_cleanup(); 2815 xfrm6_init: 2816 xfrm6_fini(); 2817 out_fib6_init: 2818 fib6_gc_cleanup(); 2819 out_register_subsys: 2820 unregister_pernet_subsys(&ip6_route_net_ops); 2821 out_dst_entries: 2822 dst_entries_destroy(&ip6_dst_blackhole_ops); 2823 out_kmem_cache: 2824 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2825 goto out; 2826 } 2827 2828 void ip6_route_cleanup(void) 2829 { 2830 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2831 fib6_rules_cleanup(); 2832 xfrm6_fini(); 2833 fib6_gc_cleanup(); 2834 unregister_pernet_subsys(&ip6_route_net_ops); 2835 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2836 } 2837