1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <linux/slab.h> 44 #include <net/net_namespace.h> 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) do { ; } while (0) 73 #endif 74 75 #define CLONE_OFFLINK_ROUTE 0 76 77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 79 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 80 static void ip6_dst_destroy(struct dst_entry *); 81 static void ip6_dst_ifdown(struct dst_entry *, 82 struct net_device *dev, int how); 83 static int ip6_dst_gc(struct dst_ops *ops); 84 85 static int ip6_pkt_discard(struct sk_buff *skb); 86 static int ip6_pkt_discard_out(struct sk_buff *skb); 87 static void ip6_link_failure(struct sk_buff *skb); 88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 89 90 #ifdef CONFIG_IPV6_ROUTE_INFO 91 static struct rt6_info *rt6_add_route_info(struct net *net, 92 struct in6_addr *prefix, int prefixlen, 93 struct in6_addr *gwaddr, int ifindex, 94 unsigned pref); 95 static struct rt6_info *rt6_get_route_info(struct net *net, 96 struct in6_addr *prefix, int prefixlen, 97 struct in6_addr *gwaddr, int ifindex); 98 #endif 99 100 static struct dst_ops ip6_dst_ops_template = { 101 .family = AF_INET6, 102 .protocol = cpu_to_be16(ETH_P_IPV6), 103 .gc = ip6_dst_gc, 104 .gc_thresh = 1024, 105 .check = ip6_dst_check, 106 .destroy = ip6_dst_destroy, 107 .ifdown = ip6_dst_ifdown, 108 .negative_advice = ip6_negative_advice, 109 .link_failure = ip6_link_failure, 110 .update_pmtu = ip6_rt_update_pmtu, 111 .local_out = __ip6_local_out, 112 .entries = ATOMIC_INIT(0), 113 }; 114 115 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 116 { 117 } 118 119 static struct dst_ops ip6_dst_blackhole_ops = { 120 .family = AF_INET6, 121 .protocol = cpu_to_be16(ETH_P_IPV6), 122 .destroy = ip6_dst_destroy, 123 .check = ip6_dst_check, 124 .update_pmtu = ip6_rt_blackhole_update_pmtu, 125 .entries = ATOMIC_INIT(0), 126 }; 127 128 static struct rt6_info ip6_null_entry_template = { 129 .u = { 130 .dst = { 131 .__refcnt = ATOMIC_INIT(1), 132 .__use = 1, 133 .obsolete = -1, 134 .error = -ENETUNREACH, 135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 136 .input = ip6_pkt_discard, 137 .output = ip6_pkt_discard_out, 138 } 139 }, 140 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 141 .rt6i_protocol = RTPROT_KERNEL, 142 .rt6i_metric = ~(u32) 0, 143 .rt6i_ref = ATOMIC_INIT(1), 144 }; 145 146 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 147 148 static int ip6_pkt_prohibit(struct sk_buff *skb); 149 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 150 151 static struct rt6_info ip6_prohibit_entry_template = { 152 .u = { 153 .dst = { 154 .__refcnt = ATOMIC_INIT(1), 155 .__use = 1, 156 .obsolete = -1, 157 .error = -EACCES, 158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 159 .input = ip6_pkt_prohibit, 160 .output = ip6_pkt_prohibit_out, 161 } 162 }, 163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 164 .rt6i_protocol = RTPROT_KERNEL, 165 .rt6i_metric = ~(u32) 0, 166 .rt6i_ref = ATOMIC_INIT(1), 167 }; 168 169 static struct rt6_info ip6_blk_hole_entry_template = { 170 .u = { 171 .dst = { 172 .__refcnt = ATOMIC_INIT(1), 173 .__use = 1, 174 .obsolete = -1, 175 .error = -EINVAL, 176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 177 .input = dst_discard, 178 .output = dst_discard, 179 } 180 }, 181 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 182 .rt6i_protocol = RTPROT_KERNEL, 183 .rt6i_metric = ~(u32) 0, 184 .rt6i_ref = ATOMIC_INIT(1), 185 }; 186 187 #endif 188 189 /* allocate dst with ip6_dst_ops */ 190 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 191 { 192 return (struct rt6_info *)dst_alloc(ops); 193 } 194 195 static void ip6_dst_destroy(struct dst_entry *dst) 196 { 197 struct rt6_info *rt = (struct rt6_info *)dst; 198 struct inet6_dev *idev = rt->rt6i_idev; 199 200 if (idev != NULL) { 201 rt->rt6i_idev = NULL; 202 in6_dev_put(idev); 203 } 204 } 205 206 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 207 int how) 208 { 209 struct rt6_info *rt = (struct rt6_info *)dst; 210 struct inet6_dev *idev = rt->rt6i_idev; 211 struct net_device *loopback_dev = 212 dev_net(dev)->loopback_dev; 213 214 if (dev != loopback_dev && idev != NULL && idev->dev == dev) { 215 struct inet6_dev *loopback_idev = 216 in6_dev_get(loopback_dev); 217 if (loopback_idev != NULL) { 218 rt->rt6i_idev = loopback_idev; 219 in6_dev_put(idev); 220 } 221 } 222 } 223 224 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 225 { 226 return (rt->rt6i_flags & RTF_EXPIRES && 227 time_after(jiffies, rt->rt6i_expires)); 228 } 229 230 static inline int rt6_need_strict(struct in6_addr *daddr) 231 { 232 return (ipv6_addr_type(daddr) & 233 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 234 } 235 236 /* 237 * Route lookup. Any table->tb6_lock is implied. 238 */ 239 240 static inline struct rt6_info *rt6_device_match(struct net *net, 241 struct rt6_info *rt, 242 struct in6_addr *saddr, 243 int oif, 244 int flags) 245 { 246 struct rt6_info *local = NULL; 247 struct rt6_info *sprt; 248 249 if (!oif && ipv6_addr_any(saddr)) 250 goto out; 251 252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 253 struct net_device *dev = sprt->rt6i_dev; 254 255 if (oif) { 256 if (dev->ifindex == oif) 257 return sprt; 258 if (dev->flags & IFF_LOOPBACK) { 259 if (sprt->rt6i_idev == NULL || 260 sprt->rt6i_idev->dev->ifindex != oif) { 261 if (flags & RT6_LOOKUP_F_IFACE && oif) 262 continue; 263 if (local && (!oif || 264 local->rt6i_idev->dev->ifindex == oif)) 265 continue; 266 } 267 local = sprt; 268 } 269 } else { 270 if (ipv6_chk_addr(net, saddr, dev, 271 flags & RT6_LOOKUP_F_IFACE)) 272 return sprt; 273 } 274 } 275 276 if (oif) { 277 if (local) 278 return local; 279 280 if (flags & RT6_LOOKUP_F_IFACE) 281 return net->ipv6.ip6_null_entry; 282 } 283 out: 284 return rt; 285 } 286 287 #ifdef CONFIG_IPV6_ROUTER_PREF 288 static void rt6_probe(struct rt6_info *rt) 289 { 290 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 291 /* 292 * Okay, this does not seem to be appropriate 293 * for now, however, we need to check if it 294 * is really so; aka Router Reachability Probing. 295 * 296 * Router Reachability Probe MUST be rate-limited 297 * to no more than one per minute. 298 */ 299 if (!neigh || (neigh->nud_state & NUD_VALID)) 300 return; 301 read_lock_bh(&neigh->lock); 302 if (!(neigh->nud_state & NUD_VALID) && 303 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 304 struct in6_addr mcaddr; 305 struct in6_addr *target; 306 307 neigh->updated = jiffies; 308 read_unlock_bh(&neigh->lock); 309 310 target = (struct in6_addr *)&neigh->primary_key; 311 addrconf_addr_solict_mult(target, &mcaddr); 312 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 313 } else 314 read_unlock_bh(&neigh->lock); 315 } 316 #else 317 static inline void rt6_probe(struct rt6_info *rt) 318 { 319 return; 320 } 321 #endif 322 323 /* 324 * Default Router Selection (RFC 2461 6.3.6) 325 */ 326 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 327 { 328 struct net_device *dev = rt->rt6i_dev; 329 if (!oif || dev->ifindex == oif) 330 return 2; 331 if ((dev->flags & IFF_LOOPBACK) && 332 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 333 return 1; 334 return 0; 335 } 336 337 static inline int rt6_check_neigh(struct rt6_info *rt) 338 { 339 struct neighbour *neigh = rt->rt6i_nexthop; 340 int m; 341 if (rt->rt6i_flags & RTF_NONEXTHOP || 342 !(rt->rt6i_flags & RTF_GATEWAY)) 343 m = 1; 344 else if (neigh) { 345 read_lock_bh(&neigh->lock); 346 if (neigh->nud_state & NUD_VALID) 347 m = 2; 348 #ifdef CONFIG_IPV6_ROUTER_PREF 349 else if (neigh->nud_state & NUD_FAILED) 350 m = 0; 351 #endif 352 else 353 m = 1; 354 read_unlock_bh(&neigh->lock); 355 } else 356 m = 0; 357 return m; 358 } 359 360 static int rt6_score_route(struct rt6_info *rt, int oif, 361 int strict) 362 { 363 int m, n; 364 365 m = rt6_check_dev(rt, oif); 366 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 367 return -1; 368 #ifdef CONFIG_IPV6_ROUTER_PREF 369 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 370 #endif 371 n = rt6_check_neigh(rt); 372 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 373 return -1; 374 return m; 375 } 376 377 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 378 int *mpri, struct rt6_info *match) 379 { 380 int m; 381 382 if (rt6_check_expired(rt)) 383 goto out; 384 385 m = rt6_score_route(rt, oif, strict); 386 if (m < 0) 387 goto out; 388 389 if (m > *mpri) { 390 if (strict & RT6_LOOKUP_F_REACHABLE) 391 rt6_probe(match); 392 *mpri = m; 393 match = rt; 394 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 395 rt6_probe(rt); 396 } 397 398 out: 399 return match; 400 } 401 402 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 403 struct rt6_info *rr_head, 404 u32 metric, int oif, int strict) 405 { 406 struct rt6_info *rt, *match; 407 int mpri = -1; 408 409 match = NULL; 410 for (rt = rr_head; rt && rt->rt6i_metric == metric; 411 rt = rt->u.dst.rt6_next) 412 match = find_match(rt, oif, strict, &mpri, match); 413 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 414 rt = rt->u.dst.rt6_next) 415 match = find_match(rt, oif, strict, &mpri, match); 416 417 return match; 418 } 419 420 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 421 { 422 struct rt6_info *match, *rt0; 423 struct net *net; 424 425 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 426 __func__, fn->leaf, oif); 427 428 rt0 = fn->rr_ptr; 429 if (!rt0) 430 fn->rr_ptr = rt0 = fn->leaf; 431 432 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 433 434 if (!match && 435 (strict & RT6_LOOKUP_F_REACHABLE)) { 436 struct rt6_info *next = rt0->u.dst.rt6_next; 437 438 /* no entries matched; do round-robin */ 439 if (!next || next->rt6i_metric != rt0->rt6i_metric) 440 next = fn->leaf; 441 442 if (next != rt0) 443 fn->rr_ptr = next; 444 } 445 446 RT6_TRACE("%s() => %p\n", 447 __func__, match); 448 449 net = dev_net(rt0->rt6i_dev); 450 return (match ? match : net->ipv6.ip6_null_entry); 451 } 452 453 #ifdef CONFIG_IPV6_ROUTE_INFO 454 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 455 struct in6_addr *gwaddr) 456 { 457 struct net *net = dev_net(dev); 458 struct route_info *rinfo = (struct route_info *) opt; 459 struct in6_addr prefix_buf, *prefix; 460 unsigned int pref; 461 unsigned long lifetime; 462 struct rt6_info *rt; 463 464 if (len < sizeof(struct route_info)) { 465 return -EINVAL; 466 } 467 468 /* Sanity check for prefix_len and length */ 469 if (rinfo->length > 3) { 470 return -EINVAL; 471 } else if (rinfo->prefix_len > 128) { 472 return -EINVAL; 473 } else if (rinfo->prefix_len > 64) { 474 if (rinfo->length < 2) { 475 return -EINVAL; 476 } 477 } else if (rinfo->prefix_len > 0) { 478 if (rinfo->length < 1) { 479 return -EINVAL; 480 } 481 } 482 483 pref = rinfo->route_pref; 484 if (pref == ICMPV6_ROUTER_PREF_INVALID) 485 return -EINVAL; 486 487 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 488 489 if (rinfo->length == 3) 490 prefix = (struct in6_addr *)rinfo->prefix; 491 else { 492 /* this function is safe */ 493 ipv6_addr_prefix(&prefix_buf, 494 (struct in6_addr *)rinfo->prefix, 495 rinfo->prefix_len); 496 prefix = &prefix_buf; 497 } 498 499 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 500 dev->ifindex); 501 502 if (rt && !lifetime) { 503 ip6_del_rt(rt); 504 rt = NULL; 505 } 506 507 if (!rt && lifetime) 508 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 509 pref); 510 else if (rt) 511 rt->rt6i_flags = RTF_ROUTEINFO | 512 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 513 514 if (rt) { 515 if (!addrconf_finite_timeout(lifetime)) { 516 rt->rt6i_flags &= ~RTF_EXPIRES; 517 } else { 518 rt->rt6i_expires = jiffies + HZ * lifetime; 519 rt->rt6i_flags |= RTF_EXPIRES; 520 } 521 dst_release(&rt->u.dst); 522 } 523 return 0; 524 } 525 #endif 526 527 #define BACKTRACK(__net, saddr) \ 528 do { \ 529 if (rt == __net->ipv6.ip6_null_entry) { \ 530 struct fib6_node *pn; \ 531 while (1) { \ 532 if (fn->fn_flags & RTN_TL_ROOT) \ 533 goto out; \ 534 pn = fn->parent; \ 535 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 536 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 537 else \ 538 fn = pn; \ 539 if (fn->fn_flags & RTN_RTINFO) \ 540 goto restart; \ 541 } \ 542 } \ 543 } while(0) 544 545 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 546 struct fib6_table *table, 547 struct flowi *fl, int flags) 548 { 549 struct fib6_node *fn; 550 struct rt6_info *rt; 551 552 read_lock_bh(&table->tb6_lock); 553 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 554 restart: 555 rt = fn->leaf; 556 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 557 BACKTRACK(net, &fl->fl6_src); 558 out: 559 dst_use(&rt->u.dst, jiffies); 560 read_unlock_bh(&table->tb6_lock); 561 return rt; 562 563 } 564 565 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 566 const struct in6_addr *saddr, int oif, int strict) 567 { 568 struct flowi fl = { 569 .oif = oif, 570 .nl_u = { 571 .ip6_u = { 572 .daddr = *daddr, 573 }, 574 }, 575 }; 576 struct dst_entry *dst; 577 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 578 579 if (saddr) { 580 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 581 flags |= RT6_LOOKUP_F_HAS_SADDR; 582 } 583 584 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 585 if (dst->error == 0) 586 return (struct rt6_info *) dst; 587 588 dst_release(dst); 589 590 return NULL; 591 } 592 593 EXPORT_SYMBOL(rt6_lookup); 594 595 /* ip6_ins_rt is called with FREE table->tb6_lock. 596 It takes new route entry, the addition fails by any reason the 597 route is freed. In any case, if caller does not hold it, it may 598 be destroyed. 599 */ 600 601 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 602 { 603 int err; 604 struct fib6_table *table; 605 606 table = rt->rt6i_table; 607 write_lock_bh(&table->tb6_lock); 608 err = fib6_add(&table->tb6_root, rt, info); 609 write_unlock_bh(&table->tb6_lock); 610 611 return err; 612 } 613 614 int ip6_ins_rt(struct rt6_info *rt) 615 { 616 struct nl_info info = { 617 .nl_net = dev_net(rt->rt6i_dev), 618 }; 619 return __ip6_ins_rt(rt, &info); 620 } 621 622 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 623 struct in6_addr *saddr) 624 { 625 struct rt6_info *rt; 626 627 /* 628 * Clone the route. 629 */ 630 631 rt = ip6_rt_copy(ort); 632 633 if (rt) { 634 struct neighbour *neigh; 635 int attempts = !in_softirq(); 636 637 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 638 if (rt->rt6i_dst.plen != 128 && 639 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 640 rt->rt6i_flags |= RTF_ANYCAST; 641 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 642 } 643 644 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 645 rt->rt6i_dst.plen = 128; 646 rt->rt6i_flags |= RTF_CACHE; 647 rt->u.dst.flags |= DST_HOST; 648 649 #ifdef CONFIG_IPV6_SUBTREES 650 if (rt->rt6i_src.plen && saddr) { 651 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 652 rt->rt6i_src.plen = 128; 653 } 654 #endif 655 656 retry: 657 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 658 if (IS_ERR(neigh)) { 659 struct net *net = dev_net(rt->rt6i_dev); 660 int saved_rt_min_interval = 661 net->ipv6.sysctl.ip6_rt_gc_min_interval; 662 int saved_rt_elasticity = 663 net->ipv6.sysctl.ip6_rt_gc_elasticity; 664 665 if (attempts-- > 0) { 666 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 667 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 668 669 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 670 671 net->ipv6.sysctl.ip6_rt_gc_elasticity = 672 saved_rt_elasticity; 673 net->ipv6.sysctl.ip6_rt_gc_min_interval = 674 saved_rt_min_interval; 675 goto retry; 676 } 677 678 if (net_ratelimit()) 679 printk(KERN_WARNING 680 "Neighbour table overflow.\n"); 681 dst_free(&rt->u.dst); 682 return NULL; 683 } 684 rt->rt6i_nexthop = neigh; 685 686 } 687 688 return rt; 689 } 690 691 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 692 { 693 struct rt6_info *rt = ip6_rt_copy(ort); 694 if (rt) { 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 696 rt->rt6i_dst.plen = 128; 697 rt->rt6i_flags |= RTF_CACHE; 698 rt->u.dst.flags |= DST_HOST; 699 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 700 } 701 return rt; 702 } 703 704 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 705 struct flowi *fl, int flags) 706 { 707 struct fib6_node *fn; 708 struct rt6_info *rt, *nrt; 709 int strict = 0; 710 int attempts = 3; 711 int err; 712 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 713 714 strict |= flags & RT6_LOOKUP_F_IFACE; 715 716 relookup: 717 read_lock_bh(&table->tb6_lock); 718 719 restart_2: 720 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 721 722 restart: 723 rt = rt6_select(fn, oif, strict | reachable); 724 725 BACKTRACK(net, &fl->fl6_src); 726 if (rt == net->ipv6.ip6_null_entry || 727 rt->rt6i_flags & RTF_CACHE) 728 goto out; 729 730 dst_hold(&rt->u.dst); 731 read_unlock_bh(&table->tb6_lock); 732 733 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 734 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 735 else { 736 #if CLONE_OFFLINK_ROUTE 737 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 738 #else 739 goto out2; 740 #endif 741 } 742 743 dst_release(&rt->u.dst); 744 rt = nrt ? : net->ipv6.ip6_null_entry; 745 746 dst_hold(&rt->u.dst); 747 if (nrt) { 748 err = ip6_ins_rt(nrt); 749 if (!err) 750 goto out2; 751 } 752 753 if (--attempts <= 0) 754 goto out2; 755 756 /* 757 * Race condition! In the gap, when table->tb6_lock was 758 * released someone could insert this route. Relookup. 759 */ 760 dst_release(&rt->u.dst); 761 goto relookup; 762 763 out: 764 if (reachable) { 765 reachable = 0; 766 goto restart_2; 767 } 768 dst_hold(&rt->u.dst); 769 read_unlock_bh(&table->tb6_lock); 770 out2: 771 rt->u.dst.lastuse = jiffies; 772 rt->u.dst.__use++; 773 774 return rt; 775 } 776 777 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 778 struct flowi *fl, int flags) 779 { 780 return ip6_pol_route(net, table, fl->iif, fl, flags); 781 } 782 783 void ip6_route_input(struct sk_buff *skb) 784 { 785 struct ipv6hdr *iph = ipv6_hdr(skb); 786 struct net *net = dev_net(skb->dev); 787 int flags = RT6_LOOKUP_F_HAS_SADDR; 788 struct flowi fl = { 789 .iif = skb->dev->ifindex, 790 .nl_u = { 791 .ip6_u = { 792 .daddr = iph->daddr, 793 .saddr = iph->saddr, 794 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 795 }, 796 }, 797 .mark = skb->mark, 798 .proto = iph->nexthdr, 799 }; 800 801 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 802 flags |= RT6_LOOKUP_F_IFACE; 803 804 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 805 } 806 807 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 808 struct flowi *fl, int flags) 809 { 810 return ip6_pol_route(net, table, fl->oif, fl, flags); 811 } 812 813 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 814 struct flowi *fl) 815 { 816 int flags = 0; 817 818 if (rt6_need_strict(&fl->fl6_dst)) 819 flags |= RT6_LOOKUP_F_IFACE; 820 821 if (!ipv6_addr_any(&fl->fl6_src)) 822 flags |= RT6_LOOKUP_F_HAS_SADDR; 823 else if (sk) 824 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 825 826 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 827 } 828 829 EXPORT_SYMBOL(ip6_route_output); 830 831 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 832 { 833 struct rt6_info *ort = (struct rt6_info *) *dstp; 834 struct rt6_info *rt = (struct rt6_info *) 835 dst_alloc(&ip6_dst_blackhole_ops); 836 struct dst_entry *new = NULL; 837 838 if (rt) { 839 new = &rt->u.dst; 840 841 atomic_set(&new->__refcnt, 1); 842 new->__use = 1; 843 new->input = dst_discard; 844 new->output = dst_discard; 845 846 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 847 new->dev = ort->u.dst.dev; 848 if (new->dev) 849 dev_hold(new->dev); 850 rt->rt6i_idev = ort->rt6i_idev; 851 if (rt->rt6i_idev) 852 in6_dev_hold(rt->rt6i_idev); 853 rt->rt6i_expires = 0; 854 855 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 856 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 857 rt->rt6i_metric = 0; 858 859 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 860 #ifdef CONFIG_IPV6_SUBTREES 861 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 862 #endif 863 864 dst_free(new); 865 } 866 867 dst_release(*dstp); 868 *dstp = new; 869 return (new ? 0 : -ENOMEM); 870 } 871 EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 872 873 /* 874 * Destination cache support functions 875 */ 876 877 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 878 { 879 struct rt6_info *rt; 880 881 rt = (struct rt6_info *) dst; 882 883 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 884 return dst; 885 886 return NULL; 887 } 888 889 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 890 { 891 struct rt6_info *rt = (struct rt6_info *) dst; 892 893 if (rt) { 894 if (rt->rt6i_flags & RTF_CACHE) { 895 if (rt6_check_expired(rt)) { 896 ip6_del_rt(rt); 897 dst = NULL; 898 } 899 } else { 900 dst_release(dst); 901 dst = NULL; 902 } 903 } 904 return dst; 905 } 906 907 static void ip6_link_failure(struct sk_buff *skb) 908 { 909 struct rt6_info *rt; 910 911 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 912 913 rt = (struct rt6_info *) skb_dst(skb); 914 if (rt) { 915 if (rt->rt6i_flags&RTF_CACHE) { 916 dst_set_expires(&rt->u.dst, 0); 917 rt->rt6i_flags |= RTF_EXPIRES; 918 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 919 rt->rt6i_node->fn_sernum = -1; 920 } 921 } 922 923 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 924 { 925 struct rt6_info *rt6 = (struct rt6_info*)dst; 926 927 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 928 rt6->rt6i_flags |= RTF_MODIFIED; 929 if (mtu < IPV6_MIN_MTU) { 930 mtu = IPV6_MIN_MTU; 931 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 932 } 933 dst->metrics[RTAX_MTU-1] = mtu; 934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 935 } 936 } 937 938 static int ipv6_get_mtu(struct net_device *dev); 939 940 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 941 { 942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 943 944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 946 947 /* 948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 950 * IPV6_MAXPLEN is also valid and means: "any MSS, 951 * rely only on pmtu discovery" 952 */ 953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 954 mtu = IPV6_MAXPLEN; 955 return mtu; 956 } 957 958 static struct dst_entry *icmp6_dst_gc_list; 959 static DEFINE_SPINLOCK(icmp6_dst_lock); 960 961 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 962 struct neighbour *neigh, 963 const struct in6_addr *addr) 964 { 965 struct rt6_info *rt; 966 struct inet6_dev *idev = in6_dev_get(dev); 967 struct net *net = dev_net(dev); 968 969 if (unlikely(idev == NULL)) 970 return NULL; 971 972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 973 if (unlikely(rt == NULL)) { 974 in6_dev_put(idev); 975 goto out; 976 } 977 978 dev_hold(dev); 979 if (neigh) 980 neigh_hold(neigh); 981 else { 982 neigh = ndisc_get_neigh(dev, addr); 983 if (IS_ERR(neigh)) 984 neigh = NULL; 985 } 986 987 rt->rt6i_dev = dev; 988 rt->rt6i_idev = idev; 989 rt->rt6i_nexthop = neigh; 990 atomic_set(&rt->u.dst.__refcnt, 1); 991 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 992 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 993 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 994 rt->u.dst.output = ip6_output; 995 996 #if 0 /* there's no chance to use these for ndisc */ 997 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 998 ? DST_HOST 999 : 0; 1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1001 rt->rt6i_dst.plen = 128; 1002 #endif 1003 1004 spin_lock_bh(&icmp6_dst_lock); 1005 rt->u.dst.next = icmp6_dst_gc_list; 1006 icmp6_dst_gc_list = &rt->u.dst; 1007 spin_unlock_bh(&icmp6_dst_lock); 1008 1009 fib6_force_start_gc(net); 1010 1011 out: 1012 return &rt->u.dst; 1013 } 1014 1015 int icmp6_dst_gc(void) 1016 { 1017 struct dst_entry *dst, *next, **pprev; 1018 int more = 0; 1019 1020 next = NULL; 1021 1022 spin_lock_bh(&icmp6_dst_lock); 1023 pprev = &icmp6_dst_gc_list; 1024 1025 while ((dst = *pprev) != NULL) { 1026 if (!atomic_read(&dst->__refcnt)) { 1027 *pprev = dst->next; 1028 dst_free(dst); 1029 } else { 1030 pprev = &dst->next; 1031 ++more; 1032 } 1033 } 1034 1035 spin_unlock_bh(&icmp6_dst_lock); 1036 1037 return more; 1038 } 1039 1040 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1041 void *arg) 1042 { 1043 struct dst_entry *dst, **pprev; 1044 1045 spin_lock_bh(&icmp6_dst_lock); 1046 pprev = &icmp6_dst_gc_list; 1047 while ((dst = *pprev) != NULL) { 1048 struct rt6_info *rt = (struct rt6_info *) dst; 1049 if (func(rt, arg)) { 1050 *pprev = dst->next; 1051 dst_free(dst); 1052 } else { 1053 pprev = &dst->next; 1054 } 1055 } 1056 spin_unlock_bh(&icmp6_dst_lock); 1057 } 1058 1059 static int ip6_dst_gc(struct dst_ops *ops) 1060 { 1061 unsigned long now = jiffies; 1062 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1063 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1064 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1065 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1066 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1067 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1068 1069 if (time_after(rt_last_gc + rt_min_interval, now) && 1070 atomic_read(&ops->entries) <= rt_max_size) 1071 goto out; 1072 1073 net->ipv6.ip6_rt_gc_expire++; 1074 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1075 net->ipv6.ip6_rt_last_gc = now; 1076 if (atomic_read(&ops->entries) < ops->gc_thresh) 1077 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1078 out: 1079 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1080 return (atomic_read(&ops->entries) > rt_max_size); 1081 } 1082 1083 /* Clean host part of a prefix. Not necessary in radix tree, 1084 but results in cleaner routing tables. 1085 1086 Remove it only when all the things will work! 1087 */ 1088 1089 static int ipv6_get_mtu(struct net_device *dev) 1090 { 1091 int mtu = IPV6_MIN_MTU; 1092 struct inet6_dev *idev; 1093 1094 idev = in6_dev_get(dev); 1095 if (idev) { 1096 mtu = idev->cnf.mtu6; 1097 in6_dev_put(idev); 1098 } 1099 return mtu; 1100 } 1101 1102 int ip6_dst_hoplimit(struct dst_entry *dst) 1103 { 1104 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1105 if (hoplimit < 0) { 1106 struct net_device *dev = dst->dev; 1107 struct inet6_dev *idev = in6_dev_get(dev); 1108 if (idev) { 1109 hoplimit = idev->cnf.hop_limit; 1110 in6_dev_put(idev); 1111 } else 1112 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1113 } 1114 return hoplimit; 1115 } 1116 1117 /* 1118 * 1119 */ 1120 1121 int ip6_route_add(struct fib6_config *cfg) 1122 { 1123 int err; 1124 struct net *net = cfg->fc_nlinfo.nl_net; 1125 struct rt6_info *rt = NULL; 1126 struct net_device *dev = NULL; 1127 struct inet6_dev *idev = NULL; 1128 struct fib6_table *table; 1129 int addr_type; 1130 1131 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1132 return -EINVAL; 1133 #ifndef CONFIG_IPV6_SUBTREES 1134 if (cfg->fc_src_len) 1135 return -EINVAL; 1136 #endif 1137 if (cfg->fc_ifindex) { 1138 err = -ENODEV; 1139 dev = dev_get_by_index(net, cfg->fc_ifindex); 1140 if (!dev) 1141 goto out; 1142 idev = in6_dev_get(dev); 1143 if (!idev) 1144 goto out; 1145 } 1146 1147 if (cfg->fc_metric == 0) 1148 cfg->fc_metric = IP6_RT_PRIO_USER; 1149 1150 table = fib6_new_table(net, cfg->fc_table); 1151 if (table == NULL) { 1152 err = -ENOBUFS; 1153 goto out; 1154 } 1155 1156 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1157 1158 if (rt == NULL) { 1159 err = -ENOMEM; 1160 goto out; 1161 } 1162 1163 rt->u.dst.obsolete = -1; 1164 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1165 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1166 0; 1167 1168 if (cfg->fc_protocol == RTPROT_UNSPEC) 1169 cfg->fc_protocol = RTPROT_BOOT; 1170 rt->rt6i_protocol = cfg->fc_protocol; 1171 1172 addr_type = ipv6_addr_type(&cfg->fc_dst); 1173 1174 if (addr_type & IPV6_ADDR_MULTICAST) 1175 rt->u.dst.input = ip6_mc_input; 1176 else 1177 rt->u.dst.input = ip6_forward; 1178 1179 rt->u.dst.output = ip6_output; 1180 1181 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1182 rt->rt6i_dst.plen = cfg->fc_dst_len; 1183 if (rt->rt6i_dst.plen == 128) 1184 rt->u.dst.flags = DST_HOST; 1185 1186 #ifdef CONFIG_IPV6_SUBTREES 1187 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1188 rt->rt6i_src.plen = cfg->fc_src_len; 1189 #endif 1190 1191 rt->rt6i_metric = cfg->fc_metric; 1192 1193 /* We cannot add true routes via loopback here, 1194 they would result in kernel looping; promote them to reject routes 1195 */ 1196 if ((cfg->fc_flags & RTF_REJECT) || 1197 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1198 /* hold loopback dev/idev if we haven't done so. */ 1199 if (dev != net->loopback_dev) { 1200 if (dev) { 1201 dev_put(dev); 1202 in6_dev_put(idev); 1203 } 1204 dev = net->loopback_dev; 1205 dev_hold(dev); 1206 idev = in6_dev_get(dev); 1207 if (!idev) { 1208 err = -ENODEV; 1209 goto out; 1210 } 1211 } 1212 rt->u.dst.output = ip6_pkt_discard_out; 1213 rt->u.dst.input = ip6_pkt_discard; 1214 rt->u.dst.error = -ENETUNREACH; 1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1216 goto install_route; 1217 } 1218 1219 if (cfg->fc_flags & RTF_GATEWAY) { 1220 struct in6_addr *gw_addr; 1221 int gwa_type; 1222 1223 gw_addr = &cfg->fc_gateway; 1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1225 gwa_type = ipv6_addr_type(gw_addr); 1226 1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1228 struct rt6_info *grt; 1229 1230 /* IPv6 strictly inhibits using not link-local 1231 addresses as nexthop address. 1232 Otherwise, router will not able to send redirects. 1233 It is very good, but in some (rare!) circumstances 1234 (SIT, PtP, NBMA NOARP links) it is handy to allow 1235 some exceptions. --ANK 1236 */ 1237 err = -EINVAL; 1238 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1239 goto out; 1240 1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1242 1243 err = -EHOSTUNREACH; 1244 if (grt == NULL) 1245 goto out; 1246 if (dev) { 1247 if (dev != grt->rt6i_dev) { 1248 dst_release(&grt->u.dst); 1249 goto out; 1250 } 1251 } else { 1252 dev = grt->rt6i_dev; 1253 idev = grt->rt6i_idev; 1254 dev_hold(dev); 1255 in6_dev_hold(grt->rt6i_idev); 1256 } 1257 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1258 err = 0; 1259 dst_release(&grt->u.dst); 1260 1261 if (err) 1262 goto out; 1263 } 1264 err = -EINVAL; 1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1266 goto out; 1267 } 1268 1269 err = -ENODEV; 1270 if (dev == NULL) 1271 goto out; 1272 1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1275 if (IS_ERR(rt->rt6i_nexthop)) { 1276 err = PTR_ERR(rt->rt6i_nexthop); 1277 rt->rt6i_nexthop = NULL; 1278 goto out; 1279 } 1280 } 1281 1282 rt->rt6i_flags = cfg->fc_flags; 1283 1284 install_route: 1285 if (cfg->fc_mx) { 1286 struct nlattr *nla; 1287 int remaining; 1288 1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1290 int type = nla_type(nla); 1291 1292 if (type) { 1293 if (type > RTAX_MAX) { 1294 err = -EINVAL; 1295 goto out; 1296 } 1297 1298 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1299 } 1300 } 1301 } 1302 1303 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1304 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1305 if (!dst_mtu(&rt->u.dst)) 1306 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1307 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) 1308 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1309 rt->u.dst.dev = dev; 1310 rt->rt6i_idev = idev; 1311 rt->rt6i_table = table; 1312 1313 cfg->fc_nlinfo.nl_net = dev_net(dev); 1314 1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1316 1317 out: 1318 if (dev) 1319 dev_put(dev); 1320 if (idev) 1321 in6_dev_put(idev); 1322 if (rt) 1323 dst_free(&rt->u.dst); 1324 return err; 1325 } 1326 1327 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1328 { 1329 int err; 1330 struct fib6_table *table; 1331 struct net *net = dev_net(rt->rt6i_dev); 1332 1333 if (rt == net->ipv6.ip6_null_entry) 1334 return -ENOENT; 1335 1336 table = rt->rt6i_table; 1337 write_lock_bh(&table->tb6_lock); 1338 1339 err = fib6_del(rt, info); 1340 dst_release(&rt->u.dst); 1341 1342 write_unlock_bh(&table->tb6_lock); 1343 1344 return err; 1345 } 1346 1347 int ip6_del_rt(struct rt6_info *rt) 1348 { 1349 struct nl_info info = { 1350 .nl_net = dev_net(rt->rt6i_dev), 1351 }; 1352 return __ip6_del_rt(rt, &info); 1353 } 1354 1355 static int ip6_route_del(struct fib6_config *cfg) 1356 { 1357 struct fib6_table *table; 1358 struct fib6_node *fn; 1359 struct rt6_info *rt; 1360 int err = -ESRCH; 1361 1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1363 if (table == NULL) 1364 return err; 1365 1366 read_lock_bh(&table->tb6_lock); 1367 1368 fn = fib6_locate(&table->tb6_root, 1369 &cfg->fc_dst, cfg->fc_dst_len, 1370 &cfg->fc_src, cfg->fc_src_len); 1371 1372 if (fn) { 1373 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1374 if (cfg->fc_ifindex && 1375 (rt->rt6i_dev == NULL || 1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1377 continue; 1378 if (cfg->fc_flags & RTF_GATEWAY && 1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1380 continue; 1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1382 continue; 1383 dst_hold(&rt->u.dst); 1384 read_unlock_bh(&table->tb6_lock); 1385 1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1387 } 1388 } 1389 read_unlock_bh(&table->tb6_lock); 1390 1391 return err; 1392 } 1393 1394 /* 1395 * Handle redirects 1396 */ 1397 struct ip6rd_flowi { 1398 struct flowi fl; 1399 struct in6_addr gateway; 1400 }; 1401 1402 static struct rt6_info *__ip6_route_redirect(struct net *net, 1403 struct fib6_table *table, 1404 struct flowi *fl, 1405 int flags) 1406 { 1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1408 struct rt6_info *rt; 1409 struct fib6_node *fn; 1410 1411 /* 1412 * Get the "current" route for this destination and 1413 * check if the redirect has come from approriate router. 1414 * 1415 * RFC 2461 specifies that redirects should only be 1416 * accepted if they come from the nexthop to the target. 1417 * Due to the way the routes are chosen, this notion 1418 * is a bit fuzzy and one might need to check all possible 1419 * routes. 1420 */ 1421 1422 read_lock_bh(&table->tb6_lock); 1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1424 restart: 1425 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1426 /* 1427 * Current route is on-link; redirect is always invalid. 1428 * 1429 * Seems, previous statement is not true. It could 1430 * be node, which looks for us as on-link (f.e. proxy ndisc) 1431 * But then router serving it might decide, that we should 1432 * know truth 8)8) --ANK (980726). 1433 */ 1434 if (rt6_check_expired(rt)) 1435 continue; 1436 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1437 continue; 1438 if (fl->oif != rt->rt6i_dev->ifindex) 1439 continue; 1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1441 continue; 1442 break; 1443 } 1444 1445 if (!rt) 1446 rt = net->ipv6.ip6_null_entry; 1447 BACKTRACK(net, &fl->fl6_src); 1448 out: 1449 dst_hold(&rt->u.dst); 1450 1451 read_unlock_bh(&table->tb6_lock); 1452 1453 return rt; 1454 }; 1455 1456 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1457 struct in6_addr *src, 1458 struct in6_addr *gateway, 1459 struct net_device *dev) 1460 { 1461 int flags = RT6_LOOKUP_F_HAS_SADDR; 1462 struct net *net = dev_net(dev); 1463 struct ip6rd_flowi rdfl = { 1464 .fl = { 1465 .oif = dev->ifindex, 1466 .nl_u = { 1467 .ip6_u = { 1468 .daddr = *dest, 1469 .saddr = *src, 1470 }, 1471 }, 1472 }, 1473 }; 1474 1475 ipv6_addr_copy(&rdfl.gateway, gateway); 1476 1477 if (rt6_need_strict(dest)) 1478 flags |= RT6_LOOKUP_F_IFACE; 1479 1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1481 flags, __ip6_route_redirect); 1482 } 1483 1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1485 struct in6_addr *saddr, 1486 struct neighbour *neigh, u8 *lladdr, int on_link) 1487 { 1488 struct rt6_info *rt, *nrt = NULL; 1489 struct netevent_redirect netevent; 1490 struct net *net = dev_net(neigh->dev); 1491 1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1493 1494 if (rt == net->ipv6.ip6_null_entry) { 1495 if (net_ratelimit()) 1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1497 "for redirect target\n"); 1498 goto out; 1499 } 1500 1501 /* 1502 * We have finally decided to accept it. 1503 */ 1504 1505 neigh_update(neigh, lladdr, NUD_STALE, 1506 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1507 NEIGH_UPDATE_F_OVERRIDE| 1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1509 NEIGH_UPDATE_F_ISROUTER)) 1510 ); 1511 1512 /* 1513 * Redirect received -> path was valid. 1514 * Look, redirects are sent only in response to data packets, 1515 * so that this nexthop apparently is reachable. --ANK 1516 */ 1517 dst_confirm(&rt->u.dst); 1518 1519 /* Duplicate redirect: silently ignore. */ 1520 if (neigh == rt->u.dst.neighbour) 1521 goto out; 1522 1523 nrt = ip6_rt_copy(rt); 1524 if (nrt == NULL) 1525 goto out; 1526 1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1528 if (on_link) 1529 nrt->rt6i_flags &= ~RTF_GATEWAY; 1530 1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1532 nrt->rt6i_dst.plen = 128; 1533 nrt->u.dst.flags |= DST_HOST; 1534 1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1536 nrt->rt6i_nexthop = neigh_clone(neigh); 1537 /* Reset pmtu, it may be better */ 1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), 1540 dst_mtu(&nrt->u.dst)); 1541 1542 if (ip6_ins_rt(nrt)) 1543 goto out; 1544 1545 netevent.old = &rt->u.dst; 1546 netevent.new = &nrt->u.dst; 1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1548 1549 if (rt->rt6i_flags&RTF_CACHE) { 1550 ip6_del_rt(rt); 1551 return; 1552 } 1553 1554 out: 1555 dst_release(&rt->u.dst); 1556 return; 1557 } 1558 1559 /* 1560 * Handle ICMP "packet too big" messages 1561 * i.e. Path MTU discovery 1562 */ 1563 1564 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1565 struct net_device *dev, u32 pmtu) 1566 { 1567 struct rt6_info *rt, *nrt; 1568 struct net *net = dev_net(dev); 1569 int allfrag = 0; 1570 1571 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); 1572 if (rt == NULL) 1573 return; 1574 1575 if (pmtu >= dst_mtu(&rt->u.dst)) 1576 goto out; 1577 1578 if (pmtu < IPV6_MIN_MTU) { 1579 /* 1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1581 * MTU (1280) and a fragment header should always be included 1582 * after a node receiving Too Big message reporting PMTU is 1583 * less than the IPv6 Minimum Link MTU. 1584 */ 1585 pmtu = IPV6_MIN_MTU; 1586 allfrag = 1; 1587 } 1588 1589 /* New mtu received -> path was valid. 1590 They are sent only in response to data packets, 1591 so that this nexthop apparently is reachable. --ANK 1592 */ 1593 dst_confirm(&rt->u.dst); 1594 1595 /* Host route. If it is static, it would be better 1596 not to override it, but add new one, so that 1597 when cache entry will expire old pmtu 1598 would return automatically. 1599 */ 1600 if (rt->rt6i_flags & RTF_CACHE) { 1601 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1602 if (allfrag) 1603 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1604 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1606 goto out; 1607 } 1608 1609 /* Network route. 1610 Two cases are possible: 1611 1. It is connected route. Action: COW 1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1613 */ 1614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1615 nrt = rt6_alloc_cow(rt, daddr, saddr); 1616 else 1617 nrt = rt6_alloc_clone(rt, daddr); 1618 1619 if (nrt) { 1620 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1621 if (allfrag) 1622 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1623 1624 /* According to RFC 1981, detecting PMTU increase shouldn't be 1625 * happened within 5 mins, the recommended timer is 10 mins. 1626 * Here this route expiration time is set to ip6_rt_mtu_expires 1627 * which is 10 mins. After 10 mins the decreased pmtu is expired 1628 * and detecting PMTU increase will be automatically happened. 1629 */ 1630 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1631 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1632 1633 ip6_ins_rt(nrt); 1634 } 1635 out: 1636 dst_release(&rt->u.dst); 1637 } 1638 1639 /* 1640 * Misc support functions 1641 */ 1642 1643 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1644 { 1645 struct net *net = dev_net(ort->rt6i_dev); 1646 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1647 1648 if (rt) { 1649 rt->u.dst.input = ort->u.dst.input; 1650 rt->u.dst.output = ort->u.dst.output; 1651 1652 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1653 rt->u.dst.error = ort->u.dst.error; 1654 rt->u.dst.dev = ort->u.dst.dev; 1655 if (rt->u.dst.dev) 1656 dev_hold(rt->u.dst.dev); 1657 rt->rt6i_idev = ort->rt6i_idev; 1658 if (rt->rt6i_idev) 1659 in6_dev_hold(rt->rt6i_idev); 1660 rt->u.dst.lastuse = jiffies; 1661 rt->rt6i_expires = 0; 1662 1663 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1664 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1665 rt->rt6i_metric = 0; 1666 1667 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1668 #ifdef CONFIG_IPV6_SUBTREES 1669 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1670 #endif 1671 rt->rt6i_table = ort->rt6i_table; 1672 } 1673 return rt; 1674 } 1675 1676 #ifdef CONFIG_IPV6_ROUTE_INFO 1677 static struct rt6_info *rt6_get_route_info(struct net *net, 1678 struct in6_addr *prefix, int prefixlen, 1679 struct in6_addr *gwaddr, int ifindex) 1680 { 1681 struct fib6_node *fn; 1682 struct rt6_info *rt = NULL; 1683 struct fib6_table *table; 1684 1685 table = fib6_get_table(net, RT6_TABLE_INFO); 1686 if (table == NULL) 1687 return NULL; 1688 1689 write_lock_bh(&table->tb6_lock); 1690 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1691 if (!fn) 1692 goto out; 1693 1694 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1695 if (rt->rt6i_dev->ifindex != ifindex) 1696 continue; 1697 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1698 continue; 1699 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1700 continue; 1701 dst_hold(&rt->u.dst); 1702 break; 1703 } 1704 out: 1705 write_unlock_bh(&table->tb6_lock); 1706 return rt; 1707 } 1708 1709 static struct rt6_info *rt6_add_route_info(struct net *net, 1710 struct in6_addr *prefix, int prefixlen, 1711 struct in6_addr *gwaddr, int ifindex, 1712 unsigned pref) 1713 { 1714 struct fib6_config cfg = { 1715 .fc_table = RT6_TABLE_INFO, 1716 .fc_metric = IP6_RT_PRIO_USER, 1717 .fc_ifindex = ifindex, 1718 .fc_dst_len = prefixlen, 1719 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1720 RTF_UP | RTF_PREF(pref), 1721 .fc_nlinfo.pid = 0, 1722 .fc_nlinfo.nlh = NULL, 1723 .fc_nlinfo.nl_net = net, 1724 }; 1725 1726 ipv6_addr_copy(&cfg.fc_dst, prefix); 1727 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1728 1729 /* We should treat it as a default route if prefix length is 0. */ 1730 if (!prefixlen) 1731 cfg.fc_flags |= RTF_DEFAULT; 1732 1733 ip6_route_add(&cfg); 1734 1735 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1736 } 1737 #endif 1738 1739 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1740 { 1741 struct rt6_info *rt; 1742 struct fib6_table *table; 1743 1744 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1745 if (table == NULL) 1746 return NULL; 1747 1748 write_lock_bh(&table->tb6_lock); 1749 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1750 if (dev == rt->rt6i_dev && 1751 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1752 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1753 break; 1754 } 1755 if (rt) 1756 dst_hold(&rt->u.dst); 1757 write_unlock_bh(&table->tb6_lock); 1758 return rt; 1759 } 1760 1761 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1762 struct net_device *dev, 1763 unsigned int pref) 1764 { 1765 struct fib6_config cfg = { 1766 .fc_table = RT6_TABLE_DFLT, 1767 .fc_metric = IP6_RT_PRIO_USER, 1768 .fc_ifindex = dev->ifindex, 1769 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1770 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1771 .fc_nlinfo.pid = 0, 1772 .fc_nlinfo.nlh = NULL, 1773 .fc_nlinfo.nl_net = dev_net(dev), 1774 }; 1775 1776 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1777 1778 ip6_route_add(&cfg); 1779 1780 return rt6_get_dflt_router(gwaddr, dev); 1781 } 1782 1783 void rt6_purge_dflt_routers(struct net *net) 1784 { 1785 struct rt6_info *rt; 1786 struct fib6_table *table; 1787 1788 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1789 table = fib6_get_table(net, RT6_TABLE_DFLT); 1790 if (table == NULL) 1791 return; 1792 1793 restart: 1794 read_lock_bh(&table->tb6_lock); 1795 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1796 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1797 dst_hold(&rt->u.dst); 1798 read_unlock_bh(&table->tb6_lock); 1799 ip6_del_rt(rt); 1800 goto restart; 1801 } 1802 } 1803 read_unlock_bh(&table->tb6_lock); 1804 } 1805 1806 static void rtmsg_to_fib6_config(struct net *net, 1807 struct in6_rtmsg *rtmsg, 1808 struct fib6_config *cfg) 1809 { 1810 memset(cfg, 0, sizeof(*cfg)); 1811 1812 cfg->fc_table = RT6_TABLE_MAIN; 1813 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1814 cfg->fc_metric = rtmsg->rtmsg_metric; 1815 cfg->fc_expires = rtmsg->rtmsg_info; 1816 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1817 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1818 cfg->fc_flags = rtmsg->rtmsg_flags; 1819 1820 cfg->fc_nlinfo.nl_net = net; 1821 1822 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1823 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1824 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1825 } 1826 1827 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1828 { 1829 struct fib6_config cfg; 1830 struct in6_rtmsg rtmsg; 1831 int err; 1832 1833 switch(cmd) { 1834 case SIOCADDRT: /* Add a route */ 1835 case SIOCDELRT: /* Delete a route */ 1836 if (!capable(CAP_NET_ADMIN)) 1837 return -EPERM; 1838 err = copy_from_user(&rtmsg, arg, 1839 sizeof(struct in6_rtmsg)); 1840 if (err) 1841 return -EFAULT; 1842 1843 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1844 1845 rtnl_lock(); 1846 switch (cmd) { 1847 case SIOCADDRT: 1848 err = ip6_route_add(&cfg); 1849 break; 1850 case SIOCDELRT: 1851 err = ip6_route_del(&cfg); 1852 break; 1853 default: 1854 err = -EINVAL; 1855 } 1856 rtnl_unlock(); 1857 1858 return err; 1859 } 1860 1861 return -EINVAL; 1862 } 1863 1864 /* 1865 * Drop the packet on the floor 1866 */ 1867 1868 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 1869 { 1870 int type; 1871 struct dst_entry *dst = skb_dst(skb); 1872 switch (ipstats_mib_noroutes) { 1873 case IPSTATS_MIB_INNOROUTES: 1874 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1875 if (type == IPV6_ADDR_ANY) { 1876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1877 IPSTATS_MIB_INADDRERRORS); 1878 break; 1879 } 1880 /* FALLTHROUGH */ 1881 case IPSTATS_MIB_OUTNOROUTES: 1882 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1883 ipstats_mib_noroutes); 1884 break; 1885 } 1886 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 1887 kfree_skb(skb); 1888 return 0; 1889 } 1890 1891 static int ip6_pkt_discard(struct sk_buff *skb) 1892 { 1893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 1894 } 1895 1896 static int ip6_pkt_discard_out(struct sk_buff *skb) 1897 { 1898 skb->dev = skb_dst(skb)->dev; 1899 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 1900 } 1901 1902 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1903 1904 static int ip6_pkt_prohibit(struct sk_buff *skb) 1905 { 1906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 1907 } 1908 1909 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 1910 { 1911 skb->dev = skb_dst(skb)->dev; 1912 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1913 } 1914 1915 #endif 1916 1917 /* 1918 * Allocate a dst for local (unicast / anycast) address. 1919 */ 1920 1921 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1922 const struct in6_addr *addr, 1923 int anycast) 1924 { 1925 struct net *net = dev_net(idev->dev); 1926 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1927 struct neighbour *neigh; 1928 1929 if (rt == NULL) 1930 return ERR_PTR(-ENOMEM); 1931 1932 dev_hold(net->loopback_dev); 1933 in6_dev_hold(idev); 1934 1935 rt->u.dst.flags = DST_HOST; 1936 rt->u.dst.input = ip6_input; 1937 rt->u.dst.output = ip6_output; 1938 rt->rt6i_dev = net->loopback_dev; 1939 rt->rt6i_idev = idev; 1940 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1942 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1943 rt->u.dst.obsolete = -1; 1944 1945 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1946 if (anycast) 1947 rt->rt6i_flags |= RTF_ANYCAST; 1948 else 1949 rt->rt6i_flags |= RTF_LOCAL; 1950 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1951 if (IS_ERR(neigh)) { 1952 dst_free(&rt->u.dst); 1953 1954 /* We are casting this because that is the return 1955 * value type. But an errno encoded pointer is the 1956 * same regardless of the underlying pointer type, 1957 * and that's what we are returning. So this is OK. 1958 */ 1959 return (struct rt6_info *) neigh; 1960 } 1961 rt->rt6i_nexthop = neigh; 1962 1963 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1964 rt->rt6i_dst.plen = 128; 1965 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 1966 1967 atomic_set(&rt->u.dst.__refcnt, 1); 1968 1969 return rt; 1970 } 1971 1972 struct arg_dev_net { 1973 struct net_device *dev; 1974 struct net *net; 1975 }; 1976 1977 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1978 { 1979 struct net_device *dev = ((struct arg_dev_net *)arg)->dev; 1980 struct net *net = ((struct arg_dev_net *)arg)->net; 1981 1982 if (((void *)rt->rt6i_dev == dev || dev == NULL) && 1983 rt != net->ipv6.ip6_null_entry) { 1984 RT6_TRACE("deleted by ifdown %p\n", rt); 1985 return -1; 1986 } 1987 return 0; 1988 } 1989 1990 void rt6_ifdown(struct net *net, struct net_device *dev) 1991 { 1992 struct arg_dev_net adn = { 1993 .dev = dev, 1994 .net = net, 1995 }; 1996 1997 fib6_clean_all(net, fib6_ifdown, 0, &adn); 1998 icmp6_clean_all(fib6_ifdown, &adn); 1999 } 2000 2001 struct rt6_mtu_change_arg 2002 { 2003 struct net_device *dev; 2004 unsigned mtu; 2005 }; 2006 2007 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2008 { 2009 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2010 struct inet6_dev *idev; 2011 struct net *net = dev_net(arg->dev); 2012 2013 /* In IPv6 pmtu discovery is not optional, 2014 so that RTAX_MTU lock cannot disable it. 2015 We still use this lock to block changes 2016 caused by addrconf/ndisc. 2017 */ 2018 2019 idev = __in6_dev_get(arg->dev); 2020 if (idev == NULL) 2021 return 0; 2022 2023 /* For administrative MTU increase, there is no way to discover 2024 IPv6 PMTU increase, so PMTU increase should be updated here. 2025 Since RFC 1981 doesn't include administrative MTU increase 2026 update PMTU increase is a MUST. (i.e. jumbo frame) 2027 */ 2028 /* 2029 If new MTU is less than route PMTU, this new MTU will be the 2030 lowest MTU in the path, update the route PMTU to reflect PMTU 2031 decreases; if new MTU is greater than route PMTU, and the 2032 old MTU is the lowest MTU in the path, update the route PMTU 2033 to reflect the increase. In this case if the other nodes' MTU 2034 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2035 PMTU discouvery. 2036 */ 2037 if (rt->rt6i_dev == arg->dev && 2038 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 2039 (dst_mtu(&rt->u.dst) >= arg->mtu || 2040 (dst_mtu(&rt->u.dst) < arg->mtu && 2041 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 2042 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 2043 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2044 } 2045 return 0; 2046 } 2047 2048 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2049 { 2050 struct rt6_mtu_change_arg arg = { 2051 .dev = dev, 2052 .mtu = mtu, 2053 }; 2054 2055 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2056 } 2057 2058 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2059 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2060 [RTA_OIF] = { .type = NLA_U32 }, 2061 [RTA_IIF] = { .type = NLA_U32 }, 2062 [RTA_PRIORITY] = { .type = NLA_U32 }, 2063 [RTA_METRICS] = { .type = NLA_NESTED }, 2064 }; 2065 2066 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2067 struct fib6_config *cfg) 2068 { 2069 struct rtmsg *rtm; 2070 struct nlattr *tb[RTA_MAX+1]; 2071 int err; 2072 2073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2074 if (err < 0) 2075 goto errout; 2076 2077 err = -EINVAL; 2078 rtm = nlmsg_data(nlh); 2079 memset(cfg, 0, sizeof(*cfg)); 2080 2081 cfg->fc_table = rtm->rtm_table; 2082 cfg->fc_dst_len = rtm->rtm_dst_len; 2083 cfg->fc_src_len = rtm->rtm_src_len; 2084 cfg->fc_flags = RTF_UP; 2085 cfg->fc_protocol = rtm->rtm_protocol; 2086 2087 if (rtm->rtm_type == RTN_UNREACHABLE) 2088 cfg->fc_flags |= RTF_REJECT; 2089 2090 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2091 cfg->fc_nlinfo.nlh = nlh; 2092 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2093 2094 if (tb[RTA_GATEWAY]) { 2095 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2096 cfg->fc_flags |= RTF_GATEWAY; 2097 } 2098 2099 if (tb[RTA_DST]) { 2100 int plen = (rtm->rtm_dst_len + 7) >> 3; 2101 2102 if (nla_len(tb[RTA_DST]) < plen) 2103 goto errout; 2104 2105 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2106 } 2107 2108 if (tb[RTA_SRC]) { 2109 int plen = (rtm->rtm_src_len + 7) >> 3; 2110 2111 if (nla_len(tb[RTA_SRC]) < plen) 2112 goto errout; 2113 2114 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2115 } 2116 2117 if (tb[RTA_OIF]) 2118 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2119 2120 if (tb[RTA_PRIORITY]) 2121 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2122 2123 if (tb[RTA_METRICS]) { 2124 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2125 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2126 } 2127 2128 if (tb[RTA_TABLE]) 2129 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2130 2131 err = 0; 2132 errout: 2133 return err; 2134 } 2135 2136 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2137 { 2138 struct fib6_config cfg; 2139 int err; 2140 2141 err = rtm_to_fib6_config(skb, nlh, &cfg); 2142 if (err < 0) 2143 return err; 2144 2145 return ip6_route_del(&cfg); 2146 } 2147 2148 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2149 { 2150 struct fib6_config cfg; 2151 int err; 2152 2153 err = rtm_to_fib6_config(skb, nlh, &cfg); 2154 if (err < 0) 2155 return err; 2156 2157 return ip6_route_add(&cfg); 2158 } 2159 2160 static inline size_t rt6_nlmsg_size(void) 2161 { 2162 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2163 + nla_total_size(16) /* RTA_SRC */ 2164 + nla_total_size(16) /* RTA_DST */ 2165 + nla_total_size(16) /* RTA_GATEWAY */ 2166 + nla_total_size(16) /* RTA_PREFSRC */ 2167 + nla_total_size(4) /* RTA_TABLE */ 2168 + nla_total_size(4) /* RTA_IIF */ 2169 + nla_total_size(4) /* RTA_OIF */ 2170 + nla_total_size(4) /* RTA_PRIORITY */ 2171 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2172 + nla_total_size(sizeof(struct rta_cacheinfo)); 2173 } 2174 2175 static int rt6_fill_node(struct net *net, 2176 struct sk_buff *skb, struct rt6_info *rt, 2177 struct in6_addr *dst, struct in6_addr *src, 2178 int iif, int type, u32 pid, u32 seq, 2179 int prefix, int nowait, unsigned int flags) 2180 { 2181 struct rtmsg *rtm; 2182 struct nlmsghdr *nlh; 2183 long expires; 2184 u32 table; 2185 2186 if (prefix) { /* user wants prefix routes only */ 2187 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2188 /* success since this is not a prefix route */ 2189 return 1; 2190 } 2191 } 2192 2193 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2194 if (nlh == NULL) 2195 return -EMSGSIZE; 2196 2197 rtm = nlmsg_data(nlh); 2198 rtm->rtm_family = AF_INET6; 2199 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2200 rtm->rtm_src_len = rt->rt6i_src.plen; 2201 rtm->rtm_tos = 0; 2202 if (rt->rt6i_table) 2203 table = rt->rt6i_table->tb6_id; 2204 else 2205 table = RT6_TABLE_UNSPEC; 2206 rtm->rtm_table = table; 2207 NLA_PUT_U32(skb, RTA_TABLE, table); 2208 if (rt->rt6i_flags&RTF_REJECT) 2209 rtm->rtm_type = RTN_UNREACHABLE; 2210 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2211 rtm->rtm_type = RTN_LOCAL; 2212 else 2213 rtm->rtm_type = RTN_UNICAST; 2214 rtm->rtm_flags = 0; 2215 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2216 rtm->rtm_protocol = rt->rt6i_protocol; 2217 if (rt->rt6i_flags&RTF_DYNAMIC) 2218 rtm->rtm_protocol = RTPROT_REDIRECT; 2219 else if (rt->rt6i_flags & RTF_ADDRCONF) 2220 rtm->rtm_protocol = RTPROT_KERNEL; 2221 else if (rt->rt6i_flags&RTF_DEFAULT) 2222 rtm->rtm_protocol = RTPROT_RA; 2223 2224 if (rt->rt6i_flags&RTF_CACHE) 2225 rtm->rtm_flags |= RTM_F_CLONED; 2226 2227 if (dst) { 2228 NLA_PUT(skb, RTA_DST, 16, dst); 2229 rtm->rtm_dst_len = 128; 2230 } else if (rtm->rtm_dst_len) 2231 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2232 #ifdef CONFIG_IPV6_SUBTREES 2233 if (src) { 2234 NLA_PUT(skb, RTA_SRC, 16, src); 2235 rtm->rtm_src_len = 128; 2236 } else if (rtm->rtm_src_len) 2237 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2238 #endif 2239 if (iif) { 2240 #ifdef CONFIG_IPV6_MROUTE 2241 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2242 int err = ip6mr_get_route(net, skb, rtm, nowait); 2243 if (err <= 0) { 2244 if (!nowait) { 2245 if (err == 0) 2246 return 0; 2247 goto nla_put_failure; 2248 } else { 2249 if (err == -EMSGSIZE) 2250 goto nla_put_failure; 2251 } 2252 } 2253 } else 2254 #endif 2255 NLA_PUT_U32(skb, RTA_IIF, iif); 2256 } else if (dst) { 2257 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); 2258 struct in6_addr saddr_buf; 2259 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2260 dst, 0, &saddr_buf) == 0) 2261 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2262 } 2263 2264 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2265 goto nla_put_failure; 2266 2267 if (rt->u.dst.neighbour) 2268 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2269 2270 if (rt->u.dst.dev) 2271 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2272 2273 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2274 2275 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2276 expires = 0; 2277 else if (rt->rt6i_expires - jiffies < INT_MAX) 2278 expires = rt->rt6i_expires - jiffies; 2279 else 2280 expires = INT_MAX; 2281 2282 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2283 expires, rt->u.dst.error) < 0) 2284 goto nla_put_failure; 2285 2286 return nlmsg_end(skb, nlh); 2287 2288 nla_put_failure: 2289 nlmsg_cancel(skb, nlh); 2290 return -EMSGSIZE; 2291 } 2292 2293 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2294 { 2295 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2296 int prefix; 2297 2298 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2299 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2300 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2301 } else 2302 prefix = 0; 2303 2304 return rt6_fill_node(arg->net, 2305 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2306 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2307 prefix, 0, NLM_F_MULTI); 2308 } 2309 2310 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2311 { 2312 struct net *net = sock_net(in_skb->sk); 2313 struct nlattr *tb[RTA_MAX+1]; 2314 struct rt6_info *rt; 2315 struct sk_buff *skb; 2316 struct rtmsg *rtm; 2317 struct flowi fl; 2318 int err, iif = 0; 2319 2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2321 if (err < 0) 2322 goto errout; 2323 2324 err = -EINVAL; 2325 memset(&fl, 0, sizeof(fl)); 2326 2327 if (tb[RTA_SRC]) { 2328 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2329 goto errout; 2330 2331 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2332 } 2333 2334 if (tb[RTA_DST]) { 2335 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2336 goto errout; 2337 2338 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2339 } 2340 2341 if (tb[RTA_IIF]) 2342 iif = nla_get_u32(tb[RTA_IIF]); 2343 2344 if (tb[RTA_OIF]) 2345 fl.oif = nla_get_u32(tb[RTA_OIF]); 2346 2347 if (iif) { 2348 struct net_device *dev; 2349 dev = __dev_get_by_index(net, iif); 2350 if (!dev) { 2351 err = -ENODEV; 2352 goto errout; 2353 } 2354 } 2355 2356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2357 if (skb == NULL) { 2358 err = -ENOBUFS; 2359 goto errout; 2360 } 2361 2362 /* Reserve room for dummy headers, this skb can pass 2363 through good chunk of routing engine. 2364 */ 2365 skb_reset_mac_header(skb); 2366 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2367 2368 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2369 skb_dst_set(skb, &rt->u.dst); 2370 2371 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2372 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2373 nlh->nlmsg_seq, 0, 0, 0); 2374 if (err < 0) { 2375 kfree_skb(skb); 2376 goto errout; 2377 } 2378 2379 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2380 errout: 2381 return err; 2382 } 2383 2384 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2385 { 2386 struct sk_buff *skb; 2387 struct net *net = info->nl_net; 2388 u32 seq; 2389 int err; 2390 2391 err = -ENOBUFS; 2392 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; 2393 2394 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2395 if (skb == NULL) 2396 goto errout; 2397 2398 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2399 event, info->pid, seq, 0, 0, 0); 2400 if (err < 0) { 2401 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2402 WARN_ON(err == -EMSGSIZE); 2403 kfree_skb(skb); 2404 goto errout; 2405 } 2406 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2407 info->nlh, gfp_any()); 2408 return; 2409 errout: 2410 if (err < 0) 2411 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2412 } 2413 2414 static int ip6_route_dev_notify(struct notifier_block *this, 2415 unsigned long event, void *data) 2416 { 2417 struct net_device *dev = (struct net_device *)data; 2418 struct net *net = dev_net(dev); 2419 2420 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2421 net->ipv6.ip6_null_entry->u.dst.dev = dev; 2422 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2423 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2424 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; 2425 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2426 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; 2427 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2428 #endif 2429 } 2430 2431 return NOTIFY_OK; 2432 } 2433 2434 /* 2435 * /proc 2436 */ 2437 2438 #ifdef CONFIG_PROC_FS 2439 2440 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2441 2442 struct rt6_proc_arg 2443 { 2444 char *buffer; 2445 int offset; 2446 int length; 2447 int skip; 2448 int len; 2449 }; 2450 2451 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2452 { 2453 struct seq_file *m = p_arg; 2454 2455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2456 2457 #ifdef CONFIG_IPV6_SUBTREES 2458 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2459 #else 2460 seq_puts(m, "00000000000000000000000000000000 00 "); 2461 #endif 2462 2463 if (rt->rt6i_nexthop) { 2464 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); 2465 } else { 2466 seq_puts(m, "00000000000000000000000000000000"); 2467 } 2468 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2469 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2470 rt->u.dst.__use, rt->rt6i_flags, 2471 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2472 return 0; 2473 } 2474 2475 static int ipv6_route_show(struct seq_file *m, void *v) 2476 { 2477 struct net *net = (struct net *)m->private; 2478 fib6_clean_all(net, rt6_info_route, 0, m); 2479 return 0; 2480 } 2481 2482 static int ipv6_route_open(struct inode *inode, struct file *file) 2483 { 2484 return single_open_net(inode, file, ipv6_route_show); 2485 } 2486 2487 static const struct file_operations ipv6_route_proc_fops = { 2488 .owner = THIS_MODULE, 2489 .open = ipv6_route_open, 2490 .read = seq_read, 2491 .llseek = seq_lseek, 2492 .release = single_release_net, 2493 }; 2494 2495 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2496 { 2497 struct net *net = (struct net *)seq->private; 2498 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2499 net->ipv6.rt6_stats->fib_nodes, 2500 net->ipv6.rt6_stats->fib_route_nodes, 2501 net->ipv6.rt6_stats->fib_rt_alloc, 2502 net->ipv6.rt6_stats->fib_rt_entries, 2503 net->ipv6.rt6_stats->fib_rt_cache, 2504 atomic_read(&net->ipv6.ip6_dst_ops.entries), 2505 net->ipv6.rt6_stats->fib_discarded_routes); 2506 2507 return 0; 2508 } 2509 2510 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2511 { 2512 return single_open_net(inode, file, rt6_stats_seq_show); 2513 } 2514 2515 static const struct file_operations rt6_stats_seq_fops = { 2516 .owner = THIS_MODULE, 2517 .open = rt6_stats_seq_open, 2518 .read = seq_read, 2519 .llseek = seq_lseek, 2520 .release = single_release_net, 2521 }; 2522 #endif /* CONFIG_PROC_FS */ 2523 2524 #ifdef CONFIG_SYSCTL 2525 2526 static 2527 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2528 void __user *buffer, size_t *lenp, loff_t *ppos) 2529 { 2530 struct net *net = current->nsproxy->net_ns; 2531 int delay = net->ipv6.sysctl.flush_delay; 2532 if (write) { 2533 proc_dointvec(ctl, write, buffer, lenp, ppos); 2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2535 return 0; 2536 } else 2537 return -EINVAL; 2538 } 2539 2540 ctl_table ipv6_route_table_template[] = { 2541 { 2542 .procname = "flush", 2543 .data = &init_net.ipv6.sysctl.flush_delay, 2544 .maxlen = sizeof(int), 2545 .mode = 0200, 2546 .proc_handler = ipv6_sysctl_rtcache_flush 2547 }, 2548 { 2549 .procname = "gc_thresh", 2550 .data = &ip6_dst_ops_template.gc_thresh, 2551 .maxlen = sizeof(int), 2552 .mode = 0644, 2553 .proc_handler = proc_dointvec, 2554 }, 2555 { 2556 .procname = "max_size", 2557 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2558 .maxlen = sizeof(int), 2559 .mode = 0644, 2560 .proc_handler = proc_dointvec, 2561 }, 2562 { 2563 .procname = "gc_min_interval", 2564 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2565 .maxlen = sizeof(int), 2566 .mode = 0644, 2567 .proc_handler = proc_dointvec_jiffies, 2568 }, 2569 { 2570 .procname = "gc_timeout", 2571 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2572 .maxlen = sizeof(int), 2573 .mode = 0644, 2574 .proc_handler = proc_dointvec_jiffies, 2575 }, 2576 { 2577 .procname = "gc_interval", 2578 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2579 .maxlen = sizeof(int), 2580 .mode = 0644, 2581 .proc_handler = proc_dointvec_jiffies, 2582 }, 2583 { 2584 .procname = "gc_elasticity", 2585 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2586 .maxlen = sizeof(int), 2587 .mode = 0644, 2588 .proc_handler = proc_dointvec_jiffies, 2589 }, 2590 { 2591 .procname = "mtu_expires", 2592 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2593 .maxlen = sizeof(int), 2594 .mode = 0644, 2595 .proc_handler = proc_dointvec_jiffies, 2596 }, 2597 { 2598 .procname = "min_adv_mss", 2599 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2600 .maxlen = sizeof(int), 2601 .mode = 0644, 2602 .proc_handler = proc_dointvec_jiffies, 2603 }, 2604 { 2605 .procname = "gc_min_interval_ms", 2606 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2607 .maxlen = sizeof(int), 2608 .mode = 0644, 2609 .proc_handler = proc_dointvec_ms_jiffies, 2610 }, 2611 { } 2612 }; 2613 2614 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2615 { 2616 struct ctl_table *table; 2617 2618 table = kmemdup(ipv6_route_table_template, 2619 sizeof(ipv6_route_table_template), 2620 GFP_KERNEL); 2621 2622 if (table) { 2623 table[0].data = &net->ipv6.sysctl.flush_delay; 2624 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2625 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2626 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2627 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2628 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2629 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2630 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2631 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2632 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2633 } 2634 2635 return table; 2636 } 2637 #endif 2638 2639 static int __net_init ip6_route_net_init(struct net *net) 2640 { 2641 int ret = -ENOMEM; 2642 2643 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2644 sizeof(net->ipv6.ip6_dst_ops)); 2645 2646 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2647 sizeof(*net->ipv6.ip6_null_entry), 2648 GFP_KERNEL); 2649 if (!net->ipv6.ip6_null_entry) 2650 goto out_ip6_dst_ops; 2651 net->ipv6.ip6_null_entry->u.dst.path = 2652 (struct dst_entry *)net->ipv6.ip6_null_entry; 2653 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2654 2655 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2656 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2657 sizeof(*net->ipv6.ip6_prohibit_entry), 2658 GFP_KERNEL); 2659 if (!net->ipv6.ip6_prohibit_entry) 2660 goto out_ip6_null_entry; 2661 net->ipv6.ip6_prohibit_entry->u.dst.path = 2662 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2663 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2664 2665 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2666 sizeof(*net->ipv6.ip6_blk_hole_entry), 2667 GFP_KERNEL); 2668 if (!net->ipv6.ip6_blk_hole_entry) 2669 goto out_ip6_prohibit_entry; 2670 net->ipv6.ip6_blk_hole_entry->u.dst.path = 2671 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2672 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2673 #endif 2674 2675 net->ipv6.sysctl.flush_delay = 0; 2676 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2677 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2678 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2679 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2680 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2681 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2682 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2683 2684 #ifdef CONFIG_PROC_FS 2685 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2686 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2687 #endif 2688 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2689 2690 ret = 0; 2691 out: 2692 return ret; 2693 2694 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2695 out_ip6_prohibit_entry: 2696 kfree(net->ipv6.ip6_prohibit_entry); 2697 out_ip6_null_entry: 2698 kfree(net->ipv6.ip6_null_entry); 2699 #endif 2700 out_ip6_dst_ops: 2701 goto out; 2702 } 2703 2704 static void __net_exit ip6_route_net_exit(struct net *net) 2705 { 2706 #ifdef CONFIG_PROC_FS 2707 proc_net_remove(net, "ipv6_route"); 2708 proc_net_remove(net, "rt6_stats"); 2709 #endif 2710 kfree(net->ipv6.ip6_null_entry); 2711 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2712 kfree(net->ipv6.ip6_prohibit_entry); 2713 kfree(net->ipv6.ip6_blk_hole_entry); 2714 #endif 2715 } 2716 2717 static struct pernet_operations ip6_route_net_ops = { 2718 .init = ip6_route_net_init, 2719 .exit = ip6_route_net_exit, 2720 }; 2721 2722 static struct notifier_block ip6_route_dev_notifier = { 2723 .notifier_call = ip6_route_dev_notify, 2724 .priority = 0, 2725 }; 2726 2727 int __init ip6_route_init(void) 2728 { 2729 int ret; 2730 2731 ret = -ENOMEM; 2732 ip6_dst_ops_template.kmem_cachep = 2733 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2734 SLAB_HWCACHE_ALIGN, NULL); 2735 if (!ip6_dst_ops_template.kmem_cachep) 2736 goto out; 2737 2738 ret = register_pernet_subsys(&ip6_route_net_ops); 2739 if (ret) 2740 goto out_kmem_cache; 2741 2742 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2743 2744 /* Registering of the loopback is done before this portion of code, 2745 * the loopback reference in rt6_info will not be taken, do it 2746 * manually for init_net */ 2747 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; 2748 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2749 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2750 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; 2751 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2752 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; 2753 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2754 #endif 2755 ret = fib6_init(); 2756 if (ret) 2757 goto out_register_subsys; 2758 2759 ret = xfrm6_init(); 2760 if (ret) 2761 goto out_fib6_init; 2762 2763 ret = fib6_rules_init(); 2764 if (ret) 2765 goto xfrm6_init; 2766 2767 ret = -ENOBUFS; 2768 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2769 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2770 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2771 goto fib6_rules_init; 2772 2773 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2774 if (ret) 2775 goto fib6_rules_init; 2776 2777 out: 2778 return ret; 2779 2780 fib6_rules_init: 2781 fib6_rules_cleanup(); 2782 xfrm6_init: 2783 xfrm6_fini(); 2784 out_fib6_init: 2785 fib6_gc_cleanup(); 2786 out_register_subsys: 2787 unregister_pernet_subsys(&ip6_route_net_ops); 2788 out_kmem_cache: 2789 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2790 goto out; 2791 } 2792 2793 void ip6_route_cleanup(void) 2794 { 2795 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2796 fib6_rules_cleanup(); 2797 xfrm6_fini(); 2798 fib6_gc_cleanup(); 2799 unregister_pernet_subsys(&ip6_route_net_ops); 2800 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2801 } 2802