1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <linux/slab.h> 44 #include <net/net_namespace.h> 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) 
do { ; } while (0)
#endif

/* Forward declarations for the dst_ops callbacks defined below. */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_default_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif

/*
 * dst_ops->cow_metrics hook: give this route private, writable metrics.
 *
 * The writable array lives in the route's inet_peer.  On first use the
 * peer's array is seeded from the old (shared) metrics, then installed
 * into dst->_metrics with cmpxchg.  Returns a pointer to the writable
 * array, or NULL if we lost the race to a read-only setting.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	/* Make sure a peer exists; it owns the writable metrics storage. */
	if (!rt->rt6i_peer)
		rt6_bind_peer(rt, 1);

	peer = rt->rt6i_peer;
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Another CPU installed metrics first: use theirs,
			 * unless that pointer is marked read-only. */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}

/* Template for the per-namespace IPv6 dst_ops. */
static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.default_mtu		= ip6_default_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.local_out		= __ip6_local_out,
};

/* Blackhole routes report no MTU. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}

/* Blackhole routes ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/* Blackhole routes never get writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.default_mtu		= ip6_blackhole_default_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.cow_metrics		= ip6_rt_blackhole_cow_metrics,
};

/* Initial (read-only) metrics shared by the template routes below. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};

/* Catch-all "no route" entry: discards traffic with -ENETUNREACH. */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Policy-routing "prohibit" entry: rejects traffic with -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Policy-routing "blackhole" entry: silently discards (-EINVAL). */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	return (struct rt6_info *)dst_alloc(ops, 0);
}

/* dst_ops->destroy: drop the references this route holds on its
 * inet6_dev and inet_peer. */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}

/* Global generation counter cached per-route in rt6i_peer_genid. */
static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);

static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}

/* Attach an inet_peer for the route's destination.  Lockless: cmpxchg
 * resolves concurrent binders and the loser drops its reference. */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt6i_peer_genid = rt6_peer_genid();
}

/* dst_ops->ifdown: the device is going away; re-home the route's
 * inet6_dev reference onto the namespace loopback device so the route
 * does not pin the dying device. */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

/* True if the route carries RTF_EXPIRES and its deadline has passed. */
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES) &&
		time_after(jiffies, rt->rt6i_expires);
}

/* Destinations of these scopes need a strict output-interface match. */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	return ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the sibling chain starting at @rt and pick the entry that matches
 * the requested output interface (@oif) or, when @oif == 0, the entry
 * whose device owns @saddr.  Falls back to a loopback match, then (in
 * strict mode) the null entry, else @rt itself. */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to match against. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* Remember a loopback route as fallback,
				 * preferring one whose idev matches @oif. */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
/* Router Reachability Probing: nudge neighbour discovery for a nexthop
 * whose reachability is unknown, rate-limited via neigh->updated. */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Probe via NS to the target's solicited-node multicast. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
/* Interface score: 2 = exact oif match (or none requested),
 * 1 = loopback route whose idev matches oif, 0 = no match. */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

/* Reachability score: 2 = neighbour in a NUD_VALID state, 1 = no
 * gateway needed or state undetermined, 0 = no neighbour (or, with
 * router-pref support, NUD_FAILED). */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}

/* Combined route score; -1 means the route must not be used under the
 * given @strict lookup flags. */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* Fold the router preference bits in above the device score. */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}

/* Compare @rt against the current best (@match, score *@mpri); when
 * reachability matters, probe whichever candidate loses. */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

/* Scan all routes of the same metric, starting at @rr_head and wrapping
 * around via fn->leaf, returning the best-scoring one (or NULL). */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

/* Pick a route from @fn's leaf chain, rotating fn->rr_ptr (round-robin)
 * when no reachable entry matched.  Returns the null entry on no match. */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (RFC 4191) from a Router
 * Advertisement received on @dev from @gwaddr: validate it, then add,
 * refresh, or delete the corresponding RTF_ROUTEINFO route. */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif

/* On a miss (rt is the null entry), climb towards the tree root —
 * consulting source subtrees where present — and jump back to the
 * caller's restart label once a node with route info is found.
 * Expects locals rt, fn and labels out:, restart: in the caller. */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) {
 \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)

/* Plain lookup in one table (no cloning, no router selection): find the
 * fib6 node, run device matching, backtrack on a miss.  Returns a held
 * dst — possibly the null entry. */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

/* Convenience policy lookup by addresses; returns a held rt6_info on
 * success, NULL on error (the error dst is released here). */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/* Insert @rt into its table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Insert @rt with a default netlink info bound to the route's netns. */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_ins_rt(rt, &info);
}

/* Make an RTF_CACHE clone of @ort pinned to the exact destination
 * (and, with subtrees, source) and resolve its nexthop neighbour.
 * On neighbour-table overflow, force a GC pass (only when not in
 * softirq) and retry; returns NULL if the neighbour can't be had. */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		struct neighbour *neigh;
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Temporarily make GC maximally aggressive,
				 * run it, then restore the sysctl values. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		rt->rt6i_nexthop = neigh;

	}

	return rt;
}

/* Cheaper clone for non-gateway routes: pin to @daddr and reuse the
 * original's neighbour instead of resolving a new one. */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort);
	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;
		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
	}
	return rt;
}

/* Core routing slow path: select a route in @table, clone it into an
 * RTF_CACHE entry when needed, and insert the clone.  Retries on the
 * insertion race (lock dropped between select and insert), and retries
 * once without the reachability requirement when nothing matched. */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable matched — retry without the
		 * reachability requirement. */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

/* Input path: build a flow from the packet's IPv6 header and attach the
 * looked-up dst to the skb. */
void ip6_route_input(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

/* Output path lookup for locally generated traffic. */
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);

/* Replace @dst_orig with a "blackhole" copy that discards all traffic
 * but keeps addressing/metrics (used e.g. by xfrm). */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
	struct
	       rt6_info *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	if (rt) {
		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		dst_copy_metrics(new, &ort->dst);
		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

/* dst_ops->check: the cached dst is valid while its fib node's serial
 * number still matches @cookie; refresh the peer binding if the global
 * peer generation moved. */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt->rt6i_peer)
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}

/* dst_ops->negative_advice: drop expired cache clones; release and
 * forget non-cache routes. */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

/* dst_ops->link_failure: send an address-unreachable ICMPv6 error and
 * expire the route (cache clone) or invalidate its node (default rt). */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

/* dst_ops->update_pmtu: record a smaller path MTU on a host route;
 * below IPV6_MIN_MTU, clamp and turn on allfrag. */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
	}
}

/* dst_ops->default_advmss: advertised MSS derived from the path MTU,
 * bounded below by the ip6_rt_min_advmss sysctl. */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

/* dst_ops->default_mtu: the egress device's configured IPv6 MTU,
 * falling back to IPV6_MIN_MTU. */
static unsigned int ip6_default_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = IPV6_MIN_MTU;
	struct inet6_dev *idev;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

/* Per-host list of dsts allocated for ndisc/icmp, GC'd separately. */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

/* Allocate a throwaway dst for an ICMPv6/ndisc transmission on @dev,
 * chained on icmp6_dst_gc_list for later garbage collection. */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		/* NOTE(review): rt is NULL on this path, so the
		 * `return &rt->dst` at out: computes a member address
		 * from a NULL pointer — only yields NULL if dst is
		 * rt6_info's first member; confirm against the struct. */
		goto out;
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ndisc_get_neigh(dev, addr);
		if (IS_ERR(neigh))
			neigh = NULL;
	}

	rt->rt6i_dev	 = dev;
	rt->rt6i_idev	 = idev;
	rt->rt6i_nexthop = neigh;
	atomic_set(&rt->dst.__refcnt, 1);
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
	rt->dst.output = ip6_output;

#if 0	/* there's no chance to use these for ndisc */
	rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

out:
	return &rt->dst;
}

/* Free unreferenced entries on icmp6_dst_gc_list; returns nonzero if
 * referenced entries remain. */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

/* Remove every icmp6 dst for which @func returns true. */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

/* dst_ops->gc: run fib6 garbage collection, throttled by the
 * ip6_rt_gc_* sysctls; returns nonzero when still over rt_max_size. */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}

/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

/* Hop limit for @dst: the per-route RTAX_HOPLIMIT metric if set,
 * otherwise the egress device's (or the namespace-wide) configured
 * hop limit. */
int ip6_dst_hoplimit(struct dst_entry *dst)
{
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
	if (hoplimit == 0) {
		struct net_device *dev = dst->dev;
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
			hoplimit = idev->cnf.hop_limit;
		else
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
		rcu_read_unlock();
	}
	return hoplimit;
}
EXPORT_SYMBOL(ip6_dst_hoplimit);

/*
 *
 */

/* Build an rt6_info from the netlink/ioctl configuration in @cfg and
 * insert it into the appropriate table.  Validates prefix lengths and
 * gateway reachability; loopback "true" routes are promoted to reject
 * routes.  Returns 0 or a negative errno; on failure the route and any
 * device/idev references taken here are released. */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler by destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
					      && !(cfg->fc_flags&RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The non-link-local gateway itself must be
			 * reachable without another gateway hop. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply any RTAX_* metrics supplied with the request. */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}

/* Remove @rt from its table (definition continues past the visible
 * portion of this file). */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;
	/* ...continuation of __ip6_del_rt(): unlink under the table write
	 * lock, then release the reference the caller held on @rt. */
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Public deletion entry point: derive the netlink info (netns) from the
 * route itself and delegate to __ip6_del_rt(). Consumes the caller's
 * reference on @rt. */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_del_rt(rt, &info);
}

/*
 * Find and delete the route matching @cfg (prefix, and optionally
 * ifindex, gateway and metric). Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Optional selectors: device, gateway, metric. */
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold a reference across the lock drop; it is
			 * consumed by __ip6_del_rt(). */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

/*
 *	Handle redirects
 */

/* Flow key extended with the redirecting router's address, so the
 * per-table lookup callback can match against it. */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
			continue;
		/* Redirect must come from our current nexthop. */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	BACKTRACK(net, &fl6->saddr);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};

/* Look up the route the redirect applies to, using an extended flow key
 * carrying the redirecting gateway. Returns a held rt6_info (possibly the
 * null entry). */
static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
					   struct in6_addr *src,
					   struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl6 = {
			.flowi6_oif = dev->ifindex,
			.daddr = *dest,
			.saddr = *src,
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
						   flags, __ip6_route_redirect);
}

/*
 * Process a validated NDISC redirect for @dest: update the neighbour
 * cache from @lladdr and install a cloned RTF_CACHE host route through
 * the new gateway @neigh, replacing any previous cache entry.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/* ip6_del_rt() consumes our reference on rt. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}

/*
 *	Handle ICMP "packet too big" messages
 *	i.e.
Path MTU discovery
 */

/*
 * Apply a received "packet too big" PMTU @pmtu to the route toward
 * @daddr (optionally constrained to @ifindex). Updates an existing
 * RTF_CACHE entry in place, or clones/COWs a new host cache entry with
 * the reduced MTU and an expiry timer.
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the path MTU. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't
		 * happen within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will automatically happen.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}

/*
 * Entry point for ICMPv6 "packet too big": apply the new PMTU both to
 * the unconstrained route toward @daddr and to the route via the
 * receiving device @dev (see comment below for why both).
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}

/*
 *	Misc support functions
 */

/*
 * Duplicate @ort into a freshly allocated rt6_info, copying handlers,
 * metrics, device/idev (with references taken) and addressing state.
 * The copy never inherits RTF_EXPIRES and starts with metric 0; used as
 * the basis for redirect/PMTU cache clones. Returns NULL on allocation
 * failure.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->dst.dev = ort->dst.dev;
		if (rt->dst.dev)
			dev_hold(rt->dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an existing RA route-information route for @prefix/@prefixlen via
 * @gwaddr on @ifindex in RT6_TABLE_INFO. Returns a held rt6_info or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Match found: take a reference for the caller. */
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Install a route learned from an RA route-information option and return
 * the (held) resulting FIB entry, or NULL if insertion failed.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

/*
 * Find the RA-learned default route via gateway @addr on @dev in
 * RT6_TABLE_DFLT. Returns a held rt6_info or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Install a default route learned from a Router Advertisement and return
 * the (held) resulting FIB entry, or NULL if insertion failed.
 */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_DFLT,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

/*
 * Remove every RA-learned (RTF_DEFAULT|RTF_ADDRCONF) route from the
 * default table. The list lock must be dropped for each deletion, so the
 * scan restarts from the head after every removal.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

/* Translate the legacy ioctl in6_rtmsg into a fib6_config (main table,
 * no netlink originator). */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}

/*
 * Legacy SIOCADDRT/SIOCDELRT ioctl handler. Requires CAP_NET_ADMIN;
 * copies an in6_rtmsg from userspace and adds/deletes the route under
 * the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return
				-EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

/* Common drop path for unroutable packets: bump the appropriate SNMP
 * counter, send an ICMPv6 destination-unreachable with @code, and free
 * the skb. */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination: count as address error. */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

/* dst.input handler for reject/blackhole routes. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

/* dst.output handler for reject/blackhole routes. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Input/output handlers for administratively prohibited routes. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

/*
 * Build the local-table host route for an address being configured on
 * @idev: a loopback-device route flagged RTF_LOCAL or RTF_ANYCAST, with
 * its ndisc neighbour resolved. Returns the route with refcount 1, or an
 * ERR_PTR on allocation/neighbour failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6:  Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->dst.flags = DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	/* obsolete = -1: validate via ip6_dst_check() on every use. */
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		return ERR_CAST(neigh);
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}

/* Argument bundle for fib6_ifdown(): device going down plus its netns. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

/* fib6_clean_all() callback: select for deletion every route on the
 * departing device (or every route when dev == NULL), except the
 * permanent null entry. */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->rt6i_dev == dev || dev == NULL) &&
	    rt != adn->net->ipv6.ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

/* Purge all FIB and ICMP-cache routes referencing @dev (NULL = all). */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

struct
rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};

/* fib6_clean_all() callback: propagate a device MTU change to the MTU
 * metric of each route using that device, per the policy described in
 * the comments below. Always returns 0 (never deletes routes). */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will lead to
	   PMTU discovery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}

/* Walk every FIB entry in @dev's netns, applying the new @mtu. */
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
}

/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};

/*
 * Parse an rtnetlink route message (header + attributes) into a
 * fib6_config. Returns 0 or a negative errno on malformed input.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}

/* RTM_DELROUTE handler: parse the request and delete the route. */
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return ip6_route_del(&cfg);
}

/* RTM_NEWROUTE handler: parse the request and add the route. */
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return ip6_route_add(&cfg);
}

/* Worst-case netlink message size for a single route dump entry. */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}

/*
 * Serialize @rt into an RTM message on @skb.
 * @dst/@src: when non-NULL, report these exact addresses (host routes)
 *	instead of the route's own prefixes.
 * @iif: input interface for RTM_GETROUTE replies; 0 when not applicable.
 * @prefix: when set, emit only RTF_PREFIX_RT routes (skip others,
 *	returning 1 as "success, nothing written").
 * @nowait: passed to ip6mr_get_route() for multicast resolution.
 * Returns the message length, 0, 1, or -EMSGSIZE when @skb is full.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Map route flags to the rtnetlink route type. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			/* Multicast: let ip6mr fill in the real iif/oifs. */
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Report the source address we would select for @dst. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Clamp remaining lifetime for the cacheinfo attribute. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* fib6 dump callback: emit one route, honouring the RTM_F_PREFIX filter
 * from the original dump request when present. */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net =
sock_net(in_skb->sk); 2390 struct nlattr *tb[RTA_MAX+1]; 2391 struct rt6_info *rt; 2392 struct sk_buff *skb; 2393 struct rtmsg *rtm; 2394 struct flowi6 fl6; 2395 int err, iif = 0; 2396 2397 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2398 if (err < 0) 2399 goto errout; 2400 2401 err = -EINVAL; 2402 memset(&fl6, 0, sizeof(fl6)); 2403 2404 if (tb[RTA_SRC]) { 2405 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2406 goto errout; 2407 2408 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); 2409 } 2410 2411 if (tb[RTA_DST]) { 2412 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2413 goto errout; 2414 2415 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); 2416 } 2417 2418 if (tb[RTA_IIF]) 2419 iif = nla_get_u32(tb[RTA_IIF]); 2420 2421 if (tb[RTA_OIF]) 2422 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); 2423 2424 if (iif) { 2425 struct net_device *dev; 2426 dev = __dev_get_by_index(net, iif); 2427 if (!dev) { 2428 err = -ENODEV; 2429 goto errout; 2430 } 2431 } 2432 2433 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2434 if (skb == NULL) { 2435 err = -ENOBUFS; 2436 goto errout; 2437 } 2438 2439 /* Reserve room for dummy headers, this skb can pass 2440 through good chunk of routing engine. 2441 */ 2442 skb_reset_mac_header(skb); 2443 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2444 2445 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); 2446 skb_dst_set(skb, &rt->dst); 2447 2448 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2449 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2450 nlh->nlmsg_seq, 0, 0, 0); 2451 if (err < 0) { 2452 kfree_skb(skb); 2453 goto errout; 2454 } 2455 2456 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2457 errout: 2458 return err; 2459 } 2460 2461 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2462 { 2463 struct sk_buff *skb; 2464 struct net *net = info->nl_net; 2465 u32 seq; 2466 int err; 2467 2468 err = -ENOBUFS; 2469 seq = info->nlh != NULL ? 
info->nlh->nlmsg_seq : 0; 2470 2471 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2472 if (skb == NULL) 2473 goto errout; 2474 2475 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2476 event, info->pid, seq, 0, 0, 0); 2477 if (err < 0) { 2478 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2479 WARN_ON(err == -EMSGSIZE); 2480 kfree_skb(skb); 2481 goto errout; 2482 } 2483 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2484 info->nlh, gfp_any()); 2485 return; 2486 errout: 2487 if (err < 0) 2488 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2489 } 2490 2491 static int ip6_route_dev_notify(struct notifier_block *this, 2492 unsigned long event, void *data) 2493 { 2494 struct net_device *dev = (struct net_device *)data; 2495 struct net *net = dev_net(dev); 2496 2497 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2498 net->ipv6.ip6_null_entry->dst.dev = dev; 2499 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2500 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2501 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2502 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2503 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2504 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2505 #endif 2506 } 2507 2508 return NOTIFY_OK; 2509 } 2510 2511 /* 2512 * /proc 2513 */ 2514 2515 #ifdef CONFIG_PROC_FS 2516 2517 struct rt6_proc_arg 2518 { 2519 char *buffer; 2520 int offset; 2521 int length; 2522 int skip; 2523 int len; 2524 }; 2525 2526 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2527 { 2528 struct seq_file *m = p_arg; 2529 2530 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2531 2532 #ifdef CONFIG_IPV6_SUBTREES 2533 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2534 #else 2535 seq_puts(m, "00000000000000000000000000000000 00 "); 2536 #endif 2537 2538 if (rt->rt6i_nexthop) { 2539 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); 2540 } else { 2541 seq_puts(m, 
"00000000000000000000000000000000"); 2542 } 2543 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2544 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2545 rt->dst.__use, rt->rt6i_flags, 2546 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2547 return 0; 2548 } 2549 2550 static int ipv6_route_show(struct seq_file *m, void *v) 2551 { 2552 struct net *net = (struct net *)m->private; 2553 fib6_clean_all(net, rt6_info_route, 0, m); 2554 return 0; 2555 } 2556 2557 static int ipv6_route_open(struct inode *inode, struct file *file) 2558 { 2559 return single_open_net(inode, file, ipv6_route_show); 2560 } 2561 2562 static const struct file_operations ipv6_route_proc_fops = { 2563 .owner = THIS_MODULE, 2564 .open = ipv6_route_open, 2565 .read = seq_read, 2566 .llseek = seq_lseek, 2567 .release = single_release_net, 2568 }; 2569 2570 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2571 { 2572 struct net *net = (struct net *)seq->private; 2573 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2574 net->ipv6.rt6_stats->fib_nodes, 2575 net->ipv6.rt6_stats->fib_route_nodes, 2576 net->ipv6.rt6_stats->fib_rt_alloc, 2577 net->ipv6.rt6_stats->fib_rt_entries, 2578 net->ipv6.rt6_stats->fib_rt_cache, 2579 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2580 net->ipv6.rt6_stats->fib_discarded_routes); 2581 2582 return 0; 2583 } 2584 2585 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2586 { 2587 return single_open_net(inode, file, rt6_stats_seq_show); 2588 } 2589 2590 static const struct file_operations rt6_stats_seq_fops = { 2591 .owner = THIS_MODULE, 2592 .open = rt6_stats_seq_open, 2593 .read = seq_read, 2594 .llseek = seq_lseek, 2595 .release = single_release_net, 2596 }; 2597 #endif /* CONFIG_PROC_FS */ 2598 2599 #ifdef CONFIG_SYSCTL 2600 2601 static 2602 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2603 void __user *buffer, size_t *lenp, loff_t *ppos) 2604 { 2605 struct net *net; 2606 int delay; 2607 if (!write) 2608 return -EINVAL; 2609 
2610 net = (struct net *)ctl->extra1; 2611 delay = net->ipv6.sysctl.flush_delay; 2612 proc_dointvec(ctl, write, buffer, lenp, ppos); 2613 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2614 return 0; 2615 } 2616 2617 ctl_table ipv6_route_table_template[] = { 2618 { 2619 .procname = "flush", 2620 .data = &init_net.ipv6.sysctl.flush_delay, 2621 .maxlen = sizeof(int), 2622 .mode = 0200, 2623 .proc_handler = ipv6_sysctl_rtcache_flush 2624 }, 2625 { 2626 .procname = "gc_thresh", 2627 .data = &ip6_dst_ops_template.gc_thresh, 2628 .maxlen = sizeof(int), 2629 .mode = 0644, 2630 .proc_handler = proc_dointvec, 2631 }, 2632 { 2633 .procname = "max_size", 2634 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2635 .maxlen = sizeof(int), 2636 .mode = 0644, 2637 .proc_handler = proc_dointvec, 2638 }, 2639 { 2640 .procname = "gc_min_interval", 2641 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2642 .maxlen = sizeof(int), 2643 .mode = 0644, 2644 .proc_handler = proc_dointvec_jiffies, 2645 }, 2646 { 2647 .procname = "gc_timeout", 2648 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2649 .maxlen = sizeof(int), 2650 .mode = 0644, 2651 .proc_handler = proc_dointvec_jiffies, 2652 }, 2653 { 2654 .procname = "gc_interval", 2655 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2656 .maxlen = sizeof(int), 2657 .mode = 0644, 2658 .proc_handler = proc_dointvec_jiffies, 2659 }, 2660 { 2661 .procname = "gc_elasticity", 2662 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2663 .maxlen = sizeof(int), 2664 .mode = 0644, 2665 .proc_handler = proc_dointvec, 2666 }, 2667 { 2668 .procname = "mtu_expires", 2669 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2670 .maxlen = sizeof(int), 2671 .mode = 0644, 2672 .proc_handler = proc_dointvec_jiffies, 2673 }, 2674 { 2675 .procname = "min_adv_mss", 2676 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2677 .maxlen = sizeof(int), 2678 .mode = 0644, 2679 .proc_handler = proc_dointvec, 2680 }, 2681 { 2682 .procname = 
"gc_min_interval_ms", 2683 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2684 .maxlen = sizeof(int), 2685 .mode = 0644, 2686 .proc_handler = proc_dointvec_ms_jiffies, 2687 }, 2688 { } 2689 }; 2690 2691 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2692 { 2693 struct ctl_table *table; 2694 2695 table = kmemdup(ipv6_route_table_template, 2696 sizeof(ipv6_route_table_template), 2697 GFP_KERNEL); 2698 2699 if (table) { 2700 table[0].data = &net->ipv6.sysctl.flush_delay; 2701 table[0].extra1 = net; 2702 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2703 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2704 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2705 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2706 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2707 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2708 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2709 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2710 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2711 } 2712 2713 return table; 2714 } 2715 #endif 2716 2717 static int __net_init ip6_route_net_init(struct net *net) 2718 { 2719 int ret = -ENOMEM; 2720 2721 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2722 sizeof(net->ipv6.ip6_dst_ops)); 2723 2724 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2725 goto out_ip6_dst_ops; 2726 2727 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2728 sizeof(*net->ipv6.ip6_null_entry), 2729 GFP_KERNEL); 2730 if (!net->ipv6.ip6_null_entry) 2731 goto out_ip6_dst_entries; 2732 net->ipv6.ip6_null_entry->dst.path = 2733 (struct dst_entry *)net->ipv6.ip6_null_entry; 2734 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2735 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2736 ip6_template_metrics, true); 2737 2738 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2739 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2740 
sizeof(*net->ipv6.ip6_prohibit_entry), 2741 GFP_KERNEL); 2742 if (!net->ipv6.ip6_prohibit_entry) 2743 goto out_ip6_null_entry; 2744 net->ipv6.ip6_prohibit_entry->dst.path = 2745 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2746 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2747 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2748 ip6_template_metrics, true); 2749 2750 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2751 sizeof(*net->ipv6.ip6_blk_hole_entry), 2752 GFP_KERNEL); 2753 if (!net->ipv6.ip6_blk_hole_entry) 2754 goto out_ip6_prohibit_entry; 2755 net->ipv6.ip6_blk_hole_entry->dst.path = 2756 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2757 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2758 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2759 ip6_template_metrics, true); 2760 #endif 2761 2762 net->ipv6.sysctl.flush_delay = 0; 2763 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2764 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2765 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2766 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2767 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2768 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2769 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2770 2771 #ifdef CONFIG_PROC_FS 2772 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2773 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2774 #endif 2775 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2776 2777 ret = 0; 2778 out: 2779 return ret; 2780 2781 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2782 out_ip6_prohibit_entry: 2783 kfree(net->ipv6.ip6_prohibit_entry); 2784 out_ip6_null_entry: 2785 kfree(net->ipv6.ip6_null_entry); 2786 #endif 2787 out_ip6_dst_entries: 2788 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2789 out_ip6_dst_ops: 2790 goto out; 2791 } 2792 2793 static void __net_exit ip6_route_net_exit(struct net *net) 2794 { 2795 #ifdef CONFIG_PROC_FS 2796 
proc_net_remove(net, "ipv6_route"); 2797 proc_net_remove(net, "rt6_stats"); 2798 #endif 2799 kfree(net->ipv6.ip6_null_entry); 2800 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2801 kfree(net->ipv6.ip6_prohibit_entry); 2802 kfree(net->ipv6.ip6_blk_hole_entry); 2803 #endif 2804 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2805 } 2806 2807 static struct pernet_operations ip6_route_net_ops = { 2808 .init = ip6_route_net_init, 2809 .exit = ip6_route_net_exit, 2810 }; 2811 2812 static struct notifier_block ip6_route_dev_notifier = { 2813 .notifier_call = ip6_route_dev_notify, 2814 .priority = 0, 2815 }; 2816 2817 int __init ip6_route_init(void) 2818 { 2819 int ret; 2820 2821 ret = -ENOMEM; 2822 ip6_dst_ops_template.kmem_cachep = 2823 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2824 SLAB_HWCACHE_ALIGN, NULL); 2825 if (!ip6_dst_ops_template.kmem_cachep) 2826 goto out; 2827 2828 ret = dst_entries_init(&ip6_dst_blackhole_ops); 2829 if (ret) 2830 goto out_kmem_cache; 2831 2832 ret = register_pernet_subsys(&ip6_route_net_ops); 2833 if (ret) 2834 goto out_dst_entries; 2835 2836 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2837 2838 /* Registering of the loopback is done before this portion of code, 2839 * the loopback reference in rt6_info will not be taken, do it 2840 * manually for init_net */ 2841 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 2842 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2843 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2844 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 2845 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2846 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 2847 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2848 #endif 2849 ret = fib6_init(); 2850 if (ret) 2851 goto out_register_subsys; 2852 2853 ret = xfrm6_init(); 2854 if (ret) 2855 goto out_fib6_init; 
2856 2857 ret = fib6_rules_init(); 2858 if (ret) 2859 goto xfrm6_init; 2860 2861 ret = -ENOBUFS; 2862 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2863 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2864 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2865 goto fib6_rules_init; 2866 2867 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2868 if (ret) 2869 goto fib6_rules_init; 2870 2871 out: 2872 return ret; 2873 2874 fib6_rules_init: 2875 fib6_rules_cleanup(); 2876 xfrm6_init: 2877 xfrm6_fini(); 2878 out_fib6_init: 2879 fib6_gc_cleanup(); 2880 out_register_subsys: 2881 unregister_pernet_subsys(&ip6_route_net_ops); 2882 out_dst_entries: 2883 dst_entries_destroy(&ip6_dst_blackhole_ops); 2884 out_kmem_cache: 2885 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2886 goto out; 2887 } 2888 2889 void ip6_route_cleanup(void) 2890 { 2891 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2892 fib6_rules_cleanup(); 2893 xfrm6_fini(); 2894 fib6_gc_cleanup(); 2895 unregister_pernet_subsys(&ip6_route_net_ops); 2896 dst_entries_destroy(&ip6_dst_blackhole_ops); 2897 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2898 } 2899