1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <linux/slab.h> 44 #include <net/net_namespace.h> 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) 
do { ; } while (0) 73 #endif 74 75 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, 76 const struct in6_addr *dest); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 79 static unsigned int ip6_default_mtu(const struct dst_entry *dst); 80 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 81 static void ip6_dst_destroy(struct dst_entry *); 82 static void ip6_dst_ifdown(struct dst_entry *, 83 struct net_device *dev, int how); 84 static int ip6_dst_gc(struct dst_ops *ops); 85 86 static int ip6_pkt_discard(struct sk_buff *skb); 87 static int ip6_pkt_discard_out(struct sk_buff *skb); 88 static void ip6_link_failure(struct sk_buff *skb); 89 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 90 91 #ifdef CONFIG_IPV6_ROUTE_INFO 92 static struct rt6_info *rt6_add_route_info(struct net *net, 93 const struct in6_addr *prefix, int prefixlen, 94 const struct in6_addr *gwaddr, int ifindex, 95 unsigned pref); 96 static struct rt6_info *rt6_get_route_info(struct net *net, 97 const struct in6_addr *prefix, int prefixlen, 98 const struct in6_addr *gwaddr, int ifindex); 99 #endif 100 101 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 102 { 103 struct rt6_info *rt = (struct rt6_info *) dst; 104 struct inet_peer *peer; 105 u32 *p = NULL; 106 107 if (!rt->rt6i_peer) 108 rt6_bind_peer(rt, 1); 109 110 peer = rt->rt6i_peer; 111 if (peer) { 112 u32 *old_p = __DST_METRICS_PTR(old); 113 unsigned long prev, new; 114 115 p = peer->metrics; 116 if (inet_metrics_new(peer)) 117 memcpy(p, old_p, sizeof(u32) * RTAX_MAX); 118 119 new = (unsigned long) p; 120 prev = cmpxchg(&dst->_metrics, old, new); 121 122 if (prev != old) { 123 p = __DST_METRICS_PTR(prev); 124 if (prev & DST_METRICS_READ_ONLY) 125 p = NULL; 126 } 127 } 128 return p; 129 } 130 131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) 132 { 133 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev); 134 } 135 136 static struct dst_ops ip6_dst_ops_template = { 137 .family = AF_INET6, 138 .protocol = cpu_to_be16(ETH_P_IPV6), 139 .gc = ip6_dst_gc, 140 .gc_thresh = 1024, 141 .check = ip6_dst_check, 142 .default_advmss = ip6_default_advmss, 143 .default_mtu = ip6_default_mtu, 144 .cow_metrics = ipv6_cow_metrics, 145 .destroy = ip6_dst_destroy, 146 .ifdown = ip6_dst_ifdown, 147 .negative_advice = ip6_negative_advice, 148 .link_failure = ip6_link_failure, 149 .update_pmtu = ip6_rt_update_pmtu, 150 .local_out = __ip6_local_out, 151 .neigh_lookup = ip6_neigh_lookup, 152 }; 153 154 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) 155 { 156 return 0; 157 } 158 159 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 160 { 161 } 162 163 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 164 unsigned long old) 165 { 166 return NULL; 167 } 168 169 static struct dst_ops ip6_dst_blackhole_ops = { 170 .family = AF_INET6, 171 .protocol = cpu_to_be16(ETH_P_IPV6), 172 .destroy = ip6_dst_destroy, 173 .check = ip6_dst_check, 174 .default_mtu = ip6_blackhole_default_mtu, 175 .default_advmss = ip6_default_advmss, 176 .update_pmtu = ip6_rt_blackhole_update_pmtu, 177 .cow_metrics = ip6_rt_blackhole_cow_metrics, 178 .neigh_lookup = ip6_neigh_lookup, 179 }; 180 181 static const u32 ip6_template_metrics[RTAX_MAX] = { 182 [RTAX_HOPLIMIT - 1] = 255, 183 }; 184 185 static struct rt6_info ip6_null_entry_template = { 186 .dst = { 187 
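		/*
		 * Pre-refcounted so the template is never freed; its input and
		 * output handlers simply discard packets while dst.error
		 * reports -ENETUNREACH back to the caller.
		 */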
.__refcnt = ATOMIC_INIT(1), 188 .__use = 1, 189 .obsolete = -1, 190 .error = -ENETUNREACH, 191 .input = ip6_pkt_discard, 192 .output = ip6_pkt_discard_out, 193 }, 194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 195 .rt6i_protocol = RTPROT_KERNEL, 196 .rt6i_metric = ~(u32) 0, 197 .rt6i_ref = ATOMIC_INIT(1), 198 }; 199 200 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 201 202 static int ip6_pkt_prohibit(struct sk_buff *skb); 203 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 204 205 static struct rt6_info ip6_prohibit_entry_template = { 206 .dst = { 207 .__refcnt = ATOMIC_INIT(1), 208 .__use = 1, 209 .obsolete = -1, 210 .error = -EACCES, 211 .input = ip6_pkt_prohibit, 212 .output = ip6_pkt_prohibit_out, 213 }, 214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 215 .rt6i_protocol = RTPROT_KERNEL, 216 .rt6i_metric = ~(u32) 0, 217 .rt6i_ref = ATOMIC_INIT(1), 218 }; 219 220 static struct rt6_info ip6_blk_hole_entry_template = { 221 .dst = { 222 .__refcnt = ATOMIC_INIT(1), 223 .__use = 1, 224 .obsolete = -1, 225 .error = -EINVAL, 226 .input = dst_discard, 227 .output = dst_discard, 228 }, 229 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 230 .rt6i_protocol = RTPROT_KERNEL, 231 .rt6i_metric = ~(u32) 0, 232 .rt6i_ref = ATOMIC_INIT(1), 233 }; 234 235 #endif 236 237 /* allocate dst with ip6_dst_ops */ 238 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, 239 struct net_device *dev, 240 int flags) 241 { 242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); 243 244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); 245 246 return rt; 247 } 248 249 static void ip6_dst_destroy(struct dst_entry *dst) 250 { 251 struct rt6_info *rt = (struct rt6_info *)dst; 252 struct inet6_dev *idev = rt->rt6i_idev; 253 struct inet_peer *peer = rt->rt6i_peer; 254 255 if (idev != NULL) { 256 rt->rt6i_idev = NULL; 257 in6_dev_put(idev); 258 } 259 if (peer) { 260 rt->rt6i_peer = NULL; 261 inet_putpeer(peer); 262 } 263 } 264 265 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); 266 267 static u32 rt6_peer_genid(void) 268 { 269 return atomic_read(&__rt6_peer_genid); 270 } 271 272 void rt6_bind_peer(struct rt6_info *rt, int create) 273 { 274 struct inet_peer *peer; 275 276 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); 277 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) 278 inet_putpeer(peer); 279 else 280 rt->rt6i_peer_genid = rt6_peer_genid(); 281 } 282 283 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 284 int how) 285 { 286 struct rt6_info *rt = (struct rt6_info *)dst; 287 struct inet6_dev *idev = rt->rt6i_idev; 288 struct net_device *loopback_dev = 289 dev_net(dev)->loopback_dev; 290 291 if (dev != loopback_dev && idev != NULL && idev->dev == dev) { 292 struct inet6_dev *loopback_idev = 293 in6_dev_get(loopback_dev); 294 if (loopback_idev != NULL) { 295 rt->rt6i_idev = loopback_idev; 296 in6_dev_put(idev); 297 } 298 } 299 } 300 301 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 302 { 303 return (rt->rt6i_flags & RTF_EXPIRES) && 304 time_after(jiffies, rt->rt6i_expires); 305 } 306 307 static inline int rt6_need_strict(const struct in6_addr *daddr) 308 { 309 return ipv6_addr_type(daddr) & 310 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 311 } 312 313 /* 314 * Route lookup. Any table->tb6_lock is implied. 
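 *
 *	rt6_device_match() below narrows a fib6_node's route list by the
 *	requested outgoing interface and/or source address; when
 *	RT6_LOOKUP_F_IFACE is set and no interface matches, it falls back
 *	to the per-net ip6_null_entry.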
315 */ 316 317 static inline struct rt6_info *rt6_device_match(struct net *net, 318 struct rt6_info *rt, 319 const struct in6_addr *saddr, 320 int oif, 321 int flags) 322 { 323 struct rt6_info *local = NULL; 324 struct rt6_info *sprt; 325 326 if (!oif && ipv6_addr_any(saddr)) 327 goto out; 328 329 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 330 struct net_device *dev = sprt->rt6i_dev; 331 332 if (oif) { 333 if (dev->ifindex == oif) 334 return sprt; 335 if (dev->flags & IFF_LOOPBACK) { 336 if (sprt->rt6i_idev == NULL || 337 sprt->rt6i_idev->dev->ifindex != oif) { 338 if (flags & RT6_LOOKUP_F_IFACE && oif) 339 continue; 340 if (local && (!oif || 341 local->rt6i_idev->dev->ifindex == oif)) 342 continue; 343 } 344 local = sprt; 345 } 346 } else { 347 if (ipv6_chk_addr(net, saddr, dev, 348 flags & RT6_LOOKUP_F_IFACE)) 349 return sprt; 350 } 351 } 352 353 if (oif) { 354 if (local) 355 return local; 356 357 if (flags & RT6_LOOKUP_F_IFACE) 358 return net->ipv6.ip6_null_entry; 359 } 360 out: 361 return rt; 362 } 363 364 #ifdef CONFIG_IPV6_ROUTER_PREF 365 static void rt6_probe(struct rt6_info *rt) 366 { 367 struct neighbour *neigh; 368 /* 369 * Okay, this does not seem to be appropriate 370 * for now, however, we need to check if it 371 * is really so; aka Router Reachability Probing. 372 * 373 * Router Reachability Probe MUST be rate-limited 374 * to no more than one per minute. 375 */ 376 rcu_read_lock(); 377 neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; 378 if (!neigh || (neigh->nud_state & NUD_VALID)) 379 goto out; 380 read_lock_bh(&neigh->lock); 381 if (!(neigh->nud_state & NUD_VALID) && 382 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 383 struct in6_addr mcaddr; 384 struct in6_addr *target; 385 386 neigh->updated = jiffies; 387 read_unlock_bh(&neigh->lock); 388 389 target = (struct in6_addr *)&neigh->primary_key; 390 addrconf_addr_solict_mult(target, &mcaddr); 391 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 392 } else { 393 read_unlock_bh(&neigh->lock); 394 } 395 out: 396 rcu_read_unlock(); 397 } 398 #else 399 static inline void rt6_probe(struct rt6_info *rt) 400 { 401 } 402 #endif 403 404 /* 405 * Default Router Selection (RFC 2461 6.3.6) 406 */ 407 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 408 { 409 struct net_device *dev = rt->rt6i_dev; 410 if (!oif || dev->ifindex == oif) 411 return 2; 412 if ((dev->flags & IFF_LOOPBACK) && 413 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 414 return 1; 415 return 0; 416 } 417 418 static inline int rt6_check_neigh(struct rt6_info *rt) 419 { 420 struct neighbour *neigh; 421 int m; 422 423 rcu_read_lock(); 424 neigh = dst_get_neighbour(&rt->dst); 425 if (rt->rt6i_flags & RTF_NONEXTHOP || 426 !(rt->rt6i_flags & RTF_GATEWAY)) 427 m = 1; 428 else if (neigh) { 429 read_lock_bh(&neigh->lock); 430 if (neigh->nud_state & NUD_VALID) 431 m = 2; 432 #ifdef CONFIG_IPV6_ROUTER_PREF 433 else if (neigh->nud_state & NUD_FAILED) 434 m = 0; 435 #endif 436 else 437 m = 1; 438 read_unlock_bh(&neigh->lock); 439 } else 440 m = 0; 441 rcu_read_unlock(); 442 return m; 443 } 444 445 static int rt6_score_route(struct rt6_info *rt, int oif, 446 int strict) 447 { 448 int m, n; 449 450 m = rt6_check_dev(rt, oif); 451 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 452 return -1; 453 #ifdef CONFIG_IPV6_ROUTER_PREF 454 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 455 #endif 456 n = rt6_check_neigh(rt); 457 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 458 return -1; 459 return m; 460 } 461 462 
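/*
 * find_match(), find_rr_leaf() and rt6_select() implement the round-robin
 * default router selection noted in the changelog above: every route that
 * shares rr_head's metric is scored by rt6_score_route(), the best
 * non-expired candidate wins, and under RT6_LOOKUP_F_REACHABLE the
 * candidates that lose are probed via rt6_probe() so an unreachable
 * router is eventually skipped.
 */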
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 463 int *mpri, struct rt6_info *match) 464 { 465 int m; 466 467 if (rt6_check_expired(rt)) 468 goto out; 469 470 m = rt6_score_route(rt, oif, strict); 471 if (m < 0) 472 goto out; 473 474 if (m > *mpri) { 475 if (strict & RT6_LOOKUP_F_REACHABLE) 476 rt6_probe(match); 477 *mpri = m; 478 match = rt; 479 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 480 rt6_probe(rt); 481 } 482 483 out: 484 return match; 485 } 486 487 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 488 struct rt6_info *rr_head, 489 u32 metric, int oif, int strict) 490 { 491 struct rt6_info *rt, *match; 492 int mpri = -1; 493 494 match = NULL; 495 for (rt = rr_head; rt && rt->rt6i_metric == metric; 496 rt = rt->dst.rt6_next) 497 match = find_match(rt, oif, strict, &mpri, match); 498 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 499 rt = rt->dst.rt6_next) 500 match = find_match(rt, oif, strict, &mpri, match); 501 502 return match; 503 } 504 505 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 506 { 507 struct rt6_info *match, *rt0; 508 struct net *net; 509 510 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 511 __func__, fn->leaf, oif); 512 513 rt0 = fn->rr_ptr; 514 if (!rt0) 515 fn->rr_ptr = rt0 = fn->leaf; 516 517 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 518 519 if (!match && 520 (strict & RT6_LOOKUP_F_REACHABLE)) { 521 struct rt6_info *next = rt0->dst.rt6_next; 522 523 /* no entries matched; do round-robin */ 524 if (!next || next->rt6i_metric != rt0->rt6i_metric) 525 next = fn->leaf; 526 527 if (next != rt0) 528 fn->rr_ptr = next; 529 } 530 531 RT6_TRACE("%s() => %p\n", 532 __func__, match); 533 534 net = dev_net(rt0->rt6i_dev); 535 return match ? 
match : net->ipv6.ip6_null_entry; 536 } 537 538 #ifdef CONFIG_IPV6_ROUTE_INFO 539 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 540 const struct in6_addr *gwaddr) 541 { 542 struct net *net = dev_net(dev); 543 struct route_info *rinfo = (struct route_info *) opt; 544 struct in6_addr prefix_buf, *prefix; 545 unsigned int pref; 546 unsigned long lifetime; 547 struct rt6_info *rt; 548 549 if (len < sizeof(struct route_info)) { 550 return -EINVAL; 551 } 552 553 /* Sanity check for prefix_len and length */ 554 if (rinfo->length > 3) { 555 return -EINVAL; 556 } else if (rinfo->prefix_len > 128) { 557 return -EINVAL; 558 } else if (rinfo->prefix_len > 64) { 559 if (rinfo->length < 2) { 560 return -EINVAL; 561 } 562 } else if (rinfo->prefix_len > 0) { 563 if (rinfo->length < 1) { 564 return -EINVAL; 565 } 566 } 567 568 pref = rinfo->route_pref; 569 if (pref == ICMPV6_ROUTER_PREF_INVALID) 570 return -EINVAL; 571 572 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 573 574 if (rinfo->length == 3) 575 prefix = (struct in6_addr *)rinfo->prefix; 576 else { 577 /* this function is safe */ 578 ipv6_addr_prefix(&prefix_buf, 579 (struct in6_addr *)rinfo->prefix, 580 rinfo->prefix_len); 581 prefix = &prefix_buf; 582 } 583 584 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 585 dev->ifindex); 586 587 if (rt && !lifetime) { 588 ip6_del_rt(rt); 589 rt = NULL; 590 } 591 592 if (!rt && lifetime) 593 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 594 pref); 595 else if (rt) 596 rt->rt6i_flags = RTF_ROUTEINFO | 597 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 598 599 if (rt) { 600 if (!addrconf_finite_timeout(lifetime)) { 601 rt->rt6i_flags &= ~RTF_EXPIRES; 602 } else { 603 rt->rt6i_expires = jiffies + HZ * lifetime; 604 rt->rt6i_flags |= RTF_EXPIRES; 605 } 606 dst_release(&rt->dst); 607 } 608 return 0; 609 } 610 #endif 611 612 #define BACKTRACK(__net, saddr) \ 613 do { \ 614 if (rt == __net->ipv6.ip6_null_entry) { \ 615 struct fib6_node *pn; \ 616 while (1) { \ 617 if (fn->fn_flags & RTN_TL_ROOT) \ 618 goto out; \ 619 pn = fn->parent; \ 620 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 621 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 622 else \ 623 fn = pn; \ 624 if (fn->fn_flags & RTN_RTINFO) \ 625 goto restart; \ 626 } \ 627 } \ 628 } while(0) 629 630 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 631 struct fib6_table *table, 632 struct flowi6 *fl6, int flags) 633 { 634 struct fib6_node *fn; 635 struct rt6_info *rt; 636 637 read_lock_bh(&table->tb6_lock); 638 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 639 restart: 640 rt = fn->leaf; 641 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 642 BACKTRACK(net, &fl6->saddr); 643 out: 644 dst_use(&rt->dst, jiffies); 645 read_unlock_bh(&table->tb6_lock); 646 return rt; 647 648 } 649 650 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 651 const struct in6_addr *saddr, int oif, int strict) 652 { 653 struct flowi6 fl6 = { 654 .flowi6_oif = oif, 655 .daddr = *daddr, 656 }; 657 struct dst_entry *dst; 658 int flags = strict ? 
RT6_LOOKUP_F_IFACE : 0; 659 660 if (saddr) { 661 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 662 flags |= RT6_LOOKUP_F_HAS_SADDR; 663 } 664 665 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 666 if (dst->error == 0) 667 return (struct rt6_info *) dst; 668 669 dst_release(dst); 670 671 return NULL; 672 } 673 674 EXPORT_SYMBOL(rt6_lookup); 675 676 /* ip6_ins_rt is called with FREE table->tb6_lock. 677 It takes new route entry, the addition fails by any reason the 678 route is freed. In any case, if caller does not hold it, it may 679 be destroyed. 680 */ 681 682 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 683 { 684 int err; 685 struct fib6_table *table; 686 687 table = rt->rt6i_table; 688 write_lock_bh(&table->tb6_lock); 689 err = fib6_add(&table->tb6_root, rt, info); 690 write_unlock_bh(&table->tb6_lock); 691 692 return err; 693 } 694 695 int ip6_ins_rt(struct rt6_info *rt) 696 { 697 struct nl_info info = { 698 .nl_net = dev_net(rt->rt6i_dev), 699 }; 700 return __ip6_ins_rt(rt, &info); 701 } 702 703 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, 704 const struct in6_addr *daddr, 705 const struct in6_addr *saddr) 706 { 707 struct rt6_info *rt; 708 709 /* 710 * Clone the route. 711 */ 712 713 rt = ip6_rt_copy(ort, daddr); 714 715 if (rt) { 716 struct neighbour *neigh; 717 int attempts = !in_softirq(); 718 719 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 720 if (rt->rt6i_dst.plen != 128 && 721 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 722 rt->rt6i_flags |= RTF_ANYCAST; 723 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 724 } 725 726 rt->rt6i_dst.plen = 128; 727 rt->rt6i_flags |= RTF_CACHE; 728 rt->dst.flags |= DST_HOST; 729 730 #ifdef CONFIG_IPV6_SUBTREES 731 if (rt->rt6i_src.plen && saddr) { 732 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 733 rt->rt6i_src.plen = 128; 734 } 735 #endif 736 737 retry: 738 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 739 if (IS_ERR(neigh)) { 740 struct net *net = dev_net(rt->rt6i_dev); 741 int saved_rt_min_interval = 742 net->ipv6.sysctl.ip6_rt_gc_min_interval; 743 int saved_rt_elasticity = 744 net->ipv6.sysctl.ip6_rt_gc_elasticity; 745 746 if (attempts-- > 0) { 747 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 748 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 749 750 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 751 752 net->ipv6.sysctl.ip6_rt_gc_elasticity = 753 saved_rt_elasticity; 754 net->ipv6.sysctl.ip6_rt_gc_min_interval = 755 saved_rt_min_interval; 756 goto retry; 757 } 758 759 if (net_ratelimit()) 760 printk(KERN_WARNING 761 "ipv6: Neighbour table overflow.\n"); 762 dst_free(&rt->dst); 763 return NULL; 764 } 765 dst_set_neighbour(&rt->dst, neigh); 766 767 } 768 769 return rt; 770 } 771 772 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, 773 const struct in6_addr *daddr) 774 { 775 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 776 777 if (rt) { 778 rt->rt6i_dst.plen = 128; 779 rt->rt6i_flags |= RTF_CACHE; 780 rt->dst.flags |= DST_HOST; 781 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); 782 } 783 return rt; 784 } 785 786 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 787 struct flowi6 *fl6, int flags) 788 { 789 struct fib6_node *fn; 790 struct rt6_info *rt, *nrt; 791 int strict = 0; 792 int attempts = 3; 793 int err; 794 int reachable = net->ipv6.devconf_all->forwarding ? 
0 : RT6_LOOKUP_F_REACHABLE; 795 796 strict |= flags & RT6_LOOKUP_F_IFACE; 797 798 relookup: 799 read_lock_bh(&table->tb6_lock); 800 801 restart_2: 802 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 803 804 restart: 805 rt = rt6_select(fn, oif, strict | reachable); 806 807 BACKTRACK(net, &fl6->saddr); 808 if (rt == net->ipv6.ip6_null_entry || 809 rt->rt6i_flags & RTF_CACHE) 810 goto out; 811 812 dst_hold(&rt->dst); 813 read_unlock_bh(&table->tb6_lock); 814 815 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) 816 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 817 else if (!(rt->dst.flags & DST_HOST)) 818 nrt = rt6_alloc_clone(rt, &fl6->daddr); 819 else 820 goto out2; 821 822 dst_release(&rt->dst); 823 rt = nrt ? : net->ipv6.ip6_null_entry; 824 825 dst_hold(&rt->dst); 826 if (nrt) { 827 err = ip6_ins_rt(nrt); 828 if (!err) 829 goto out2; 830 } 831 832 if (--attempts <= 0) 833 goto out2; 834 835 /* 836 * Race condition! In the gap, when table->tb6_lock was 837 * released someone could insert this route. Relookup. 838 */ 839 dst_release(&rt->dst); 840 goto relookup; 841 842 out: 843 if (reachable) { 844 reachable = 0; 845 goto restart_2; 846 } 847 dst_hold(&rt->dst); 848 read_unlock_bh(&table->tb6_lock); 849 out2: 850 rt->dst.lastuse = jiffies; 851 rt->dst.__use++; 852 853 return rt; 854 } 855 856 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 857 struct flowi6 *fl6, int flags) 858 { 859 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 860 } 861 862 void ip6_route_input(struct sk_buff *skb) 863 { 864 const struct ipv6hdr *iph = ipv6_hdr(skb); 865 struct net *net = dev_net(skb->dev); 866 int flags = RT6_LOOKUP_F_HAS_SADDR; 867 struct flowi6 fl6 = { 868 .flowi6_iif = skb->dev->ifindex, 869 .daddr = iph->daddr, 870 .saddr = iph->saddr, 871 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 872 .flowi6_mark = skb->mark, 873 .flowi6_proto = iph->nexthdr, 874 }; 875 876 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 877 flags |= RT6_LOOKUP_F_IFACE; 878 879 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input)); 880 } 881 882 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 883 struct flowi6 *fl6, int flags) 884 { 885 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 886 } 887 888 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, 889 struct flowi6 *fl6) 890 { 891 int flags = 0; 892 893 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 894 flags |= RT6_LOOKUP_F_IFACE; 895 896 if (!ipv6_addr_any(&fl6->saddr)) 897 flags |= RT6_LOOKUP_F_HAS_SADDR; 898 else if (sk) 899 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 900 901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 902 } 903 904 EXPORT_SYMBOL(ip6_route_output); 905 906 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 907 { 908 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 909 struct dst_entry *new = NULL; 910 911 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); 912 if (rt) { 913 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); 914 915 new = &rt->dst; 916 917 new->__use = 1; 918 new->input = dst_discard; 919 new->output = dst_discard; 920 921 if (dst_metrics_read_only(&ort->dst)) 922 new->_metrics = ort->dst._metrics; 923 else 924 dst_copy_metrics(new, &ort->dst); 925 rt->rt6i_idev = ort->rt6i_idev; 926 if (rt->rt6i_idev) 
927 in6_dev_hold(rt->rt6i_idev); 928 rt->rt6i_expires = 0; 929 930 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 931 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 932 rt->rt6i_metric = 0; 933 934 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 935 #ifdef CONFIG_IPV6_SUBTREES 936 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 937 #endif 938 939 dst_free(new); 940 } 941 942 dst_release(dst_orig); 943 return new ? new : ERR_PTR(-ENOMEM); 944 } 945 946 /* 947 * Destination cache support functions 948 */ 949 950 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 951 { 952 struct rt6_info *rt; 953 954 rt = (struct rt6_info *) dst; 955 956 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 957 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 958 if (!rt->rt6i_peer) 959 rt6_bind_peer(rt, 0); 960 rt->rt6i_peer_genid = rt6_peer_genid(); 961 } 962 return dst; 963 } 964 return NULL; 965 } 966 967 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 968 { 969 struct rt6_info *rt = (struct rt6_info *) dst; 970 971 if (rt) { 972 if (rt->rt6i_flags & RTF_CACHE) { 973 if (rt6_check_expired(rt)) { 974 ip6_del_rt(rt); 975 dst = NULL; 976 } 977 } else { 978 dst_release(dst); 979 dst = NULL; 980 } 981 } 982 return dst; 983 } 984 985 static void ip6_link_failure(struct sk_buff *skb) 986 { 987 struct rt6_info *rt; 988 989 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 990 991 rt = (struct rt6_info *) skb_dst(skb); 992 if (rt) { 993 if (rt->rt6i_flags&RTF_CACHE) { 994 dst_set_expires(&rt->dst, 0); 995 rt->rt6i_flags |= RTF_EXPIRES; 996 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 997 rt->rt6i_node->fn_sernum = -1; 998 } 999 } 1000 1001 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1002 { 1003 struct rt6_info *rt6 = (struct rt6_info*)dst; 1004 1005 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1006 rt6->rt6i_flags |= RTF_MODIFIED; 1007 if (mtu < IPV6_MIN_MTU) { 1008 u32 features = dst_metric(dst, RTAX_FEATURES); 1009 mtu = IPV6_MIN_MTU; 1010 features |= RTAX_FEATURE_ALLFRAG; 1011 dst_metric_set(dst, RTAX_FEATURES, features); 1012 } 1013 dst_metric_set(dst, RTAX_MTU, mtu); 1014 } 1015 } 1016 1017 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1018 { 1019 struct net_device *dev = dst->dev; 1020 unsigned int mtu = dst_mtu(dst); 1021 struct net *net = dev_net(dev); 1022 1023 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1024 1025 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1026 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1027 1028 /* 1029 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1030 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
1031 * IPV6_MAXPLEN is also valid and means: "any MSS, 1032 * rely only on pmtu discovery" 1033 */ 1034 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1035 mtu = IPV6_MAXPLEN; 1036 return mtu; 1037 } 1038 1039 static unsigned int ip6_default_mtu(const struct dst_entry *dst) 1040 { 1041 unsigned int mtu = IPV6_MIN_MTU; 1042 struct inet6_dev *idev; 1043 1044 rcu_read_lock(); 1045 idev = __in6_dev_get(dst->dev); 1046 if (idev) 1047 mtu = idev->cnf.mtu6; 1048 rcu_read_unlock(); 1049 1050 return mtu; 1051 } 1052 1053 static struct dst_entry *icmp6_dst_gc_list; 1054 static DEFINE_SPINLOCK(icmp6_dst_lock); 1055 1056 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1057 struct neighbour *neigh, 1058 const struct in6_addr *addr) 1059 { 1060 struct rt6_info *rt; 1061 struct inet6_dev *idev = in6_dev_get(dev); 1062 struct net *net = dev_net(dev); 1063 1064 if (unlikely(idev == NULL)) 1065 return NULL; 1066 1067 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); 1068 if (unlikely(rt == NULL)) { 1069 in6_dev_put(idev); 1070 goto out; 1071 } 1072 1073 if (neigh) 1074 neigh_hold(neigh); 1075 else { 1076 neigh = ndisc_get_neigh(dev, addr); 1077 if (IS_ERR(neigh)) 1078 neigh = NULL; 1079 } 1080 1081 rt->rt6i_idev = idev; 1082 dst_set_neighbour(&rt->dst, neigh); 1083 atomic_set(&rt->dst.__refcnt, 1); 1084 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1085 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1086 rt->dst.output = ip6_output; 1087 1088 spin_lock_bh(&icmp6_dst_lock); 1089 rt->dst.next = icmp6_dst_gc_list; 1090 icmp6_dst_gc_list = &rt->dst; 1091 spin_unlock_bh(&icmp6_dst_lock); 1092 1093 fib6_force_start_gc(net); 1094 1095 out: 1096 return &rt->dst; 1097 } 1098 1099 int icmp6_dst_gc(void) 1100 { 1101 struct dst_entry *dst, **pprev; 1102 int more = 0; 1103 1104 spin_lock_bh(&icmp6_dst_lock); 1105 pprev = &icmp6_dst_gc_list; 1106 1107 while ((dst = *pprev) != NULL) { 1108 if (!atomic_read(&dst->__refcnt)) { 1109 *pprev = dst->next; 1110 dst_free(dst); 1111 } else { 1112 pprev = &dst->next; 1113 ++more; 1114 } 1115 } 1116 1117 spin_unlock_bh(&icmp6_dst_lock); 1118 1119 return more; 1120 } 1121 1122 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1123 void *arg) 1124 { 1125 struct dst_entry *dst, **pprev; 1126 1127 spin_lock_bh(&icmp6_dst_lock); 1128 pprev = &icmp6_dst_gc_list; 1129 while ((dst = *pprev) != NULL) { 1130 struct rt6_info *rt = (struct rt6_info *) dst; 1131 if (func(rt, arg)) { 1132 *pprev = dst->next; 1133 dst_free(dst); 1134 } else { 1135 pprev = &dst->next; 1136 } 1137 } 1138 spin_unlock_bh(&icmp6_dst_lock); 1139 } 1140 1141 static int ip6_dst_gc(struct dst_ops *ops) 1142 { 1143 unsigned long now = jiffies; 1144 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1145 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1146 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1147 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1148 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1149 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1150 int entries; 1151 1152 entries = dst_entries_get_fast(ops); 1153 if (time_after(rt_last_gc + rt_min_interval, now) && 1154 entries <= rt_max_size) 1155 goto out; 1156 1157 net->ipv6.ip6_rt_gc_expire++; 1158 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1159 net->ipv6.ip6_rt_last_gc = now; 1160 entries = dst_entries_get_slow(ops); 1161 if (entries < ops->gc_thresh) 1162 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1163 out: 1164 net->ipv6.ip6_rt_gc_expire -= 
net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1165 return entries > rt_max_size; 1166 } 1167 1168 /* Clean host part of a prefix. Not necessary in radix tree, 1169 but results in cleaner routing tables. 1170 1171 Remove it only when all the things will work! 1172 */ 1173 1174 int ip6_dst_hoplimit(struct dst_entry *dst) 1175 { 1176 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1177 if (hoplimit == 0) { 1178 struct net_device *dev = dst->dev; 1179 struct inet6_dev *idev; 1180 1181 rcu_read_lock(); 1182 idev = __in6_dev_get(dev); 1183 if (idev) 1184 hoplimit = idev->cnf.hop_limit; 1185 else 1186 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1187 rcu_read_unlock(); 1188 } 1189 return hoplimit; 1190 } 1191 EXPORT_SYMBOL(ip6_dst_hoplimit); 1192 1193 /* 1194 * 1195 */ 1196 1197 int ip6_route_add(struct fib6_config *cfg) 1198 { 1199 int err; 1200 struct net *net = cfg->fc_nlinfo.nl_net; 1201 struct rt6_info *rt = NULL; 1202 struct net_device *dev = NULL; 1203 struct inet6_dev *idev = NULL; 1204 struct fib6_table *table; 1205 int addr_type; 1206 1207 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1208 return -EINVAL; 1209 #ifndef CONFIG_IPV6_SUBTREES 1210 if (cfg->fc_src_len) 1211 return -EINVAL; 1212 #endif 1213 if (cfg->fc_ifindex) { 1214 err = -ENODEV; 1215 dev = dev_get_by_index(net, cfg->fc_ifindex); 1216 if (!dev) 1217 goto out; 1218 idev = in6_dev_get(dev); 1219 if (!idev) 1220 goto out; 1221 } 1222 1223 if (cfg->fc_metric == 0) 1224 cfg->fc_metric = IP6_RT_PRIO_USER; 1225 1226 table = fib6_new_table(net, cfg->fc_table); 1227 if (table == NULL) { 1228 err = -ENOBUFS; 1229 goto out; 1230 } 1231 1232 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); 1233 1234 if (rt == NULL) { 1235 err = -ENOMEM; 1236 goto out; 1237 } 1238 1239 rt->dst.obsolete = -1; 1240 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1241 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1242 0; 1243 1244 if (cfg->fc_protocol == RTPROT_UNSPEC) 1245 cfg->fc_protocol = RTPROT_BOOT; 1246 rt->rt6i_protocol = cfg->fc_protocol; 1247 1248 addr_type = ipv6_addr_type(&cfg->fc_dst); 1249 1250 if (addr_type & IPV6_ADDR_MULTICAST) 1251 rt->dst.input = ip6_mc_input; 1252 else if (cfg->fc_flags & RTF_LOCAL) 1253 rt->dst.input = ip6_input; 1254 else 1255 rt->dst.input = ip6_forward; 1256 1257 rt->dst.output = ip6_output; 1258 1259 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1260 rt->rt6i_dst.plen = cfg->fc_dst_len; 1261 if (rt->rt6i_dst.plen == 128) 1262 rt->dst.flags |= DST_HOST; 1263 1264 #ifdef CONFIG_IPV6_SUBTREES 1265 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1266 rt->rt6i_src.plen = cfg->fc_src_len; 1267 #endif 1268 1269 rt->rt6i_metric = cfg->fc_metric; 1270 1271 /* We cannot add true routes via loopback here, 1272 they would result in kernel looping; promote them to reject routes 1273 */ 1274 if ((cfg->fc_flags & RTF_REJECT) || 1275 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) 1276 && !(cfg->fc_flags&RTF_LOCAL))) { 1277 /* hold loopback dev/idev if we haven't done so. 
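		 * The route itself is then turned into a reject entry below:
		 * input/output point at ip6_pkt_discard()/ip6_pkt_discard_out(),
		 * dst.error is set to -ENETUNREACH and the
		 * RTF_REJECT|RTF_NONEXTHOP flags are applied.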
*/ 1278 if (dev != net->loopback_dev) { 1279 if (dev) { 1280 dev_put(dev); 1281 in6_dev_put(idev); 1282 } 1283 dev = net->loopback_dev; 1284 dev_hold(dev); 1285 idev = in6_dev_get(dev); 1286 if (!idev) { 1287 err = -ENODEV; 1288 goto out; 1289 } 1290 } 1291 rt->dst.output = ip6_pkt_discard_out; 1292 rt->dst.input = ip6_pkt_discard; 1293 rt->dst.error = -ENETUNREACH; 1294 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1295 goto install_route; 1296 } 1297 1298 if (cfg->fc_flags & RTF_GATEWAY) { 1299 const struct in6_addr *gw_addr; 1300 int gwa_type; 1301 1302 gw_addr = &cfg->fc_gateway; 1303 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1304 gwa_type = ipv6_addr_type(gw_addr); 1305 1306 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1307 struct rt6_info *grt; 1308 1309 /* IPv6 strictly inhibits using not link-local 1310 addresses as nexthop address. 1311 Otherwise, router will not able to send redirects. 1312 It is very good, but in some (rare!) circumstances 1313 (SIT, PtP, NBMA NOARP links) it is handy to allow 1314 some exceptions. --ANK 1315 */ 1316 err = -EINVAL; 1317 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1318 goto out; 1319 1320 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1321 1322 err = -EHOSTUNREACH; 1323 if (grt == NULL) 1324 goto out; 1325 if (dev) { 1326 if (dev != grt->rt6i_dev) { 1327 dst_release(&grt->dst); 1328 goto out; 1329 } 1330 } else { 1331 dev = grt->rt6i_dev; 1332 idev = grt->rt6i_idev; 1333 dev_hold(dev); 1334 in6_dev_hold(grt->rt6i_idev); 1335 } 1336 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1337 err = 0; 1338 dst_release(&grt->dst); 1339 1340 if (err) 1341 goto out; 1342 } 1343 err = -EINVAL; 1344 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1345 goto out; 1346 } 1347 1348 err = -ENODEV; 1349 if (dev == NULL) 1350 goto out; 1351 1352 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1353 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 1354 err = -EINVAL; 1355 goto out; 1356 } 1357 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc); 1358 rt->rt6i_prefsrc.plen = 128; 1359 } else 1360 rt->rt6i_prefsrc.plen = 0; 1361 1362 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1363 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1364 if (IS_ERR(n)) { 1365 err = PTR_ERR(n); 1366 goto out; 1367 } 1368 dst_set_neighbour(&rt->dst, n); 1369 } 1370 1371 rt->rt6i_flags = cfg->fc_flags; 1372 1373 install_route: 1374 if (cfg->fc_mx) { 1375 struct nlattr *nla; 1376 int remaining; 1377 1378 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1379 int type = nla_type(nla); 1380 1381 if (type) { 1382 if (type > RTAX_MAX) { 1383 err = -EINVAL; 1384 goto out; 1385 } 1386 1387 dst_metric_set(&rt->dst, type, nla_get_u32(nla)); 1388 } 1389 } 1390 } 1391 1392 rt->dst.dev = dev; 1393 rt->rt6i_idev = idev; 1394 rt->rt6i_table = table; 1395 1396 cfg->fc_nlinfo.nl_net = dev_net(dev); 1397 1398 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1399 1400 out: 1401 if (dev) 1402 dev_put(dev); 1403 if (idev) 1404 in6_dev_put(idev); 1405 if (rt) 1406 dst_free(&rt->dst); 1407 return err; 1408 } 1409 1410 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1411 { 1412 int err; 1413 struct fib6_table *table; 1414 struct net *net = dev_net(rt->rt6i_dev); 1415 1416 if (rt == net->ipv6.ip6_null_entry) 1417 return -ENOENT; 1418 1419 table = rt->rt6i_table; 1420 write_lock_bh(&table->tb6_lock); 1421 1422 err = fib6_del(rt, info); 1423 dst_release(&rt->dst); 1424 1425 write_unlock_bh(&table->tb6_lock); 1426 1427 return err; 1428 } 1429 1430 int 
ip6_del_rt(struct rt6_info *rt) 1431 { 1432 struct nl_info info = { 1433 .nl_net = dev_net(rt->rt6i_dev), 1434 }; 1435 return __ip6_del_rt(rt, &info); 1436 } 1437 1438 static int ip6_route_del(struct fib6_config *cfg) 1439 { 1440 struct fib6_table *table; 1441 struct fib6_node *fn; 1442 struct rt6_info *rt; 1443 int err = -ESRCH; 1444 1445 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1446 if (table == NULL) 1447 return err; 1448 1449 read_lock_bh(&table->tb6_lock); 1450 1451 fn = fib6_locate(&table->tb6_root, 1452 &cfg->fc_dst, cfg->fc_dst_len, 1453 &cfg->fc_src, cfg->fc_src_len); 1454 1455 if (fn) { 1456 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1457 if (cfg->fc_ifindex && 1458 (rt->rt6i_dev == NULL || 1459 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1460 continue; 1461 if (cfg->fc_flags & RTF_GATEWAY && 1462 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1463 continue; 1464 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1465 continue; 1466 dst_hold(&rt->dst); 1467 read_unlock_bh(&table->tb6_lock); 1468 1469 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1470 } 1471 } 1472 read_unlock_bh(&table->tb6_lock); 1473 1474 return err; 1475 } 1476 1477 /* 1478 * Handle redirects 1479 */ 1480 struct ip6rd_flowi { 1481 struct flowi6 fl6; 1482 struct in6_addr gateway; 1483 }; 1484 1485 static struct rt6_info *__ip6_route_redirect(struct net *net, 1486 struct fib6_table *table, 1487 struct flowi6 *fl6, 1488 int flags) 1489 { 1490 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 1491 struct rt6_info *rt; 1492 struct fib6_node *fn; 1493 1494 /* 1495 * Get the "current" route for this destination and 1496 * check if the redirect has come from the appropriate router. 1497 * 1498 * RFC 2461 specifies that redirects should only be 1499 * accepted if they come from the nexthop to the target. 1500 * Due to the way the routes are chosen, this notion 1501 * is a bit fuzzy and one might need to check all possible 1502 * routes. 1503 */ 1504 1505 read_lock_bh(&table->tb6_lock); 1506 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1507 restart: 1508 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1509 /* 1510 * Current route is on-link; redirect is always invalid. 1511 * 1512 * It seems the previous statement is not true: the sender could 1513 * be a node which regards us as on-link (e.g. proxy ndisc). 1514 * But then the router serving it might decide that we should 1515 * know the truth 8)8) --ANK (980726).
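		 *
		 * The checks below therefore only accept a route that is not
		 * expired, has RTF_GATEWAY set, goes out the interface the
		 * redirect arrived on, and whose gateway address matches the
		 * router that sent the redirect.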
1516 */ 1517 if (rt6_check_expired(rt)) 1518 continue; 1519 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1520 continue; 1521 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) 1522 continue; 1523 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1524 continue; 1525 break; 1526 } 1527 1528 if (!rt) 1529 rt = net->ipv6.ip6_null_entry; 1530 BACKTRACK(net, &fl6->saddr); 1531 out: 1532 dst_hold(&rt->dst); 1533 1534 read_unlock_bh(&table->tb6_lock); 1535 1536 return rt; 1537 }; 1538 1539 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, 1540 const struct in6_addr *src, 1541 const struct in6_addr *gateway, 1542 struct net_device *dev) 1543 { 1544 int flags = RT6_LOOKUP_F_HAS_SADDR; 1545 struct net *net = dev_net(dev); 1546 struct ip6rd_flowi rdfl = { 1547 .fl6 = { 1548 .flowi6_oif = dev->ifindex, 1549 .daddr = *dest, 1550 .saddr = *src, 1551 }, 1552 }; 1553 1554 ipv6_addr_copy(&rdfl.gateway, gateway); 1555 1556 if (rt6_need_strict(dest)) 1557 flags |= RT6_LOOKUP_F_IFACE; 1558 1559 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, 1560 flags, __ip6_route_redirect); 1561 } 1562 1563 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, 1564 const struct in6_addr *saddr, 1565 struct neighbour *neigh, u8 *lladdr, int on_link) 1566 { 1567 struct rt6_info *rt, *nrt = NULL; 1568 struct netevent_redirect netevent; 1569 struct net *net = dev_net(neigh->dev); 1570 1571 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1572 1573 if (rt == net->ipv6.ip6_null_entry) { 1574 if (net_ratelimit()) 1575 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1576 "for redirect target\n"); 1577 goto out; 1578 } 1579 1580 /* 1581 * We have finally decided to accept it. 1582 */ 1583 1584 neigh_update(neigh, lladdr, NUD_STALE, 1585 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1586 NEIGH_UPDATE_F_OVERRIDE| 1587 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1588 NEIGH_UPDATE_F_ISROUTER)) 1589 ); 1590 1591 /* 1592 * Redirect received -> path was valid. 1593 * Look, redirects are sent only in response to data packets, 1594 * so that this nexthop apparently is reachable. --ANK 1595 */ 1596 dst_confirm(&rt->dst); 1597 1598 /* Duplicate redirect: silently ignore. */ 1599 if (neigh == dst_get_neighbour_raw(&rt->dst)) 1600 goto out; 1601 1602 nrt = ip6_rt_copy(rt, dest); 1603 if (nrt == NULL) 1604 goto out; 1605 1606 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1607 if (on_link) 1608 nrt->rt6i_flags &= ~RTF_GATEWAY; 1609 1610 nrt->rt6i_dst.plen = 128; 1611 nrt->dst.flags |= DST_HOST; 1612 1613 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1614 dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); 1615 1616 if (ip6_ins_rt(nrt)) 1617 goto out; 1618 1619 netevent.old = &rt->dst; 1620 netevent.new = &nrt->dst; 1621 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1622 1623 if (rt->rt6i_flags&RTF_CACHE) { 1624 ip6_del_rt(rt); 1625 return; 1626 } 1627 1628 out: 1629 dst_release(&rt->dst); 1630 } 1631 1632 /* 1633 * Handle ICMP "packet too big" messages 1634 * i.e. 
Path MTU discovery 1635 */ 1636 1637 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, 1638 struct net *net, u32 pmtu, int ifindex) 1639 { 1640 struct rt6_info *rt, *nrt; 1641 int allfrag = 0; 1642 again: 1643 rt = rt6_lookup(net, daddr, saddr, ifindex, 0); 1644 if (rt == NULL) 1645 return; 1646 1647 if (rt6_check_expired(rt)) { 1648 ip6_del_rt(rt); 1649 goto again; 1650 } 1651 1652 if (pmtu >= dst_mtu(&rt->dst)) 1653 goto out; 1654 1655 if (pmtu < IPV6_MIN_MTU) { 1656 /* 1657 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1658 * MTU (1280) and a fragment header should always be included 1659 * after a node receiving Too Big message reporting PMTU is 1660 * less than the IPv6 Minimum Link MTU. 1661 */ 1662 pmtu = IPV6_MIN_MTU; 1663 allfrag = 1; 1664 } 1665 1666 /* New mtu received -> path was valid. 1667 They are sent only in response to data packets, 1668 so that this nexthop apparently is reachable. --ANK 1669 */ 1670 dst_confirm(&rt->dst); 1671 1672 /* Host route. If it is static, it would be better 1673 not to override it, but add new one, so that 1674 when cache entry will expire old pmtu 1675 would return automatically. 1676 */ 1677 if (rt->rt6i_flags & RTF_CACHE) { 1678 dst_metric_set(&rt->dst, RTAX_MTU, pmtu); 1679 if (allfrag) { 1680 u32 features = dst_metric(&rt->dst, RTAX_FEATURES); 1681 features |= RTAX_FEATURE_ALLFRAG; 1682 dst_metric_set(&rt->dst, RTAX_FEATURES, features); 1683 } 1684 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1685 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1686 goto out; 1687 } 1688 1689 /* Network route. 1690 Two cases are possible: 1691 1. It is connected route. Action: COW 1692 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1693 */ 1694 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1695 nrt = rt6_alloc_cow(rt, daddr, saddr); 1696 else 1697 nrt = rt6_alloc_clone(rt, daddr); 1698 1699 if (nrt) { 1700 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); 1701 if (allfrag) { 1702 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); 1703 features |= RTAX_FEATURE_ALLFRAG; 1704 dst_metric_set(&nrt->dst, RTAX_FEATURES, features); 1705 } 1706 1707 /* According to RFC 1981, detecting PMTU increase shouldn't be 1708 * happened within 5 mins, the recommended timer is 10 mins. 1709 * Here this route expiration time is set to ip6_rt_mtu_expires 1710 * which is 10 mins. After 10 mins the decreased pmtu is expired 1711 * and detecting PMTU increase will be automatically happened. 1712 */ 1713 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1714 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1715 1716 ip6_ins_rt(nrt); 1717 } 1718 out: 1719 dst_release(&rt->dst); 1720 } 1721 1722 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, 1723 struct net_device *dev, u32 pmtu) 1724 { 1725 struct net *net = dev_net(dev); 1726 1727 /* 1728 * RFC 1981 states that a node "MUST reduce the size of the packets it 1729 * is sending along the path" that caused the Packet Too Big message. 1730 * Since it's not possible in the general case to determine which 1731 * interface was used to send the original packet, we update the MTU 1732 * on the interface that will be used to send future packets. We also 1733 * update the MTU on the interface that received the Packet Too Big in 1734 * case the original packet was forced out that interface with 1735 * SO_BINDTODEVICE or similar. 
This is the next best thing to the 1736 * correct behaviour, which would be to update the MTU on all 1737 * interfaces. 1738 */ 1739 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); 1740 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); 1741 } 1742 1743 /* 1744 * Misc support functions 1745 */ 1746 1747 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, 1748 const struct in6_addr *dest) 1749 { 1750 struct net *net = dev_net(ort->rt6i_dev); 1751 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 1752 ort->dst.dev, 0); 1753 1754 if (rt) { 1755 rt->dst.input = ort->dst.input; 1756 rt->dst.output = ort->dst.output; 1757 1758 ipv6_addr_copy(&rt->rt6i_dst.addr, dest); 1759 rt->rt6i_dst.plen = ort->rt6i_dst.plen; 1760 dst_copy_metrics(&rt->dst, &ort->dst); 1761 rt->dst.error = ort->dst.error; 1762 rt->rt6i_idev = ort->rt6i_idev; 1763 if (rt->rt6i_idev) 1764 in6_dev_hold(rt->rt6i_idev); 1765 rt->dst.lastuse = jiffies; 1766 rt->rt6i_expires = 0; 1767 1768 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1769 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1770 rt->rt6i_metric = 0; 1771 1772 #ifdef CONFIG_IPV6_SUBTREES 1773 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1774 #endif 1775 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 1776 rt->rt6i_table = ort->rt6i_table; 1777 } 1778 return rt; 1779 } 1780 1781 #ifdef CONFIG_IPV6_ROUTE_INFO 1782 static struct rt6_info *rt6_get_route_info(struct net *net, 1783 const struct in6_addr *prefix, int prefixlen, 1784 const struct in6_addr *gwaddr, int ifindex) 1785 { 1786 struct fib6_node *fn; 1787 struct rt6_info *rt = NULL; 1788 struct fib6_table *table; 1789 1790 table = fib6_get_table(net, RT6_TABLE_INFO); 1791 if (table == NULL) 1792 return NULL; 1793 1794 write_lock_bh(&table->tb6_lock); 1795 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1796 if (!fn) 1797 goto out; 1798 1799 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1800 if (rt->rt6i_dev->ifindex != ifindex) 1801 continue; 1802 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1803 continue; 1804 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1805 continue; 1806 dst_hold(&rt->dst); 1807 break; 1808 } 1809 out: 1810 write_unlock_bh(&table->tb6_lock); 1811 return rt; 1812 } 1813 1814 static struct rt6_info *rt6_add_route_info(struct net *net, 1815 const struct in6_addr *prefix, int prefixlen, 1816 const struct in6_addr *gwaddr, int ifindex, 1817 unsigned pref) 1818 { 1819 struct fib6_config cfg = { 1820 .fc_table = RT6_TABLE_INFO, 1821 .fc_metric = IP6_RT_PRIO_USER, 1822 .fc_ifindex = ifindex, 1823 .fc_dst_len = prefixlen, 1824 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1825 RTF_UP | RTF_PREF(pref), 1826 .fc_nlinfo.pid = 0, 1827 .fc_nlinfo.nlh = NULL, 1828 .fc_nlinfo.nl_net = net, 1829 }; 1830 1831 ipv6_addr_copy(&cfg.fc_dst, prefix); 1832 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1833 1834 /* We should treat it as a default route if prefix length is 0. 
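	   (RFC 4191 defines a Route Information Option with prefix length 0
	   as equivalent to a default route, hence RTF_DEFAULT below.)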
*/ 1835 if (!prefixlen) 1836 cfg.fc_flags |= RTF_DEFAULT; 1837 1838 ip6_route_add(&cfg); 1839 1840 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1841 } 1842 #endif 1843 1844 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 1845 { 1846 struct rt6_info *rt; 1847 struct fib6_table *table; 1848 1849 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1850 if (table == NULL) 1851 return NULL; 1852 1853 write_lock_bh(&table->tb6_lock); 1854 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1855 if (dev == rt->rt6i_dev && 1856 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1857 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1858 break; 1859 } 1860 if (rt) 1861 dst_hold(&rt->dst); 1862 write_unlock_bh(&table->tb6_lock); 1863 return rt; 1864 } 1865 1866 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 1867 struct net_device *dev, 1868 unsigned int pref) 1869 { 1870 struct fib6_config cfg = { 1871 .fc_table = RT6_TABLE_DFLT, 1872 .fc_metric = IP6_RT_PRIO_USER, 1873 .fc_ifindex = dev->ifindex, 1874 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1875 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1876 .fc_nlinfo.pid = 0, 1877 .fc_nlinfo.nlh = NULL, 1878 .fc_nlinfo.nl_net = dev_net(dev), 1879 }; 1880 1881 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1882 1883 ip6_route_add(&cfg); 1884 1885 return rt6_get_dflt_router(gwaddr, dev); 1886 } 1887 1888 void rt6_purge_dflt_routers(struct net *net) 1889 { 1890 struct rt6_info *rt; 1891 struct fib6_table *table; 1892 1893 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1894 table = fib6_get_table(net, RT6_TABLE_DFLT); 1895 if (table == NULL) 1896 return; 1897 1898 restart: 1899 read_lock_bh(&table->tb6_lock); 1900 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 1901 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1902 dst_hold(&rt->dst); 1903 read_unlock_bh(&table->tb6_lock); 1904 ip6_del_rt(rt); 1905 goto restart; 1906 } 1907 } 1908 read_unlock_bh(&table->tb6_lock); 1909 } 1910 1911 static void rtmsg_to_fib6_config(struct net *net, 1912 struct in6_rtmsg *rtmsg, 1913 struct fib6_config *cfg) 1914 { 1915 memset(cfg, 0, sizeof(*cfg)); 1916 1917 cfg->fc_table = RT6_TABLE_MAIN; 1918 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1919 cfg->fc_metric = rtmsg->rtmsg_metric; 1920 cfg->fc_expires = rtmsg->rtmsg_info; 1921 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1922 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1923 cfg->fc_flags = rtmsg->rtmsg_flags; 1924 1925 cfg->fc_nlinfo.nl_net = net; 1926 1927 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1928 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1929 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1930 } 1931 1932 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1933 { 1934 struct fib6_config cfg; 1935 struct in6_rtmsg rtmsg; 1936 int err; 1937 1938 switch(cmd) { 1939 case SIOCADDRT: /* Add a route */ 1940 case SIOCDELRT: /* Delete a route */ 1941 if (!capable(CAP_NET_ADMIN)) 1942 return -EPERM; 1943 err = copy_from_user(&rtmsg, arg, 1944 sizeof(struct in6_rtmsg)); 1945 if (err) 1946 return -EFAULT; 1947 1948 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1949 1950 rtnl_lock(); 1951 switch (cmd) { 1952 case SIOCADDRT: 1953 err = ip6_route_add(&cfg); 1954 break; 1955 case SIOCDELRT: 1956 err = ip6_route_del(&cfg); 1957 break; 1958 default: 1959 err = -EINVAL; 1960 } 1961 rtnl_unlock(); 1962 1963 return err; 1964 } 1965 1966 return -EINVAL; 1967 } 1968 1969 /* 
1970 * Drop the packet on the floor 1971 */ 1972 1973 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 1974 { 1975 int type; 1976 struct dst_entry *dst = skb_dst(skb); 1977 switch (ipstats_mib_noroutes) { 1978 case IPSTATS_MIB_INNOROUTES: 1979 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1980 if (type == IPV6_ADDR_ANY) { 1981 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1982 IPSTATS_MIB_INADDRERRORS); 1983 break; 1984 } 1985 /* FALLTHROUGH */ 1986 case IPSTATS_MIB_OUTNOROUTES: 1987 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1988 ipstats_mib_noroutes); 1989 break; 1990 } 1991 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 1992 kfree_skb(skb); 1993 return 0; 1994 } 1995 1996 static int ip6_pkt_discard(struct sk_buff *skb) 1997 { 1998 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 1999 } 2000 2001 static int ip6_pkt_discard_out(struct sk_buff *skb) 2002 { 2003 skb->dev = skb_dst(skb)->dev; 2004 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2005 } 2006 2007 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2008 2009 static int ip6_pkt_prohibit(struct sk_buff *skb) 2010 { 2011 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2012 } 2013 2014 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2015 { 2016 skb->dev = skb_dst(skb)->dev; 2017 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2018 } 2019 2020 #endif 2021 2022 /* 2023 * Allocate a dst for local (unicast / anycast) address. 2024 */ 2025 2026 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2027 const struct in6_addr *addr, 2028 int anycast) 2029 { 2030 struct net *net = dev_net(idev->dev); 2031 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 2032 net->loopback_dev, 0); 2033 struct neighbour *neigh; 2034 2035 if (rt == NULL) { 2036 if (net_ratelimit()) 2037 pr_warning("IPv6: Maximum number of routes reached," 2038 " consider increasing route/max_size.\n"); 2039 return ERR_PTR(-ENOMEM); 2040 } 2041 2042 in6_dev_hold(idev); 2043 2044 rt->dst.flags |= DST_HOST; 2045 rt->dst.input = ip6_input; 2046 rt->dst.output = ip6_output; 2047 rt->rt6i_idev = idev; 2048 rt->dst.obsolete = -1; 2049 2050 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2051 if (anycast) 2052 rt->rt6i_flags |= RTF_ANYCAST; 2053 else 2054 rt->rt6i_flags |= RTF_LOCAL; 2055 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 2056 if (IS_ERR(neigh)) { 2057 dst_free(&rt->dst); 2058 2059 return ERR_CAST(neigh); 2060 } 2061 dst_set_neighbour(&rt->dst, neigh); 2062 2063 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 2064 rt->rt6i_dst.plen = 128; 2065 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2066 2067 atomic_set(&rt->dst.__refcnt, 1); 2068 2069 return rt; 2070 } 2071 2072 int ip6_route_get_saddr(struct net *net, 2073 struct rt6_info *rt, 2074 const struct in6_addr *daddr, 2075 unsigned int prefs, 2076 struct in6_addr *saddr) 2077 { 2078 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2079 int err = 0; 2080 if (rt->rt6i_prefsrc.plen) 2081 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); 2082 else 2083 err = ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, 2084 daddr, prefs, saddr); 2085 return err; 2086 } 2087 2088 /* remove deleted ip from prefsrc entries */ 2089 struct arg_dev_net_ip { 2090 struct net_device *dev; 2091 struct net *net; 2092 struct in6_addr *addr; 2093 }; 2094 2095 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2096 { 2097 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2098 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2099 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2100 2101 if (((void *)rt->rt6i_dev == dev || dev == NULL) && 2102 rt != net->ipv6.ip6_null_entry && 2103 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2104 /* remove prefsrc entry */ 2105 rt->rt6i_prefsrc.plen = 0; 2106 } 2107 return 0; 2108 } 2109 2110 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2111 { 2112 struct net *net = dev_net(ifp->idev->dev); 2113 struct arg_dev_net_ip adni = { 2114 .dev = ifp->idev->dev, 2115 .net = net, 2116 .addr = &ifp->addr, 2117 }; 2118 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); 2119 } 2120 2121 struct arg_dev_net { 2122 struct net_device *dev; 2123 struct net *net; 2124 }; 2125 2126 static int fib6_ifdown(struct rt6_info *rt, void *arg) 2127 { 2128 const struct arg_dev_net *adn = arg; 2129 const struct net_device *dev = adn->dev; 2130 2131 if ((rt->rt6i_dev == dev || dev == NULL) && 2132 rt != adn->net->ipv6.ip6_null_entry) { 2133 RT6_TRACE("deleted by ifdown %p\n", rt); 2134 return -1; 2135 } 2136 return 0; 2137 } 2138 2139 void rt6_ifdown(struct net *net, struct net_device *dev) 2140 { 2141 struct arg_dev_net adn = { 2142 .dev = dev, 2143 .net = net, 2144 }; 2145 2146 fib6_clean_all(net, fib6_ifdown, 0, &adn); 2147 icmp6_clean_all(fib6_ifdown, &adn); 2148 } 2149 2150 struct rt6_mtu_change_arg 2151 { 2152 struct net_device *dev; 2153 unsigned mtu; 2154 }; 2155 2156 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2157 { 2158 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2159 struct inet6_dev *idev; 2160 2161 /* In IPv6 pmtu discovery is not optional, 2162 so that RTAX_MTU lock cannot disable it. 2163 We still use this lock to block changes 2164 caused by addrconf/ndisc. 2165 */ 2166 2167 idev = __in6_dev_get(arg->dev); 2168 if (idev == NULL) 2169 return 0; 2170 2171 /* For administrative MTU increase, there is no way to discover 2172 IPv6 PMTU increase, so PMTU increase should be updated here. 2173 Since RFC 1981 doesn't include administrative MTU increase 2174 update PMTU increase is a MUST. (i.e. jumbo frame) 2175 */ 2176 /* 2177 If new MTU is less than route PMTU, this new MTU will be the 2178 lowest MTU in the path, update the route PMTU to reflect PMTU 2179 decreases; if new MTU is greater than route PMTU, and the 2180 old MTU is the lowest MTU in the path, update the route PMTU 2181 to reflect the increase. In this case if the other nodes' MTU 2182 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2183 PMTU discouvery. 
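	   In the check below the MTU metric is rewritten only for routes on
	   the changed device whose RTAX_MTU is not locked, and only when the
	   route MTU is being lowered or when it equals the MTU recorded in
	   idev->cnf.mtu6 (i.e. it was inherited from the device) and should
	   therefore follow the device's new MTU.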
2184 */ 2185 if (rt->rt6i_dev == arg->dev && 2186 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2187 (dst_mtu(&rt->dst) >= arg->mtu || 2188 (dst_mtu(&rt->dst) < arg->mtu && 2189 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2190 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2191 } 2192 return 0; 2193 } 2194 2195 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2196 { 2197 struct rt6_mtu_change_arg arg = { 2198 .dev = dev, 2199 .mtu = mtu, 2200 }; 2201 2202 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2203 } 2204 2205 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2206 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2207 [RTA_OIF] = { .type = NLA_U32 }, 2208 [RTA_IIF] = { .type = NLA_U32 }, 2209 [RTA_PRIORITY] = { .type = NLA_U32 }, 2210 [RTA_METRICS] = { .type = NLA_NESTED }, 2211 }; 2212 2213 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2214 struct fib6_config *cfg) 2215 { 2216 struct rtmsg *rtm; 2217 struct nlattr *tb[RTA_MAX+1]; 2218 int err; 2219 2220 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2221 if (err < 0) 2222 goto errout; 2223 2224 err = -EINVAL; 2225 rtm = nlmsg_data(nlh); 2226 memset(cfg, 0, sizeof(*cfg)); 2227 2228 cfg->fc_table = rtm->rtm_table; 2229 cfg->fc_dst_len = rtm->rtm_dst_len; 2230 cfg->fc_src_len = rtm->rtm_src_len; 2231 cfg->fc_flags = RTF_UP; 2232 cfg->fc_protocol = rtm->rtm_protocol; 2233 2234 if (rtm->rtm_type == RTN_UNREACHABLE) 2235 cfg->fc_flags |= RTF_REJECT; 2236 2237 if (rtm->rtm_type == RTN_LOCAL) 2238 cfg->fc_flags |= RTF_LOCAL; 2239 2240 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2241 cfg->fc_nlinfo.nlh = nlh; 2242 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2243 2244 if (tb[RTA_GATEWAY]) { 2245 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2246 cfg->fc_flags |= RTF_GATEWAY; 2247 } 2248 2249 if (tb[RTA_DST]) { 2250 int plen = (rtm->rtm_dst_len + 7) >> 3; 2251 2252 if (nla_len(tb[RTA_DST]) < plen) 2253 goto errout; 2254 2255 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2256 } 2257 2258 if (tb[RTA_SRC]) { 2259 int plen = (rtm->rtm_src_len + 7) >> 3; 2260 2261 if (nla_len(tb[RTA_SRC]) < plen) 2262 goto errout; 2263 2264 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2265 } 2266 2267 if (tb[RTA_PREFSRC]) 2268 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); 2269 2270 if (tb[RTA_OIF]) 2271 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2272 2273 if (tb[RTA_PRIORITY]) 2274 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2275 2276 if (tb[RTA_METRICS]) { 2277 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2278 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2279 } 2280 2281 if (tb[RTA_TABLE]) 2282 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2283 2284 err = 0; 2285 errout: 2286 return err; 2287 } 2288 2289 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2290 { 2291 struct fib6_config cfg; 2292 int err; 2293 2294 err = rtm_to_fib6_config(skb, nlh, &cfg); 2295 if (err < 0) 2296 return err; 2297 2298 return ip6_route_del(&cfg); 2299 } 2300 2301 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2302 { 2303 struct fib6_config cfg; 2304 int err; 2305 2306 err = rtm_to_fib6_config(skb, nlh, &cfg); 2307 if (err < 0) 2308 return err; 2309 2310 return ip6_route_add(&cfg); 2311 } 2312 2313 static inline size_t rt6_nlmsg_size(void) 2314 { 2315 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2316 + nla_total_size(16) /* RTA_SRC */ 2317 + nla_total_size(16) /* RTA_DST */ 2318 + nla_total_size(16) /* RTA_GATEWAY */ 2319 + 
nla_total_size(16) /* RTA_PREFSRC */ 2320 + nla_total_size(4) /* RTA_TABLE */ 2321 + nla_total_size(4) /* RTA_IIF */ 2322 + nla_total_size(4) /* RTA_OIF */ 2323 + nla_total_size(4) /* RTA_PRIORITY */ 2324 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2325 + nla_total_size(sizeof(struct rta_cacheinfo)); 2326 } 2327 2328 static int rt6_fill_node(struct net *net, 2329 struct sk_buff *skb, struct rt6_info *rt, 2330 struct in6_addr *dst, struct in6_addr *src, 2331 int iif, int type, u32 pid, u32 seq, 2332 int prefix, int nowait, unsigned int flags) 2333 { 2334 struct rtmsg *rtm; 2335 struct nlmsghdr *nlh; 2336 long expires; 2337 u32 table; 2338 struct neighbour *n; 2339 2340 if (prefix) { /* user wants prefix routes only */ 2341 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2342 /* success since this is not a prefix route */ 2343 return 1; 2344 } 2345 } 2346 2347 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2348 if (nlh == NULL) 2349 return -EMSGSIZE; 2350 2351 rtm = nlmsg_data(nlh); 2352 rtm->rtm_family = AF_INET6; 2353 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2354 rtm->rtm_src_len = rt->rt6i_src.plen; 2355 rtm->rtm_tos = 0; 2356 if (rt->rt6i_table) 2357 table = rt->rt6i_table->tb6_id; 2358 else 2359 table = RT6_TABLE_UNSPEC; 2360 rtm->rtm_table = table; 2361 NLA_PUT_U32(skb, RTA_TABLE, table); 2362 if (rt->rt6i_flags&RTF_REJECT) 2363 rtm->rtm_type = RTN_UNREACHABLE; 2364 else if (rt->rt6i_flags&RTF_LOCAL) 2365 rtm->rtm_type = RTN_LOCAL; 2366 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2367 rtm->rtm_type = RTN_LOCAL; 2368 else 2369 rtm->rtm_type = RTN_UNICAST; 2370 rtm->rtm_flags = 0; 2371 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2372 rtm->rtm_protocol = rt->rt6i_protocol; 2373 if (rt->rt6i_flags&RTF_DYNAMIC) 2374 rtm->rtm_protocol = RTPROT_REDIRECT; 2375 else if (rt->rt6i_flags & RTF_ADDRCONF) 2376 rtm->rtm_protocol = RTPROT_KERNEL; 2377 else if (rt->rt6i_flags&RTF_DEFAULT) 2378 rtm->rtm_protocol = RTPROT_RA; 2379 2380 if (rt->rt6i_flags&RTF_CACHE) 2381 rtm->rtm_flags |= RTM_F_CLONED; 2382 2383 if (dst) { 2384 NLA_PUT(skb, RTA_DST, 16, dst); 2385 rtm->rtm_dst_len = 128; 2386 } else if (rtm->rtm_dst_len) 2387 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2388 #ifdef CONFIG_IPV6_SUBTREES 2389 if (src) { 2390 NLA_PUT(skb, RTA_SRC, 16, src); 2391 rtm->rtm_src_len = 128; 2392 } else if (rtm->rtm_src_len) 2393 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2394 #endif 2395 if (iif) { 2396 #ifdef CONFIG_IPV6_MROUTE 2397 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2398 int err = ip6mr_get_route(net, skb, rtm, nowait); 2399 if (err <= 0) { 2400 if (!nowait) { 2401 if (err == 0) 2402 return 0; 2403 goto nla_put_failure; 2404 } else { 2405 if (err == -EMSGSIZE) 2406 goto nla_put_failure; 2407 } 2408 } 2409 } else 2410 #endif 2411 NLA_PUT_U32(skb, RTA_IIF, iif); 2412 } else if (dst) { 2413 struct in6_addr saddr_buf; 2414 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) 2415 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2416 } 2417 2418 if (rt->rt6i_prefsrc.plen) { 2419 struct in6_addr saddr_buf; 2420 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr); 2421 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2422 } 2423 2424 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2425 goto nla_put_failure; 2426 2427 rcu_read_lock(); 2428 n = dst_get_neighbour(&rt->dst); 2429 if (n) 2430 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); 2431 rcu_read_unlock(); 2432 2433 if (rt->dst.dev) 2434 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2435 2436 NLA_PUT_U32(skb, 
RTA_PRIORITY, rt->rt6i_metric); 2437 2438 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2439 expires = 0; 2440 else if (rt->rt6i_expires - jiffies < INT_MAX) 2441 expires = rt->rt6i_expires - jiffies; 2442 else 2443 expires = INT_MAX; 2444 2445 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, 2446 expires, rt->dst.error) < 0) 2447 goto nla_put_failure; 2448 2449 return nlmsg_end(skb, nlh); 2450 2451 nla_put_failure: 2452 nlmsg_cancel(skb, nlh); 2453 return -EMSGSIZE; 2454 } 2455 2456 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2457 { 2458 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2459 int prefix; 2460 2461 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2462 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2463 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2464 } else 2465 prefix = 0; 2466 2467 return rt6_fill_node(arg->net, 2468 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2469 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2470 prefix, 0, NLM_F_MULTI); 2471 } 2472 2473 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2474 { 2475 struct net *net = sock_net(in_skb->sk); 2476 struct nlattr *tb[RTA_MAX+1]; 2477 struct rt6_info *rt; 2478 struct sk_buff *skb; 2479 struct rtmsg *rtm; 2480 struct flowi6 fl6; 2481 int err, iif = 0; 2482 2483 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2484 if (err < 0) 2485 goto errout; 2486 2487 err = -EINVAL; 2488 memset(&fl6, 0, sizeof(fl6)); 2489 2490 if (tb[RTA_SRC]) { 2491 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2492 goto errout; 2493 2494 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); 2495 } 2496 2497 if (tb[RTA_DST]) { 2498 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2499 goto errout; 2500 2501 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); 2502 } 2503 2504 if (tb[RTA_IIF]) 2505 iif = nla_get_u32(tb[RTA_IIF]); 2506 2507 if (tb[RTA_OIF]) 2508 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); 2509 2510 if (iif) { 2511 struct net_device *dev; 2512 dev = __dev_get_by_index(net, iif); 2513 if (!dev) { 2514 err = -ENODEV; 2515 goto errout; 2516 } 2517 } 2518 2519 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2520 if (skb == NULL) { 2521 err = -ENOBUFS; 2522 goto errout; 2523 } 2524 2525 /* Reserve room for dummy headers, this skb can pass 2526 through good chunk of routing engine. 2527 */ 2528 skb_reset_mac_header(skb); 2529 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2530 2531 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); 2532 skb_dst_set(skb, &rt->dst); 2533 2534 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2535 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2536 nlh->nlmsg_seq, 0, 0, 0); 2537 if (err < 0) { 2538 kfree_skb(skb); 2539 goto errout; 2540 } 2541 2542 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2543 errout: 2544 return err; 2545 } 2546 2547 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2548 { 2549 struct sk_buff *skb; 2550 struct net *net = info->nl_net; 2551 u32 seq; 2552 int err; 2553 2554 err = -ENOBUFS; 2555 seq = info->nlh != NULL ? 
info->nlh->nlmsg_seq : 0; 2556 2557 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2558 if (skb == NULL) 2559 goto errout; 2560 2561 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2562 event, info->pid, seq, 0, 0, 0); 2563 if (err < 0) { 2564 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2565 WARN_ON(err == -EMSGSIZE); 2566 kfree_skb(skb); 2567 goto errout; 2568 } 2569 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2570 info->nlh, gfp_any()); 2571 return; 2572 errout: 2573 if (err < 0) 2574 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2575 } 2576 2577 static int ip6_route_dev_notify(struct notifier_block *this, 2578 unsigned long event, void *data) 2579 { 2580 struct net_device *dev = (struct net_device *)data; 2581 struct net *net = dev_net(dev); 2582 2583 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2584 net->ipv6.ip6_null_entry->dst.dev = dev; 2585 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2586 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2587 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2588 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2589 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2590 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2591 #endif 2592 } 2593 2594 return NOTIFY_OK; 2595 } 2596 2597 /* 2598 * /proc 2599 */ 2600 2601 #ifdef CONFIG_PROC_FS 2602 2603 struct rt6_proc_arg 2604 { 2605 char *buffer; 2606 int offset; 2607 int length; 2608 int skip; 2609 int len; 2610 }; 2611 2612 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2613 { 2614 struct seq_file *m = p_arg; 2615 struct neighbour *n; 2616 2617 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2618 2619 #ifdef CONFIG_IPV6_SUBTREES 2620 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2621 #else 2622 seq_puts(m, "00000000000000000000000000000000 00 "); 2623 #endif 2624 rcu_read_lock(); 2625 n = dst_get_neighbour(&rt->dst); 2626 if (n) { 2627 seq_printf(m, "%pi6", n->primary_key); 2628 } else { 2629 seq_puts(m, "00000000000000000000000000000000"); 2630 } 2631 rcu_read_unlock(); 2632 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2633 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2634 rt->dst.__use, rt->rt6i_flags, 2635 rt->rt6i_dev ? 
rt->rt6i_dev->name : ""); 2636 return 0; 2637 } 2638 2639 static int ipv6_route_show(struct seq_file *m, void *v) 2640 { 2641 struct net *net = (struct net *)m->private; 2642 fib6_clean_all(net, rt6_info_route, 0, m); 2643 return 0; 2644 } 2645 2646 static int ipv6_route_open(struct inode *inode, struct file *file) 2647 { 2648 return single_open_net(inode, file, ipv6_route_show); 2649 } 2650 2651 static const struct file_operations ipv6_route_proc_fops = { 2652 .owner = THIS_MODULE, 2653 .open = ipv6_route_open, 2654 .read = seq_read, 2655 .llseek = seq_lseek, 2656 .release = single_release_net, 2657 }; 2658 2659 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2660 { 2661 struct net *net = (struct net *)seq->private; 2662 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2663 net->ipv6.rt6_stats->fib_nodes, 2664 net->ipv6.rt6_stats->fib_route_nodes, 2665 net->ipv6.rt6_stats->fib_rt_alloc, 2666 net->ipv6.rt6_stats->fib_rt_entries, 2667 net->ipv6.rt6_stats->fib_rt_cache, 2668 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2669 net->ipv6.rt6_stats->fib_discarded_routes); 2670 2671 return 0; 2672 } 2673 2674 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2675 { 2676 return single_open_net(inode, file, rt6_stats_seq_show); 2677 } 2678 2679 static const struct file_operations rt6_stats_seq_fops = { 2680 .owner = THIS_MODULE, 2681 .open = rt6_stats_seq_open, 2682 .read = seq_read, 2683 .llseek = seq_lseek, 2684 .release = single_release_net, 2685 }; 2686 #endif /* CONFIG_PROC_FS */ 2687 2688 #ifdef CONFIG_SYSCTL 2689 2690 static 2691 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2692 void __user *buffer, size_t *lenp, loff_t *ppos) 2693 { 2694 struct net *net; 2695 int delay; 2696 if (!write) 2697 return -EINVAL; 2698 2699 net = (struct net *)ctl->extra1; 2700 delay = net->ipv6.sysctl.flush_delay; 2701 proc_dointvec(ctl, write, buffer, lenp, ppos); 2702 fib6_run_gc(delay <= 0 ? 
~0UL : (unsigned long)delay, net); 2703 return 0; 2704 } 2705 2706 ctl_table ipv6_route_table_template[] = { 2707 { 2708 .procname = "flush", 2709 .data = &init_net.ipv6.sysctl.flush_delay, 2710 .maxlen = sizeof(int), 2711 .mode = 0200, 2712 .proc_handler = ipv6_sysctl_rtcache_flush 2713 }, 2714 { 2715 .procname = "gc_thresh", 2716 .data = &ip6_dst_ops_template.gc_thresh, 2717 .maxlen = sizeof(int), 2718 .mode = 0644, 2719 .proc_handler = proc_dointvec, 2720 }, 2721 { 2722 .procname = "max_size", 2723 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2724 .maxlen = sizeof(int), 2725 .mode = 0644, 2726 .proc_handler = proc_dointvec, 2727 }, 2728 { 2729 .procname = "gc_min_interval", 2730 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2731 .maxlen = sizeof(int), 2732 .mode = 0644, 2733 .proc_handler = proc_dointvec_jiffies, 2734 }, 2735 { 2736 .procname = "gc_timeout", 2737 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2738 .maxlen = sizeof(int), 2739 .mode = 0644, 2740 .proc_handler = proc_dointvec_jiffies, 2741 }, 2742 { 2743 .procname = "gc_interval", 2744 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2745 .maxlen = sizeof(int), 2746 .mode = 0644, 2747 .proc_handler = proc_dointvec_jiffies, 2748 }, 2749 { 2750 .procname = "gc_elasticity", 2751 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2752 .maxlen = sizeof(int), 2753 .mode = 0644, 2754 .proc_handler = proc_dointvec, 2755 }, 2756 { 2757 .procname = "mtu_expires", 2758 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2759 .maxlen = sizeof(int), 2760 .mode = 0644, 2761 .proc_handler = proc_dointvec_jiffies, 2762 }, 2763 { 2764 .procname = "min_adv_mss", 2765 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2766 .maxlen = sizeof(int), 2767 .mode = 0644, 2768 .proc_handler = proc_dointvec, 2769 }, 2770 { 2771 .procname = "gc_min_interval_ms", 2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2773 .maxlen = sizeof(int), 2774 .mode = 0644, 2775 .proc_handler = proc_dointvec_ms_jiffies, 2776 }, 2777 { } 2778 }; 2779 2780 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2781 { 2782 struct ctl_table *table; 2783 2784 table = kmemdup(ipv6_route_table_template, 2785 sizeof(ipv6_route_table_template), 2786 GFP_KERNEL); 2787 2788 if (table) { 2789 table[0].data = &net->ipv6.sysctl.flush_delay; 2790 table[0].extra1 = net; 2791 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2792 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2793 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2794 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2795 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2796 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2797 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2798 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2799 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2800 } 2801 2802 return table; 2803 } 2804 #endif 2805 2806 static int __net_init ip6_route_net_init(struct net *net) 2807 { 2808 int ret = -ENOMEM; 2809 2810 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2811 sizeof(net->ipv6.ip6_dst_ops)); 2812 2813 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2814 goto out_ip6_dst_ops; 2815 2816 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2817 sizeof(*net->ipv6.ip6_null_entry), 2818 GFP_KERNEL); 2819 if (!net->ipv6.ip6_null_entry) 2820 goto out_ip6_dst_entries; 2821 net->ipv6.ip6_null_entry->dst.path = 2822 (struct dst_entry *)net->ipv6.ip6_null_entry; 2823 net->ipv6.ip6_null_entry->dst.ops = 
&net->ipv6.ip6_dst_ops; 2824 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2825 ip6_template_metrics, true); 2826 2827 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2828 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2829 sizeof(*net->ipv6.ip6_prohibit_entry), 2830 GFP_KERNEL); 2831 if (!net->ipv6.ip6_prohibit_entry) 2832 goto out_ip6_null_entry; 2833 net->ipv6.ip6_prohibit_entry->dst.path = 2834 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2835 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2836 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2837 ip6_template_metrics, true); 2838 2839 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2840 sizeof(*net->ipv6.ip6_blk_hole_entry), 2841 GFP_KERNEL); 2842 if (!net->ipv6.ip6_blk_hole_entry) 2843 goto out_ip6_prohibit_entry; 2844 net->ipv6.ip6_blk_hole_entry->dst.path = 2845 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2846 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2847 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2848 ip6_template_metrics, true); 2849 #endif 2850 2851 net->ipv6.sysctl.flush_delay = 0; 2852 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2853 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2854 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2855 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2856 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2857 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2858 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2859 2860 #ifdef CONFIG_PROC_FS 2861 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2862 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2863 #endif 2864 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2865 2866 ret = 0; 2867 out: 2868 return ret; 2869 2870 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2871 out_ip6_prohibit_entry: 2872 kfree(net->ipv6.ip6_prohibit_entry); 2873 out_ip6_null_entry: 2874 kfree(net->ipv6.ip6_null_entry); 2875 #endif 2876 out_ip6_dst_entries: 2877 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2878 out_ip6_dst_ops: 2879 goto out; 2880 } 2881 2882 static void __net_exit ip6_route_net_exit(struct net *net) 2883 { 2884 #ifdef CONFIG_PROC_FS 2885 proc_net_remove(net, "ipv6_route"); 2886 proc_net_remove(net, "rt6_stats"); 2887 #endif 2888 kfree(net->ipv6.ip6_null_entry); 2889 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2890 kfree(net->ipv6.ip6_prohibit_entry); 2891 kfree(net->ipv6.ip6_blk_hole_entry); 2892 #endif 2893 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2894 } 2895 2896 static struct pernet_operations ip6_route_net_ops = { 2897 .init = ip6_route_net_init, 2898 .exit = ip6_route_net_exit, 2899 }; 2900 2901 static struct notifier_block ip6_route_dev_notifier = { 2902 .notifier_call = ip6_route_dev_notify, 2903 .priority = 0, 2904 }; 2905 2906 int __init ip6_route_init(void) 2907 { 2908 int ret; 2909 2910 ret = -ENOMEM; 2911 ip6_dst_ops_template.kmem_cachep = 2912 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2913 SLAB_HWCACHE_ALIGN, NULL); 2914 if (!ip6_dst_ops_template.kmem_cachep) 2915 goto out; 2916 2917 ret = dst_entries_init(&ip6_dst_blackhole_ops); 2918 if (ret) 2919 goto out_kmem_cache; 2920 2921 ret = register_pernet_subsys(&ip6_route_net_ops); 2922 if (ret) 2923 goto out_dst_entries; 2924 2925 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2926 2927 /* Registering of the loopback is done before this portion of code, 2928 * the loopback reference in rt6_info will not be taken, do it 2929 * manually for 
init_net */ 2930 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 2931 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2933 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 2934 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2935 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 2936 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2937 #endif 2938 ret = fib6_init(); 2939 if (ret) 2940 goto out_register_subsys; 2941 2942 ret = xfrm6_init(); 2943 if (ret) 2944 goto out_fib6_init; 2945 2946 ret = fib6_rules_init(); 2947 if (ret) 2948 goto xfrm6_init; 2949 2950 ret = -ENOBUFS; 2951 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 2952 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 2953 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 2954 goto fib6_rules_init; 2955 2956 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2957 if (ret) 2958 goto fib6_rules_init; 2959 2960 out: 2961 return ret; 2962 2963 fib6_rules_init: 2964 fib6_rules_cleanup(); 2965 xfrm6_init: 2966 xfrm6_fini(); 2967 out_fib6_init: 2968 fib6_gc_cleanup(); 2969 out_register_subsys: 2970 unregister_pernet_subsys(&ip6_route_net_ops); 2971 out_dst_entries: 2972 dst_entries_destroy(&ip6_dst_blackhole_ops); 2973 out_kmem_cache: 2974 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2975 goto out; 2976 } 2977 2978 void ip6_route_cleanup(void) 2979 { 2980 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2981 fib6_rules_cleanup(); 2982 xfrm6_fini(); 2983 fib6_gc_cleanup(); 2984 unregister_pernet_subsys(&ip6_route_net_ops); 2985 dst_entries_destroy(&ip6_dst_blackhole_ops); 2986 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2987 } 2988
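
/*
 * Illustrative, compiled-out sketch (not part of the original source): the
 * fib6_clean_all() walker pattern used above by rt6_remove_prefsrc(),
 * rt6_ifdown() and rt6_mtu_change().  The callback is invoked for every
 * rt6_info in every table; returning -1 asks the walker to delete the entry,
 * returning 0 keeps it.  The struct and function names below are
 * hypothetical and only mirror fib6_ifdown().
 */
#if 0
struct arg_dev_flags {
	struct net_device *dev;
	u32 flags;
};

/* Hypothetical walker: match routes on one device that carry the requested
 * flags (e.g. RTF_CACHE clones), never the per-net null entry.
 */
static int fib6_clean_dev_flags(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_flags *a = arg;

	if (rt->rt6i_dev == a->dev &&
	    (rt->rt6i_flags & a->flags) &&
	    rt != dev_net(a->dev)->ipv6.ip6_null_entry)
		return -1;	/* walker deletes this route */
	return 0;		/* keep this route */
}

static void rt6_flush_dev_clones(struct net *net, struct net_device *dev)
{
	struct arg_dev_flags arg = {
		.dev	= dev,
		.flags	= RTF_CACHE,
	};

	fib6_clean_all(net, fib6_clean_dev_flags, 0, &arg);
}
#endif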
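
/*
 * Compiled-out restatement (not in the original source) of the MTU-update
 * predicate inside rt6_mtu_change_route(), assuming idev->cnf.mtu6 still
 * holds the device MTU from before the change when the walker runs.  The
 * function name is hypothetical; the dev match and RTAX_MTU lock check are
 * handled separately in the real code above.
 */
#if 0
static bool rt6_should_update_pmtu(unsigned int route_mtu,
				   unsigned int old_dev_mtu,
				   unsigned int new_dev_mtu)
{
	/* The device MTU shrank to (or below) the route PMTU: the new value
	 * is now the path minimum, so adopt it.
	 */
	if (route_mtu >= new_dev_mtu)
		return true;
	/* The device MTU grew.  Raise the route PMTU only if the old device
	 * MTU was what limited it; otherwise another hop is the bottleneck
	 * and PMTU discovery will sort out any overshoot.
	 */
	return route_mtu == old_dev_mtu;
}
#endif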
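
/*
 * Userspace sketch (not kernel code, compiled out): the shape of the
 * RTM_NEWROUTE request that inet6_rtm_newroute()/rtm_to_fib6_config() above
 * parse.  The prefix, gateway, ifindex and metric are made-up example
 * values; error handling is omitted.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void nl_addattr(struct nlmsghdr *n, int type, const void *data, int len)
{
	struct rtattr *rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		char buf[256];
	} req;
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct in6_addr dst, gw;
	unsigned int oif = 2;		/* hypothetical ifindex */
	unsigned int prio = 1024;	/* becomes cfg->fc_metric */
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	inet_pton(AF_INET6, "2001:db8:1::", &dst);	/* example prefix */
	inet_pton(AF_INET6, "fe80::1", &gw);		/* example next hop */

	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_NEWROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.rtm.rtm_family = AF_INET6;
	req.rtm.rtm_dst_len = 64;		/* becomes cfg->fc_dst_len */
	req.rtm.rtm_table = RT_TABLE_MAIN;	/* becomes cfg->fc_table */
	req.rtm.rtm_protocol = RTPROT_STATIC;
	req.rtm.rtm_type = RTN_UNICAST;

	nl_addattr(&req.nlh, RTA_DST, &dst, sizeof(dst));
	nl_addattr(&req.nlh, RTA_GATEWAY, &gw, sizeof(gw));	/* sets RTF_GATEWAY */
	nl_addattr(&req.nlh, RTA_OIF, &oif, sizeof(oif));
	nl_addattr(&req.nlh, RTA_PRIORITY, &prio, sizeof(prio));

	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *)&nladdr, sizeof(nladdr));
	close(fd);
	return 0;
}
#endif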
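
/*
 * Userspace sketch (not kernel code, compiled out): issuing the RTM_GETROUTE
 * request served by inet6_rtm_getroute() above and walking the attributes
 * that rt6_fill_node() places in the reply.  The destination address is a
 * made-up example; error handling is omitted.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		char buf[64];
	} req;
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	char reply[4096];
	struct in6_addr dst;
	struct rtattr *rta;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int len;

	memset(&req, 0, sizeof(req));
	inet_pton(AF_INET6, "2001:db8::1", &dst);	/* example destination */

	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtm.rtm_family = AF_INET6;

	/* RTA_DST is what inet6_rtm_getroute() copies into fl6.daddr. */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(dst));
	memcpy(RTA_DATA(rta), &dst, sizeof(dst));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *)&nladdr, sizeof(nladdr));
	len = recv(fd, reply, sizeof(reply), 0);

	nlh = (struct nlmsghdr *)reply;
	if (NLMSG_OK(nlh, len) && nlh->nlmsg_type == RTM_NEWROUTE) {
		rtm = NLMSG_DATA(nlh);
		len = RTM_PAYLOAD(nlh);
		/* Attributes filled in by rt6_fill_node(): RTA_DST,
		 * RTA_GATEWAY, RTA_OIF, RTA_PRIORITY, metrics, cacheinfo, ...
		 */
		for (rta = RTM_RTA(rtm); RTA_OK(rta, len); rta = RTA_NEXT(rta, len))
			printf("attr type %u len %u\n", rta->rta_type, rta->rta_len);
	}
	close(fd);
	return 0;
}
#endif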
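
/*
 * Userspace sketch (not kernel code, compiled out): reading back the table
 * dump produced line by line by rt6_info_route() via /proc/net/ipv6_route.
 * Error handling is minimal.
 */
#if 0
#include <stdio.h>

int main(void)
{
	/* Field order matches the seq_printf() calls in rt6_info_route():
	 * destination/plen, source/plen (all zero unless CONFIG_IPV6_SUBTREES),
	 * next hop, metric, refcnt, use, flags, device name.  Addresses are
	 * plain hex with no colons (%pi6).
	 */
	char line[256], dst[33], src[33], gw[33], dev[17];
	unsigned int dst_plen, src_plen, metric, refcnt, use, flags;
	FILE *f = fopen("/proc/net/ipv6_route", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%32s %x %32s %x %32s %x %x %x %x %16s",
			   dst, &dst_plen, src, &src_plen, gw,
			   &metric, &refcnt, &use, &flags, dev) != 10)
			continue;
		printf("%s/%u dev %s metric %u flags %08x\n",
		       dst, dst_plen, dev, metric, flags);
	}
	fclose(f);
	return 0;
}
#endif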
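
/*
 * Userspace sketch (not kernel code, compiled out): touching two of the
 * sysctls registered from ipv6_route_table_template above via their
 * /proc/sys paths (net.ipv6.route.*).
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f;
	int gc_thresh = -1;

	/* net.ipv6.route.gc_thresh: mode 0644, plain proc_dointvec. */
	f = fopen("/proc/sys/net/ipv6/route/gc_thresh", "r");
	if (f) {
		if (fscanf(f, "%d", &gc_thresh) != 1)
			gc_thresh = -1;
		fclose(f);
	}
	printf("gc_thresh = %d\n", gc_thresh);

	/* net.ipv6.route.flush is write-only (mode 0200); writing to it runs
	 * fib6_run_gc() via ipv6_sysctl_rtcache_flush().
	 */
	f = fopen("/proc/sys/net/ipv6/route/flush", "w");
	if (f) {
		fputs("0\n", f);
		fclose(f);
	}
	return 0;
}
#endif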