/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/if_arp.h>

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...)
do { ; } while (0) 75 #endif 76 77 #define CLONE_OFFLINK_ROUTE 0 78 79 static int ip6_rt_max_size = 4096; 80 static int ip6_rt_gc_min_interval = HZ / 2; 81 static int ip6_rt_gc_timeout = 60*HZ; 82 int ip6_rt_gc_interval = 30*HZ; 83 static int ip6_rt_gc_elasticity = 9; 84 static int ip6_rt_mtu_expires = 10*60*HZ; 85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 86 87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 89 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 90 static void ip6_dst_destroy(struct dst_entry *); 91 static void ip6_dst_ifdown(struct dst_entry *, 92 struct net_device *dev, int how); 93 static int ip6_dst_gc(void); 94 95 static int ip6_pkt_discard(struct sk_buff *skb); 96 static int ip6_pkt_discard_out(struct sk_buff *skb); 97 static void ip6_link_failure(struct sk_buff *skb); 98 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 99 100 #ifdef CONFIG_IPV6_ROUTE_INFO 101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 102 struct in6_addr *gwaddr, int ifindex, 103 unsigned pref); 104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 105 struct in6_addr *gwaddr, int ifindex); 106 #endif 107 108 static struct dst_ops ip6_dst_ops = { 109 .family = AF_INET6, 110 .protocol = __constant_htons(ETH_P_IPV6), 111 .gc = ip6_dst_gc, 112 .gc_thresh = 1024, 113 .check = ip6_dst_check, 114 .destroy = ip6_dst_destroy, 115 .ifdown = ip6_dst_ifdown, 116 .negative_advice = ip6_negative_advice, 117 .link_failure = ip6_link_failure, 118 .update_pmtu = ip6_rt_update_pmtu, 119 .entry_size = sizeof(struct rt6_info), 120 }; 121 122 struct rt6_info ip6_null_entry = { 123 .u = { 124 .dst = { 125 .__refcnt = ATOMIC_INIT(1), 126 .__use = 1, 127 .dev = &loopback_dev, 128 .obsolete = -1, 129 .error = -ENETUNREACH, 130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 131 .input = ip6_pkt_discard, 132 .output = ip6_pkt_discard_out, 133 .ops = &ip6_dst_ops, 134 .path = (struct dst_entry*)&ip6_null_entry, 135 } 136 }, 137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 138 .rt6i_metric = ~(u32) 0, 139 .rt6i_ref = ATOMIC_INIT(1), 140 }; 141 142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 143 144 static int ip6_pkt_prohibit(struct sk_buff *skb); 145 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 146 static int ip6_pkt_blk_hole(struct sk_buff *skb); 147 148 struct rt6_info ip6_prohibit_entry = { 149 .u = { 150 .dst = { 151 .__refcnt = ATOMIC_INIT(1), 152 .__use = 1, 153 .dev = &loopback_dev, 154 .obsolete = -1, 155 .error = -EACCES, 156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 157 .input = ip6_pkt_prohibit, 158 .output = ip6_pkt_prohibit_out, 159 .ops = &ip6_dst_ops, 160 .path = (struct dst_entry*)&ip6_prohibit_entry, 161 } 162 }, 163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 164 .rt6i_metric = ~(u32) 0, 165 .rt6i_ref = ATOMIC_INIT(1), 166 }; 167 168 struct rt6_info ip6_blk_hole_entry = { 169 .u = { 170 .dst = { 171 .__refcnt = ATOMIC_INIT(1), 172 .__use = 1, 173 .dev = &loopback_dev, 174 .obsolete = -1, 175 .error = -EINVAL, 176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 177 .input = ip6_pkt_blk_hole, 178 .output = ip6_pkt_blk_hole, 179 .ops = &ip6_dst_ops, 180 .path = (struct dst_entry*)&ip6_blk_hole_entry, 181 } 182 }, 183 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 184 .rt6i_metric = ~(u32) 0, 185 .rt6i_ref = ATOMIC_INIT(1), 186 }; 187 188 #endif 189 190 /* allocate dst with ip6_dst_ops */ 191 static __inline__ struct 
rt6_info *ip6_dst_alloc(void) 192 { 193 return (struct rt6_info *)dst_alloc(&ip6_dst_ops); 194 } 195 196 static void ip6_dst_destroy(struct dst_entry *dst) 197 { 198 struct rt6_info *rt = (struct rt6_info *)dst; 199 struct inet6_dev *idev = rt->rt6i_idev; 200 201 if (idev != NULL) { 202 rt->rt6i_idev = NULL; 203 in6_dev_put(idev); 204 } 205 } 206 207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 208 int how) 209 { 210 struct rt6_info *rt = (struct rt6_info *)dst; 211 struct inet6_dev *idev = rt->rt6i_idev; 212 213 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { 214 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); 215 if (loopback_idev != NULL) { 216 rt->rt6i_idev = loopback_idev; 217 in6_dev_put(idev); 218 } 219 } 220 } 221 222 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 223 { 224 return (rt->rt6i_flags & RTF_EXPIRES && 225 time_after(jiffies, rt->rt6i_expires)); 226 } 227 228 static inline int rt6_need_strict(struct in6_addr *daddr) 229 { 230 return (ipv6_addr_type(daddr) & 231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); 232 } 233 234 /* 235 * Route lookup. Any table->tb6_lock is implied. 236 */ 237 238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, 239 int oif, 240 int strict) 241 { 242 struct rt6_info *local = NULL; 243 struct rt6_info *sprt; 244 245 if (oif) { 246 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 247 struct net_device *dev = sprt->rt6i_dev; 248 if (dev->ifindex == oif) 249 return sprt; 250 if (dev->flags & IFF_LOOPBACK) { 251 if (sprt->rt6i_idev == NULL || 252 sprt->rt6i_idev->dev->ifindex != oif) { 253 if (strict && oif) 254 continue; 255 if (local && (!oif || 256 local->rt6i_idev->dev->ifindex == oif)) 257 continue; 258 } 259 local = sprt; 260 } 261 } 262 263 if (local) 264 return local; 265 266 if (strict) 267 return &ip6_null_entry; 268 } 269 return rt; 270 } 271 272 #ifdef CONFIG_IPV6_ROUTER_PREF 273 static void rt6_probe(struct rt6_info *rt) 274 { 275 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 276 /* 277 * Okay, this does not seem to be appropriate 278 * for now, however, we need to check if it 279 * is really so; aka Router Reachability Probing. 280 * 281 * Router Reachability Probe MUST be rate-limited 282 * to no more than one per minute. 
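 *	(a short illustrative note follows; the original comment and code
 *	resume below)
 */

/*
 *	Sketch of the rate limit described above, assuming (as the code
 *	below does) that neigh->updated and idev->cnf.rtr_probe_interval
 *	are both measured in jiffies.  A probe is considered only when the
 *	neighbour is not in a NUD_VALID state and the interval has passed:
 *
 *		if (!(neigh->nud_state & NUD_VALID) &&
 *		    time_after(jiffies,
 *			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 *			neigh->updated = jiffies;
 *			... send a neighbour solicitation to the
 *			    solicited-node multicast address ...
 *		}
 *
 *	This only restates the checks rt6_probe() performs under
 *	neigh->lock; it is not a separate mechanism.
 */

/*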
283 */ 284 if (!neigh || (neigh->nud_state & NUD_VALID)) 285 return; 286 read_lock_bh(&neigh->lock); 287 if (!(neigh->nud_state & NUD_VALID) && 288 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 289 struct in6_addr mcaddr; 290 struct in6_addr *target; 291 292 neigh->updated = jiffies; 293 read_unlock_bh(&neigh->lock); 294 295 target = (struct in6_addr *)&neigh->primary_key; 296 addrconf_addr_solict_mult(target, &mcaddr); 297 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 298 } else 299 read_unlock_bh(&neigh->lock); 300 } 301 #else 302 static inline void rt6_probe(struct rt6_info *rt) 303 { 304 return; 305 } 306 #endif 307 308 /* 309 * Default Router Selection (RFC 2461 6.3.6) 310 */ 311 static int inline rt6_check_dev(struct rt6_info *rt, int oif) 312 { 313 struct net_device *dev = rt->rt6i_dev; 314 int ret = 0; 315 316 if (!oif) 317 return 2; 318 if (dev->flags & IFF_LOOPBACK) { 319 if (!WARN_ON(rt->rt6i_idev == NULL) && 320 rt->rt6i_idev->dev->ifindex == oif) 321 ret = 1; 322 else 323 return 0; 324 } 325 if (dev->ifindex == oif) 326 return 2; 327 328 return ret; 329 } 330 331 static int inline rt6_check_neigh(struct rt6_info *rt) 332 { 333 struct neighbour *neigh = rt->rt6i_nexthop; 334 int m = 0; 335 if (rt->rt6i_flags & RTF_NONEXTHOP || 336 !(rt->rt6i_flags & RTF_GATEWAY)) 337 m = 1; 338 else if (neigh) { 339 read_lock_bh(&neigh->lock); 340 if (neigh->nud_state & NUD_VALID) 341 m = 2; 342 else if (!(neigh->nud_state & NUD_FAILED)) 343 m = 1; 344 read_unlock_bh(&neigh->lock); 345 } 346 return m; 347 } 348 349 static int rt6_score_route(struct rt6_info *rt, int oif, 350 int strict) 351 { 352 int m, n; 353 354 m = rt6_check_dev(rt, oif); 355 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 356 return -1; 357 #ifdef CONFIG_IPV6_ROUTER_PREF 358 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 359 #endif 360 n = rt6_check_neigh(rt); 361 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 362 return -1; 363 return m; 364 } 365 366 static struct rt6_info *rt6_select(struct rt6_info **head, int oif, 367 int strict) 368 { 369 struct rt6_info *match = NULL, *last = NULL; 370 struct rt6_info *rt, *rt0 = *head; 371 u32 metric; 372 int mpri = -1; 373 374 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n", 375 __FUNCTION__, head, head ? *head : NULL, oif); 376 377 for (rt = rt0, metric = rt0->rt6i_metric; 378 rt && rt->rt6i_metric == metric && (!last || rt != rt0); 379 rt = rt->u.dst.rt6_next) { 380 int m; 381 382 if (rt6_check_expired(rt)) 383 continue; 384 385 last = rt; 386 387 m = rt6_score_route(rt, oif, strict); 388 if (m < 0) 389 continue; 390 391 if (m > mpri) { 392 if (strict & RT6_LOOKUP_F_REACHABLE) 393 rt6_probe(match); 394 match = rt; 395 mpri = m; 396 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 397 rt6_probe(rt); 398 } 399 } 400 401 if (!match && 402 (strict & RT6_LOOKUP_F_REACHABLE) && 403 last && last != rt0) { 404 /* no entries matched; do round-robin */ 405 static DEFINE_SPINLOCK(lock); 406 spin_lock(&lock); 407 *head = rt0->u.dst.rt6_next; 408 rt0->u.dst.rt6_next = last->u.dst.rt6_next; 409 last->u.dst.rt6_next = rt0; 410 spin_unlock(&lock); 411 } 412 413 RT6_TRACE("%s() => %p, score=%d\n", 414 __FUNCTION__, match, mpri); 415 416 return (match ? 
match : &ip6_null_entry); 417 } 418 419 #ifdef CONFIG_IPV6_ROUTE_INFO 420 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 421 struct in6_addr *gwaddr) 422 { 423 struct route_info *rinfo = (struct route_info *) opt; 424 struct in6_addr prefix_buf, *prefix; 425 unsigned int pref; 426 u32 lifetime; 427 struct rt6_info *rt; 428 429 if (len < sizeof(struct route_info)) { 430 return -EINVAL; 431 } 432 433 /* Sanity check for prefix_len and length */ 434 if (rinfo->length > 3) { 435 return -EINVAL; 436 } else if (rinfo->prefix_len > 128) { 437 return -EINVAL; 438 } else if (rinfo->prefix_len > 64) { 439 if (rinfo->length < 2) { 440 return -EINVAL; 441 } 442 } else if (rinfo->prefix_len > 0) { 443 if (rinfo->length < 1) { 444 return -EINVAL; 445 } 446 } 447 448 pref = rinfo->route_pref; 449 if (pref == ICMPV6_ROUTER_PREF_INVALID) 450 pref = ICMPV6_ROUTER_PREF_MEDIUM; 451 452 lifetime = ntohl(rinfo->lifetime); 453 if (lifetime == 0xffffffff) { 454 /* infinity */ 455 } else if (lifetime > 0x7fffffff/HZ) { 456 /* Avoid arithmetic overflow */ 457 lifetime = 0x7fffffff/HZ - 1; 458 } 459 460 if (rinfo->length == 3) 461 prefix = (struct in6_addr *)rinfo->prefix; 462 else { 463 /* this function is safe */ 464 ipv6_addr_prefix(&prefix_buf, 465 (struct in6_addr *)rinfo->prefix, 466 rinfo->prefix_len); 467 prefix = &prefix_buf; 468 } 469 470 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); 471 472 if (rt && !lifetime) { 473 ip6_del_rt(rt); 474 rt = NULL; 475 } 476 477 if (!rt && lifetime) 478 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 479 pref); 480 else if (rt) 481 rt->rt6i_flags = RTF_ROUTEINFO | 482 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 483 484 if (rt) { 485 if (lifetime == 0xffffffff) { 486 rt->rt6i_flags &= ~RTF_EXPIRES; 487 } else { 488 rt->rt6i_expires = jiffies + HZ * lifetime; 489 rt->rt6i_flags |= RTF_EXPIRES; 490 } 491 dst_release(&rt->u.dst); 492 } 493 return 0; 494 } 495 #endif 496 497 #define BACKTRACK(saddr) \ 498 do { \ 499 if (rt == &ip6_null_entry) { \ 500 struct fib6_node *pn; \ 501 while (1) { \ 502 if (fn->fn_flags & RTN_TL_ROOT) \ 503 goto out; \ 504 pn = fn->parent; \ 505 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 506 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 507 else \ 508 fn = pn; \ 509 if (fn->fn_flags & RTN_RTINFO) \ 510 goto restart; \ 511 } \ 512 } \ 513 } while(0) 514 515 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, 516 struct flowi *fl, int flags) 517 { 518 struct fib6_node *fn; 519 struct rt6_info *rt; 520 521 read_lock_bh(&table->tb6_lock); 522 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 523 restart: 524 rt = fn->leaf; 525 rt = rt6_device_match(rt, fl->oif, flags); 526 BACKTRACK(&fl->fl6_src); 527 out: 528 dst_hold(&rt->u.dst); 529 read_unlock_bh(&table->tb6_lock); 530 531 rt->u.dst.lastuse = jiffies; 532 rt->u.dst.__use++; 533 534 return rt; 535 536 } 537 538 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 539 int oif, int strict) 540 { 541 struct flowi fl = { 542 .oif = oif, 543 .nl_u = { 544 .ip6_u = { 545 .daddr = *daddr, 546 }, 547 }, 548 }; 549 struct dst_entry *dst; 550 int flags = strict ? 
RT6_LOOKUP_F_IFACE : 0; 551 552 if (saddr) { 553 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 554 flags |= RT6_LOOKUP_F_HAS_SADDR; 555 } 556 557 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); 558 if (dst->error == 0) 559 return (struct rt6_info *) dst; 560 561 dst_release(dst); 562 563 return NULL; 564 } 565 566 /* ip6_ins_rt is called with FREE table->tb6_lock. 567 It takes new route entry, the addition fails by any reason the 568 route is freed. In any case, if caller does not hold it, it may 569 be destroyed. 570 */ 571 572 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 573 { 574 int err; 575 struct fib6_table *table; 576 577 table = rt->rt6i_table; 578 write_lock_bh(&table->tb6_lock); 579 err = fib6_add(&table->tb6_root, rt, info); 580 write_unlock_bh(&table->tb6_lock); 581 582 return err; 583 } 584 585 int ip6_ins_rt(struct rt6_info *rt) 586 { 587 return __ip6_ins_rt(rt, NULL); 588 } 589 590 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 591 struct in6_addr *saddr) 592 { 593 struct rt6_info *rt; 594 595 /* 596 * Clone the route. 597 */ 598 599 rt = ip6_rt_copy(ort); 600 601 if (rt) { 602 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 603 if (rt->rt6i_dst.plen != 128 && 604 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 605 rt->rt6i_flags |= RTF_ANYCAST; 606 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 607 } 608 609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 610 rt->rt6i_dst.plen = 128; 611 rt->rt6i_flags |= RTF_CACHE; 612 rt->u.dst.flags |= DST_HOST; 613 614 #ifdef CONFIG_IPV6_SUBTREES 615 if (rt->rt6i_src.plen && saddr) { 616 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 617 rt->rt6i_src.plen = 128; 618 } 619 #endif 620 621 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 622 623 } 624 625 return rt; 626 } 627 628 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 629 { 630 struct rt6_info *rt = ip6_rt_copy(ort); 631 if (rt) { 632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 633 rt->rt6i_dst.plen = 128; 634 rt->rt6i_flags |= RTF_CACHE; 635 rt->u.dst.flags |= DST_HOST; 636 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 637 } 638 return rt; 639 } 640 641 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, 642 struct flowi *fl, int flags) 643 { 644 struct fib6_node *fn; 645 struct rt6_info *rt, *nrt; 646 int strict = 0; 647 int attempts = 3; 648 int err; 649 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 650 651 strict |= flags & RT6_LOOKUP_F_IFACE; 652 653 relookup: 654 read_lock_bh(&table->tb6_lock); 655 656 restart_2: 657 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 658 659 restart: 660 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); 661 BACKTRACK(&fl->fl6_src); 662 if (rt == &ip6_null_entry || 663 rt->rt6i_flags & RTF_CACHE) 664 goto out; 665 666 dst_hold(&rt->u.dst); 667 read_unlock_bh(&table->tb6_lock); 668 669 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 670 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 671 else { 672 #if CLONE_OFFLINK_ROUTE 673 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 674 #else 675 goto out2; 676 #endif 677 } 678 679 dst_release(&rt->u.dst); 680 rt = nrt ? : &ip6_null_entry; 681 682 dst_hold(&rt->u.dst); 683 if (nrt) { 684 err = ip6_ins_rt(nrt); 685 if (!err) 686 goto out2; 687 } 688 689 if (--attempts <= 0) 690 goto out2; 691 692 /* 693 * Race condition! In the gap, when table->tb6_lock was 694 * released someone could insert this route. Relookup. 
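 *	(a short illustrative note follows; the original comment and code
 *	resume below)
 */

/*
 *	Outline of the cloning step above (a restatement, not new logic):
 *	a matching route that is not yet a host cache entry is narrowed to
 *	a /128 RTF_CACHE clone for this destination before insertion:
 *
 *		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 *			// plain on-link prefix route: copy-on-write clone
 *			// that also binds a neighbour entry for daddr
 *			nrt = rt6_alloc_cow(rt, daddr, saddr);
 *		else
 *			// gateway/NONEXTHOP route: rt6_alloc_clone() would
 *			// share the parent's neighbour, but that branch is
 *			// compiled out while CLONE_OFFLINK_ROUTE is 0, so
 *			// the route is used as-is
 *
 *	ip6_ins_rt() can fail if another CPU inserted an equivalent clone
 *	after tb6_lock was dropped, hence the bounded "attempts" retry.
 */

/*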
695 */ 696 dst_release(&rt->u.dst); 697 goto relookup; 698 699 out: 700 if (reachable) { 701 reachable = 0; 702 goto restart_2; 703 } 704 dst_hold(&rt->u.dst); 705 read_unlock_bh(&table->tb6_lock); 706 out2: 707 rt->u.dst.lastuse = jiffies; 708 rt->u.dst.__use++; 709 710 return rt; 711 } 712 713 void ip6_route_input(struct sk_buff *skb) 714 { 715 struct ipv6hdr *iph = skb->nh.ipv6h; 716 int flags = RT6_LOOKUP_F_HAS_SADDR; 717 struct flowi fl = { 718 .iif = skb->dev->ifindex, 719 .nl_u = { 720 .ip6_u = { 721 .daddr = iph->daddr, 722 .saddr = iph->saddr, 723 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 724 }, 725 }, 726 .mark = skb->mark, 727 .proto = iph->nexthdr, 728 }; 729 730 if (rt6_need_strict(&iph->daddr)) 731 flags |= RT6_LOOKUP_F_IFACE; 732 733 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); 734 } 735 736 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, 737 struct flowi *fl, int flags) 738 { 739 struct fib6_node *fn; 740 struct rt6_info *rt, *nrt; 741 int strict = 0; 742 int attempts = 3; 743 int err; 744 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 745 746 strict |= flags & RT6_LOOKUP_F_IFACE; 747 748 relookup: 749 read_lock_bh(&table->tb6_lock); 750 751 restart_2: 752 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 753 754 restart: 755 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); 756 BACKTRACK(&fl->fl6_src); 757 if (rt == &ip6_null_entry || 758 rt->rt6i_flags & RTF_CACHE) 759 goto out; 760 761 dst_hold(&rt->u.dst); 762 read_unlock_bh(&table->tb6_lock); 763 764 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 765 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 766 else { 767 #if CLONE_OFFLINK_ROUTE 768 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 769 #else 770 goto out2; 771 #endif 772 } 773 774 dst_release(&rt->u.dst); 775 rt = nrt ? : &ip6_null_entry; 776 777 dst_hold(&rt->u.dst); 778 if (nrt) { 779 err = ip6_ins_rt(nrt); 780 if (!err) 781 goto out2; 782 } 783 784 if (--attempts <= 0) 785 goto out2; 786 787 /* 788 * Race condition! In the gap, when table->tb6_lock was 789 * released someone could insert this route. Relookup. 
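 *	(a short illustrative note follows; the original comment and code
 *	resume below)
 */

/*
 *	Both ip6_pol_route_input() and ip6_pol_route_output() use the same
 *	two-pass strategy, restated here (no new behaviour):
 *
 *	  pass 1: rt6_select(..., strict | RT6_LOOKUP_F_REACHABLE) only
 *		  accepts routers whose neighbour entry is in a "probably
 *		  reachable" NUD state;
 *	  pass 2: if pass 1 ends up at the "out" label (ip6_null_entry
 *		  after backtracking, or an existing RTF_CACHE entry), the
 *		  lookup restarts from restart_2 with the reachability
 *		  requirement dropped and accepts any matching route.
 *
 *	A forwarding host (ipv6_devconf.forwarding != 0) never sets
 *	RT6_LOOKUP_F_REACHABLE and so performs a single relaxed pass.
 */

/*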
790 */ 791 dst_release(&rt->u.dst); 792 goto relookup; 793 794 out: 795 if (reachable) { 796 reachable = 0; 797 goto restart_2; 798 } 799 dst_hold(&rt->u.dst); 800 read_unlock_bh(&table->tb6_lock); 801 out2: 802 rt->u.dst.lastuse = jiffies; 803 rt->u.dst.__use++; 804 return rt; 805 } 806 807 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) 808 { 809 int flags = 0; 810 811 if (rt6_need_strict(&fl->fl6_dst)) 812 flags |= RT6_LOOKUP_F_IFACE; 813 814 if (!ipv6_addr_any(&fl->fl6_src)) 815 flags |= RT6_LOOKUP_F_HAS_SADDR; 816 817 return fib6_rule_lookup(fl, flags, ip6_pol_route_output); 818 } 819 820 821 /* 822 * Destination cache support functions 823 */ 824 825 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 826 { 827 struct rt6_info *rt; 828 829 rt = (struct rt6_info *) dst; 830 831 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 832 return dst; 833 834 return NULL; 835 } 836 837 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 838 { 839 struct rt6_info *rt = (struct rt6_info *) dst; 840 841 if (rt) { 842 if (rt->rt6i_flags & RTF_CACHE) 843 ip6_del_rt(rt); 844 else 845 dst_release(dst); 846 } 847 return NULL; 848 } 849 850 static void ip6_link_failure(struct sk_buff *skb) 851 { 852 struct rt6_info *rt; 853 854 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 855 856 rt = (struct rt6_info *) skb->dst; 857 if (rt) { 858 if (rt->rt6i_flags&RTF_CACHE) { 859 dst_set_expires(&rt->u.dst, 0); 860 rt->rt6i_flags |= RTF_EXPIRES; 861 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 862 rt->rt6i_node->fn_sernum = -1; 863 } 864 } 865 866 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 867 { 868 struct rt6_info *rt6 = (struct rt6_info*)dst; 869 870 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 871 rt6->rt6i_flags |= RTF_MODIFIED; 872 if (mtu < IPV6_MIN_MTU) { 873 mtu = IPV6_MIN_MTU; 874 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 875 } 876 dst->metrics[RTAX_MTU-1] = mtu; 877 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 878 } 879 } 880 881 static int ipv6_get_mtu(struct net_device *dev); 882 883 static inline unsigned int ipv6_advmss(unsigned int mtu) 884 { 885 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 886 887 if (mtu < ip6_rt_min_advmss) 888 mtu = ip6_rt_min_advmss; 889 890 /* 891 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 892 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
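 *	(a short worked example follows; the original comment resumes
 *	below)
 */

/*
 *	Worked examples of the clamping performed here (illustrative
 *	numbers only):
 *
 *	  link MTU 1500: 1500 - 40 (ipv6hdr) - 20 (tcphdr) = 1440;
 *	  link MTU 1280: 1280 - 60 = 1220 = ip6_rt_min_advmss, i.e. the
 *		minimum link MTU sits exactly on the lower clamp, and any
 *		smaller result is raised back to 1220;
 *	  a jumbo MTU whose mtu - 60 would exceed IPV6_MAXPLEN - 20 is
 *		reported as IPV6_MAXPLEN, which peers read as "no MSS
 *		limit, rely on PMTU discovery" (see the rest of the
 *		original comment below).
 */

/*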
893 * IPV6_MAXPLEN is also valid and means: "any MSS, 894 * rely only on pmtu discovery" 895 */ 896 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 897 mtu = IPV6_MAXPLEN; 898 return mtu; 899 } 900 901 static struct dst_entry *ndisc_dst_gc_list; 902 static DEFINE_SPINLOCK(ndisc_lock); 903 904 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 905 struct neighbour *neigh, 906 struct in6_addr *addr, 907 int (*output)(struct sk_buff *)) 908 { 909 struct rt6_info *rt; 910 struct inet6_dev *idev = in6_dev_get(dev); 911 912 if (unlikely(idev == NULL)) 913 return NULL; 914 915 rt = ip6_dst_alloc(); 916 if (unlikely(rt == NULL)) { 917 in6_dev_put(idev); 918 goto out; 919 } 920 921 dev_hold(dev); 922 if (neigh) 923 neigh_hold(neigh); 924 else 925 neigh = ndisc_get_neigh(dev, addr); 926 927 rt->rt6i_dev = dev; 928 rt->rt6i_idev = idev; 929 rt->rt6i_nexthop = neigh; 930 atomic_set(&rt->u.dst.__refcnt, 1); 931 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 932 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 933 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 934 rt->u.dst.output = output; 935 936 #if 0 /* there's no chance to use these for ndisc */ 937 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 938 ? DST_HOST 939 : 0; 940 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 941 rt->rt6i_dst.plen = 128; 942 #endif 943 944 spin_lock_bh(&ndisc_lock); 945 rt->u.dst.next = ndisc_dst_gc_list; 946 ndisc_dst_gc_list = &rt->u.dst; 947 spin_unlock_bh(&ndisc_lock); 948 949 fib6_force_start_gc(); 950 951 out: 952 return &rt->u.dst; 953 } 954 955 int ndisc_dst_gc(int *more) 956 { 957 struct dst_entry *dst, *next, **pprev; 958 int freed; 959 960 next = NULL; 961 freed = 0; 962 963 spin_lock_bh(&ndisc_lock); 964 pprev = &ndisc_dst_gc_list; 965 966 while ((dst = *pprev) != NULL) { 967 if (!atomic_read(&dst->__refcnt)) { 968 *pprev = dst->next; 969 dst_free(dst); 970 freed++; 971 } else { 972 pprev = &dst->next; 973 (*more)++; 974 } 975 } 976 977 spin_unlock_bh(&ndisc_lock); 978 979 return freed; 980 } 981 982 static int ip6_dst_gc(void) 983 { 984 static unsigned expire = 30*HZ; 985 static unsigned long last_gc; 986 unsigned long now = jiffies; 987 988 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 989 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 990 goto out; 991 992 expire++; 993 fib6_run_gc(expire); 994 last_gc = now; 995 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 996 expire = ip6_rt_gc_timeout>>1; 997 998 out: 999 expire -= expire>>ip6_rt_gc_elasticity; 1000 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 1001 } 1002 1003 /* Clean host part of a prefix. Not necessary in radix tree, 1004 but results in cleaner routing tables. 1005 1006 Remove it only when all the things will work! 
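   (an illustrative sketch follows; the original comment ends below)
 */

/*
 * A hedged illustration of what "cleaning the host part" means.  The
 * helper below is hypothetical and unused; ip6_route_add() simply calls
 * ipv6_addr_prefix() inline.  Keeping only the first plen bits of the
 * address and zeroing the rest turns, for example, 2001:db8::1/64 into
 * 2001:db8::/64 before the prefix is stored in the tree.
 */
static inline void rt6_clean_host_part(struct in6_addr *pfx,
				       const struct in6_addr *addr,
				       int plen)
{
	/* copy the leading plen bits of addr, zero the remaining bits */
	ipv6_addr_prefix(pfx, addr, plen);
}

/*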
1007 */ 1008 1009 static int ipv6_get_mtu(struct net_device *dev) 1010 { 1011 int mtu = IPV6_MIN_MTU; 1012 struct inet6_dev *idev; 1013 1014 idev = in6_dev_get(dev); 1015 if (idev) { 1016 mtu = idev->cnf.mtu6; 1017 in6_dev_put(idev); 1018 } 1019 return mtu; 1020 } 1021 1022 int ipv6_get_hoplimit(struct net_device *dev) 1023 { 1024 int hoplimit = ipv6_devconf.hop_limit; 1025 struct inet6_dev *idev; 1026 1027 idev = in6_dev_get(dev); 1028 if (idev) { 1029 hoplimit = idev->cnf.hop_limit; 1030 in6_dev_put(idev); 1031 } 1032 return hoplimit; 1033 } 1034 1035 /* 1036 * 1037 */ 1038 1039 int ip6_route_add(struct fib6_config *cfg) 1040 { 1041 int err; 1042 struct rt6_info *rt = NULL; 1043 struct net_device *dev = NULL; 1044 struct inet6_dev *idev = NULL; 1045 struct fib6_table *table; 1046 int addr_type; 1047 1048 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1049 return -EINVAL; 1050 #ifndef CONFIG_IPV6_SUBTREES 1051 if (cfg->fc_src_len) 1052 return -EINVAL; 1053 #endif 1054 if (cfg->fc_ifindex) { 1055 err = -ENODEV; 1056 dev = dev_get_by_index(cfg->fc_ifindex); 1057 if (!dev) 1058 goto out; 1059 idev = in6_dev_get(dev); 1060 if (!idev) 1061 goto out; 1062 } 1063 1064 if (cfg->fc_metric == 0) 1065 cfg->fc_metric = IP6_RT_PRIO_USER; 1066 1067 table = fib6_new_table(cfg->fc_table); 1068 if (table == NULL) { 1069 err = -ENOBUFS; 1070 goto out; 1071 } 1072 1073 rt = ip6_dst_alloc(); 1074 1075 if (rt == NULL) { 1076 err = -ENOMEM; 1077 goto out; 1078 } 1079 1080 rt->u.dst.obsolete = -1; 1081 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); 1082 1083 if (cfg->fc_protocol == RTPROT_UNSPEC) 1084 cfg->fc_protocol = RTPROT_BOOT; 1085 rt->rt6i_protocol = cfg->fc_protocol; 1086 1087 addr_type = ipv6_addr_type(&cfg->fc_dst); 1088 1089 if (addr_type & IPV6_ADDR_MULTICAST) 1090 rt->u.dst.input = ip6_mc_input; 1091 else 1092 rt->u.dst.input = ip6_forward; 1093 1094 rt->u.dst.output = ip6_output; 1095 1096 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1097 rt->rt6i_dst.plen = cfg->fc_dst_len; 1098 if (rt->rt6i_dst.plen == 128) 1099 rt->u.dst.flags = DST_HOST; 1100 1101 #ifdef CONFIG_IPV6_SUBTREES 1102 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1103 rt->rt6i_src.plen = cfg->fc_src_len; 1104 #endif 1105 1106 rt->rt6i_metric = cfg->fc_metric; 1107 1108 /* We cannot add true routes via loopback here, 1109 they would result in kernel looping; promote them to reject routes 1110 */ 1111 if ((cfg->fc_flags & RTF_REJECT) || 1112 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1113 /* hold loopback dev/idev if we haven't done so. */ 1114 if (dev != &loopback_dev) { 1115 if (dev) { 1116 dev_put(dev); 1117 in6_dev_put(idev); 1118 } 1119 dev = &loopback_dev; 1120 dev_hold(dev); 1121 idev = in6_dev_get(dev); 1122 if (!idev) { 1123 err = -ENODEV; 1124 goto out; 1125 } 1126 } 1127 rt->u.dst.output = ip6_pkt_discard_out; 1128 rt->u.dst.input = ip6_pkt_discard; 1129 rt->u.dst.error = -ENETUNREACH; 1130 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1131 goto install_route; 1132 } 1133 1134 if (cfg->fc_flags & RTF_GATEWAY) { 1135 struct in6_addr *gw_addr; 1136 int gwa_type; 1137 1138 gw_addr = &cfg->fc_gateway; 1139 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1140 gwa_type = ipv6_addr_type(gw_addr); 1141 1142 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1143 struct rt6_info *grt; 1144 1145 /* IPv6 strictly inhibits using not link-local 1146 addresses as nexthop address. 1147 Otherwise, router will not able to send redirects. 
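   (a short illustrative note follows; the original comment resumes
   below)
 */

/*
   Restating the checks performed below (no new behaviour): the gateway
   address type decides how much validation a new RTF_GATEWAY route gets.

	gwa_type = ipv6_addr_type(gw_addr);

	- link-local unicast (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST):
	  accepted directly;
	- other unicast: accepted only if rt6_lookup(gw_addr, ...) with
	  strict interface matching resolves through an on-link route
	  (one without RTF_GATEWAY) on the chosen device;
	- anything that is not unicast: rejected with -EINVAL.
 */

/*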
1148 It is very good, but in some (rare!) circumstances 1149 (SIT, PtP, NBMA NOARP links) it is handy to allow 1150 some exceptions. --ANK 1151 */ 1152 err = -EINVAL; 1153 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1154 goto out; 1155 1156 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); 1157 1158 err = -EHOSTUNREACH; 1159 if (grt == NULL) 1160 goto out; 1161 if (dev) { 1162 if (dev != grt->rt6i_dev) { 1163 dst_release(&grt->u.dst); 1164 goto out; 1165 } 1166 } else { 1167 dev = grt->rt6i_dev; 1168 idev = grt->rt6i_idev; 1169 dev_hold(dev); 1170 in6_dev_hold(grt->rt6i_idev); 1171 } 1172 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1173 err = 0; 1174 dst_release(&grt->u.dst); 1175 1176 if (err) 1177 goto out; 1178 } 1179 err = -EINVAL; 1180 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1181 goto out; 1182 } 1183 1184 err = -ENODEV; 1185 if (dev == NULL) 1186 goto out; 1187 1188 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1189 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1190 if (IS_ERR(rt->rt6i_nexthop)) { 1191 err = PTR_ERR(rt->rt6i_nexthop); 1192 rt->rt6i_nexthop = NULL; 1193 goto out; 1194 } 1195 } 1196 1197 rt->rt6i_flags = cfg->fc_flags; 1198 1199 install_route: 1200 if (cfg->fc_mx) { 1201 struct nlattr *nla; 1202 int remaining; 1203 1204 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1205 int type = nla->nla_type; 1206 1207 if (type) { 1208 if (type > RTAX_MAX) { 1209 err = -EINVAL; 1210 goto out; 1211 } 1212 1213 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1214 } 1215 } 1216 } 1217 1218 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1219 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1220 if (!rt->u.dst.metrics[RTAX_MTU-1]) 1221 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1222 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 1223 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1224 rt->u.dst.dev = dev; 1225 rt->rt6i_idev = idev; 1226 rt->rt6i_table = table; 1227 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1228 1229 out: 1230 if (dev) 1231 dev_put(dev); 1232 if (idev) 1233 in6_dev_put(idev); 1234 if (rt) 1235 dst_free(&rt->u.dst); 1236 return err; 1237 } 1238 1239 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1240 { 1241 int err; 1242 struct fib6_table *table; 1243 1244 if (rt == &ip6_null_entry) 1245 return -ENOENT; 1246 1247 table = rt->rt6i_table; 1248 write_lock_bh(&table->tb6_lock); 1249 1250 err = fib6_del(rt, info); 1251 dst_release(&rt->u.dst); 1252 1253 write_unlock_bh(&table->tb6_lock); 1254 1255 return err; 1256 } 1257 1258 int ip6_del_rt(struct rt6_info *rt) 1259 { 1260 return __ip6_del_rt(rt, NULL); 1261 } 1262 1263 static int ip6_route_del(struct fib6_config *cfg) 1264 { 1265 struct fib6_table *table; 1266 struct fib6_node *fn; 1267 struct rt6_info *rt; 1268 int err = -ESRCH; 1269 1270 table = fib6_get_table(cfg->fc_table); 1271 if (table == NULL) 1272 return err; 1273 1274 read_lock_bh(&table->tb6_lock); 1275 1276 fn = fib6_locate(&table->tb6_root, 1277 &cfg->fc_dst, cfg->fc_dst_len, 1278 &cfg->fc_src, cfg->fc_src_len); 1279 1280 if (fn) { 1281 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1282 if (cfg->fc_ifindex && 1283 (rt->rt6i_dev == NULL || 1284 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1285 continue; 1286 if (cfg->fc_flags & RTF_GATEWAY && 1287 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1288 continue; 1289 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1290 continue; 1291 dst_hold(&rt->u.dst); 1292 read_unlock_bh(&table->tb6_lock); 1293 1294 return 
__ip6_del_rt(rt, &cfg->fc_nlinfo); 1295 } 1296 } 1297 read_unlock_bh(&table->tb6_lock); 1298 1299 return err; 1300 } 1301 1302 /* 1303 * Handle redirects 1304 */ 1305 struct ip6rd_flowi { 1306 struct flowi fl; 1307 struct in6_addr gateway; 1308 }; 1309 1310 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, 1311 struct flowi *fl, 1312 int flags) 1313 { 1314 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1315 struct rt6_info *rt; 1316 struct fib6_node *fn; 1317 1318 /* 1319 * Get the "current" route for this destination and 1320 * check if the redirect has come from approriate router. 1321 * 1322 * RFC 2461 specifies that redirects should only be 1323 * accepted if they come from the nexthop to the target. 1324 * Due to the way the routes are chosen, this notion 1325 * is a bit fuzzy and one might need to check all possible 1326 * routes. 1327 */ 1328 1329 read_lock_bh(&table->tb6_lock); 1330 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1331 restart: 1332 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1333 /* 1334 * Current route is on-link; redirect is always invalid. 1335 * 1336 * Seems, previous statement is not true. It could 1337 * be node, which looks for us as on-link (f.e. proxy ndisc) 1338 * But then router serving it might decide, that we should 1339 * know truth 8)8) --ANK (980726). 1340 */ 1341 if (rt6_check_expired(rt)) 1342 continue; 1343 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1344 continue; 1345 if (fl->oif != rt->rt6i_dev->ifindex) 1346 continue; 1347 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1348 continue; 1349 break; 1350 } 1351 1352 if (!rt) 1353 rt = &ip6_null_entry; 1354 BACKTRACK(&fl->fl6_src); 1355 out: 1356 dst_hold(&rt->u.dst); 1357 1358 read_unlock_bh(&table->tb6_lock); 1359 1360 return rt; 1361 }; 1362 1363 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1364 struct in6_addr *src, 1365 struct in6_addr *gateway, 1366 struct net_device *dev) 1367 { 1368 int flags = RT6_LOOKUP_F_HAS_SADDR; 1369 struct ip6rd_flowi rdfl = { 1370 .fl = { 1371 .oif = dev->ifindex, 1372 .nl_u = { 1373 .ip6_u = { 1374 .daddr = *dest, 1375 .saddr = *src, 1376 }, 1377 }, 1378 }, 1379 .gateway = *gateway, 1380 }; 1381 1382 if (rt6_need_strict(dest)) 1383 flags |= RT6_LOOKUP_F_IFACE; 1384 1385 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); 1386 } 1387 1388 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1389 struct in6_addr *saddr, 1390 struct neighbour *neigh, u8 *lladdr, int on_link) 1391 { 1392 struct rt6_info *rt, *nrt = NULL; 1393 struct netevent_redirect netevent; 1394 1395 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1396 1397 if (rt == &ip6_null_entry) { 1398 if (net_ratelimit()) 1399 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1400 "for redirect target\n"); 1401 goto out; 1402 } 1403 1404 /* 1405 * We have finally decided to accept it. 1406 */ 1407 1408 neigh_update(neigh, lladdr, NUD_STALE, 1409 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1410 NEIGH_UPDATE_F_OVERRIDE| 1411 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1412 NEIGH_UPDATE_F_ISROUTER)) 1413 ); 1414 1415 /* 1416 * Redirect received -> path was valid. 1417 * Look, redirects are sent only in response to data packets, 1418 * so that this nexthop apparently is reachable. --ANK 1419 */ 1420 dst_confirm(&rt->u.dst); 1421 1422 /* Duplicate redirect: silently ignore. 
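 */

/*
 *	Note (restating __ip6_route_redirect() above, no new behaviour):
 *	a redirect for dest is honoured only when the route currently
 *	selected for it satisfies all of the following:
 *
 *		- it has not expired (rt6_check_expired()),
 *		- it is a gateway route (RTF_GATEWAY),
 *		- it goes out the interface the redirect arrived on, and
 *		- its gateway equals the source of the redirect.
 *
 *	Otherwise the lookup falls back to ip6_null_entry and the message
 *	was already dropped with the ratelimited warning above.
 */

/*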
*/ 1423 if (neigh == rt->u.dst.neighbour) 1424 goto out; 1425 1426 nrt = ip6_rt_copy(rt); 1427 if (nrt == NULL) 1428 goto out; 1429 1430 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1431 if (on_link) 1432 nrt->rt6i_flags &= ~RTF_GATEWAY; 1433 1434 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1435 nrt->rt6i_dst.plen = 128; 1436 nrt->u.dst.flags |= DST_HOST; 1437 1438 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1439 nrt->rt6i_nexthop = neigh_clone(neigh); 1440 /* Reset pmtu, it may be better */ 1441 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1442 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); 1443 1444 if (ip6_ins_rt(nrt)) 1445 goto out; 1446 1447 netevent.old = &rt->u.dst; 1448 netevent.new = &nrt->u.dst; 1449 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1450 1451 if (rt->rt6i_flags&RTF_CACHE) { 1452 ip6_del_rt(rt); 1453 return; 1454 } 1455 1456 out: 1457 dst_release(&rt->u.dst); 1458 return; 1459 } 1460 1461 /* 1462 * Handle ICMP "packet too big" messages 1463 * i.e. Path MTU discovery 1464 */ 1465 1466 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1467 struct net_device *dev, u32 pmtu) 1468 { 1469 struct rt6_info *rt, *nrt; 1470 int allfrag = 0; 1471 1472 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); 1473 if (rt == NULL) 1474 return; 1475 1476 if (pmtu >= dst_mtu(&rt->u.dst)) 1477 goto out; 1478 1479 if (pmtu < IPV6_MIN_MTU) { 1480 /* 1481 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1482 * MTU (1280) and a fragment header should always be included 1483 * after a node receiving Too Big message reporting PMTU is 1484 * less than the IPv6 Minimum Link MTU. 1485 */ 1486 pmtu = IPV6_MIN_MTU; 1487 allfrag = 1; 1488 } 1489 1490 /* New mtu received -> path was valid. 1491 They are sent only in response to data packets, 1492 so that this nexthop apparently is reachable. --ANK 1493 */ 1494 dst_confirm(&rt->u.dst); 1495 1496 /* Host route. If it is static, it would be better 1497 not to override it, but add new one, so that 1498 when cache entry will expire old pmtu 1499 would return automatically. 1500 */ 1501 if (rt->rt6i_flags & RTF_CACHE) { 1502 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1503 if (allfrag) 1504 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1505 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1506 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1507 goto out; 1508 } 1509 1510 /* Network route. 1511 Two cases are possible: 1512 1. It is connected route. Action: COW 1513 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1514 */ 1515 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1516 nrt = rt6_alloc_cow(rt, daddr, saddr); 1517 else 1518 nrt = rt6_alloc_clone(rt, daddr); 1519 1520 if (nrt) { 1521 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1522 if (allfrag) 1523 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1524 1525 /* According to RFC 1981, detecting PMTU increase shouldn't be 1526 * happened within 5 mins, the recommended timer is 10 mins. 1527 * Here this route expiration time is set to ip6_rt_mtu_expires 1528 * which is 10 mins. After 10 mins the decreased pmtu is expired 1529 * and detecting PMTU increase will be automatically happened. 
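 *	(a short worked example follows; the original comment resumes
 *	below)
 */

/*
 *	Worked example of the clamping done earlier in this function
 *	(illustrative numbers only): a Packet Too Big message reporting
 *	pmtu = 576 is below IPV6_MIN_MTU (1280), so RTAX_MTU is set to
 *	1280 and RTAX_FEATURE_ALLFRAG is set, telling upper layers to add
 *	a fragment header on this path.  A reported pmtu of 1400 on a
 *	1500-byte path is stored as-is.  Either way the affected cache
 *	entry (or the clone created here) expires after
 *	ip6_rt_mtu_expires, i.e. 10 minutes, so a later PMTU increase can
 *	be rediscovered.
 */

/*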
1530 */ 1531 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1532 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1533 1534 ip6_ins_rt(nrt); 1535 } 1536 out: 1537 dst_release(&rt->u.dst); 1538 } 1539 1540 /* 1541 * Misc support functions 1542 */ 1543 1544 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1545 { 1546 struct rt6_info *rt = ip6_dst_alloc(); 1547 1548 if (rt) { 1549 rt->u.dst.input = ort->u.dst.input; 1550 rt->u.dst.output = ort->u.dst.output; 1551 1552 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1553 rt->u.dst.error = ort->u.dst.error; 1554 rt->u.dst.dev = ort->u.dst.dev; 1555 if (rt->u.dst.dev) 1556 dev_hold(rt->u.dst.dev); 1557 rt->rt6i_idev = ort->rt6i_idev; 1558 if (rt->rt6i_idev) 1559 in6_dev_hold(rt->rt6i_idev); 1560 rt->u.dst.lastuse = jiffies; 1561 rt->rt6i_expires = 0; 1562 1563 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1564 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1565 rt->rt6i_metric = 0; 1566 1567 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1568 #ifdef CONFIG_IPV6_SUBTREES 1569 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1570 #endif 1571 rt->rt6i_table = ort->rt6i_table; 1572 } 1573 return rt; 1574 } 1575 1576 #ifdef CONFIG_IPV6_ROUTE_INFO 1577 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 1578 struct in6_addr *gwaddr, int ifindex) 1579 { 1580 struct fib6_node *fn; 1581 struct rt6_info *rt = NULL; 1582 struct fib6_table *table; 1583 1584 table = fib6_get_table(RT6_TABLE_INFO); 1585 if (table == NULL) 1586 return NULL; 1587 1588 write_lock_bh(&table->tb6_lock); 1589 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1590 if (!fn) 1591 goto out; 1592 1593 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1594 if (rt->rt6i_dev->ifindex != ifindex) 1595 continue; 1596 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1597 continue; 1598 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1599 continue; 1600 dst_hold(&rt->u.dst); 1601 break; 1602 } 1603 out: 1604 write_unlock_bh(&table->tb6_lock); 1605 return rt; 1606 } 1607 1608 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 1609 struct in6_addr *gwaddr, int ifindex, 1610 unsigned pref) 1611 { 1612 struct fib6_config cfg = { 1613 .fc_table = RT6_TABLE_INFO, 1614 .fc_metric = 1024, 1615 .fc_ifindex = ifindex, 1616 .fc_dst_len = prefixlen, 1617 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1618 RTF_UP | RTF_PREF(pref), 1619 }; 1620 1621 ipv6_addr_copy(&cfg.fc_dst, prefix); 1622 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1623 1624 /* We should treat it as a default route if prefix length is 0. 
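 */

/*
 * Aside on router preference (restating macros already used in this
 * file, no new behaviour): the 2-bit preference carried by a Route
 * Information option or Router Advertisement is folded into rt6i_flags
 * with RTF_PREF(pref) here, and recovered by rt6_score_route() via
 *
 *	IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags))
 *
 * so that, with the standard RA encoding, "high" scores above "medium"
 * and "medium" above "low" when candidate routes share a metric.
 */

/*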
*/ 1625 if (!prefixlen) 1626 cfg.fc_flags |= RTF_DEFAULT; 1627 1628 ip6_route_add(&cfg); 1629 1630 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); 1631 } 1632 #endif 1633 1634 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1635 { 1636 struct rt6_info *rt; 1637 struct fib6_table *table; 1638 1639 table = fib6_get_table(RT6_TABLE_DFLT); 1640 if (table == NULL) 1641 return NULL; 1642 1643 write_lock_bh(&table->tb6_lock); 1644 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1645 if (dev == rt->rt6i_dev && 1646 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1647 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1648 break; 1649 } 1650 if (rt) 1651 dst_hold(&rt->u.dst); 1652 write_unlock_bh(&table->tb6_lock); 1653 return rt; 1654 } 1655 1656 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1657 struct net_device *dev, 1658 unsigned int pref) 1659 { 1660 struct fib6_config cfg = { 1661 .fc_table = RT6_TABLE_DFLT, 1662 .fc_metric = 1024, 1663 .fc_ifindex = dev->ifindex, 1664 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1665 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1666 }; 1667 1668 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1669 1670 ip6_route_add(&cfg); 1671 1672 return rt6_get_dflt_router(gwaddr, dev); 1673 } 1674 1675 void rt6_purge_dflt_routers(void) 1676 { 1677 struct rt6_info *rt; 1678 struct fib6_table *table; 1679 1680 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1681 table = fib6_get_table(RT6_TABLE_DFLT); 1682 if (table == NULL) 1683 return; 1684 1685 restart: 1686 read_lock_bh(&table->tb6_lock); 1687 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1688 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1689 dst_hold(&rt->u.dst); 1690 read_unlock_bh(&table->tb6_lock); 1691 ip6_del_rt(rt); 1692 goto restart; 1693 } 1694 } 1695 read_unlock_bh(&table->tb6_lock); 1696 } 1697 1698 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, 1699 struct fib6_config *cfg) 1700 { 1701 memset(cfg, 0, sizeof(*cfg)); 1702 1703 cfg->fc_table = RT6_TABLE_MAIN; 1704 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1705 cfg->fc_metric = rtmsg->rtmsg_metric; 1706 cfg->fc_expires = rtmsg->rtmsg_info; 1707 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1708 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1709 cfg->fc_flags = rtmsg->rtmsg_flags; 1710 1711 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1712 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1713 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1714 } 1715 1716 int ipv6_route_ioctl(unsigned int cmd, void __user *arg) 1717 { 1718 struct fib6_config cfg; 1719 struct in6_rtmsg rtmsg; 1720 int err; 1721 1722 switch(cmd) { 1723 case SIOCADDRT: /* Add a route */ 1724 case SIOCDELRT: /* Delete a route */ 1725 if (!capable(CAP_NET_ADMIN)) 1726 return -EPERM; 1727 err = copy_from_user(&rtmsg, arg, 1728 sizeof(struct in6_rtmsg)); 1729 if (err) 1730 return -EFAULT; 1731 1732 rtmsg_to_fib6_config(&rtmsg, &cfg); 1733 1734 rtnl_lock(); 1735 switch (cmd) { 1736 case SIOCADDRT: 1737 err = ip6_route_add(&cfg); 1738 break; 1739 case SIOCDELRT: 1740 err = ip6_route_del(&cfg); 1741 break; 1742 default: 1743 err = -EINVAL; 1744 } 1745 rtnl_unlock(); 1746 1747 return err; 1748 }; 1749 1750 return -EINVAL; 1751 } 1752 1753 /* 1754 * Drop the packet on the floor 1755 */ 1756 1757 static inline int ip6_pkt_drop(struct sk_buff *skb, int code) 1758 { 1759 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr); 1760 if (type == IPV6_ADDR_ANY || type == 
IPV6_ADDR_RESERVED) 1761 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); 1762 1763 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES); 1764 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); 1765 kfree_skb(skb); 1766 return 0; 1767 } 1768 1769 static int ip6_pkt_discard(struct sk_buff *skb) 1770 { 1771 return ip6_pkt_drop(skb, ICMPV6_NOROUTE); 1772 } 1773 1774 static int ip6_pkt_discard_out(struct sk_buff *skb) 1775 { 1776 skb->dev = skb->dst->dev; 1777 return ip6_pkt_discard(skb); 1778 } 1779 1780 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1781 1782 static int ip6_pkt_prohibit(struct sk_buff *skb) 1783 { 1784 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED); 1785 } 1786 1787 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 1788 { 1789 skb->dev = skb->dst->dev; 1790 return ip6_pkt_prohibit(skb); 1791 } 1792 1793 static int ip6_pkt_blk_hole(struct sk_buff *skb) 1794 { 1795 kfree_skb(skb); 1796 return 0; 1797 } 1798 1799 #endif 1800 1801 /* 1802 * Allocate a dst for local (unicast / anycast) address. 1803 */ 1804 1805 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1806 const struct in6_addr *addr, 1807 int anycast) 1808 { 1809 struct rt6_info *rt = ip6_dst_alloc(); 1810 1811 if (rt == NULL) 1812 return ERR_PTR(-ENOMEM); 1813 1814 dev_hold(&loopback_dev); 1815 in6_dev_hold(idev); 1816 1817 rt->u.dst.flags = DST_HOST; 1818 rt->u.dst.input = ip6_input; 1819 rt->u.dst.output = ip6_output; 1820 rt->rt6i_dev = &loopback_dev; 1821 rt->rt6i_idev = idev; 1822 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1823 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1824 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1825 rt->u.dst.obsolete = -1; 1826 1827 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1828 if (anycast) 1829 rt->rt6i_flags |= RTF_ANYCAST; 1830 else 1831 rt->rt6i_flags |= RTF_LOCAL; 1832 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1833 if (rt->rt6i_nexthop == NULL) { 1834 dst_free(&rt->u.dst); 1835 return ERR_PTR(-ENOMEM); 1836 } 1837 1838 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1839 rt->rt6i_dst.plen = 128; 1840 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); 1841 1842 atomic_set(&rt->u.dst.__refcnt, 1); 1843 1844 return rt; 1845 } 1846 1847 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1848 { 1849 if (((void*)rt->rt6i_dev == arg || arg == NULL) && 1850 rt != &ip6_null_entry) { 1851 RT6_TRACE("deleted by ifdown %p\n", rt); 1852 return -1; 1853 } 1854 return 0; 1855 } 1856 1857 void rt6_ifdown(struct net_device *dev) 1858 { 1859 fib6_clean_all(fib6_ifdown, 0, dev); 1860 } 1861 1862 struct rt6_mtu_change_arg 1863 { 1864 struct net_device *dev; 1865 unsigned mtu; 1866 }; 1867 1868 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 1869 { 1870 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 1871 struct inet6_dev *idev; 1872 1873 /* In IPv6 pmtu discovery is not optional, 1874 so that RTAX_MTU lock cannot disable it. 1875 We still use this lock to block changes 1876 caused by addrconf/ndisc. 1877 */ 1878 1879 idev = __in6_dev_get(arg->dev); 1880 if (idev == NULL) 1881 return 0; 1882 1883 /* For administrative MTU increase, there is no way to discover 1884 IPv6 PMTU increase, so PMTU increase should be updated here. 1885 Since RFC 1981 doesn't include administrative MTU increase 1886 update PMTU increase is a MUST. (i.e. 
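   (a short illustrative note follows; the original comment resumes
   below)
 */

/*
   Restating the condition below (no new behaviour): for a route using
   arg->dev whose RTAX_MTU metric is not locked, the cached MTU is
   overwritten with the new device MTU when either

	- the device MTU shrank below the cached route MTU (this link is
	  now the narrowest hop), or
	- the device MTU grew and the cached route MTU equals the device
	  MTU recorded in idev->cnf.mtu6 (this link was the limiting hop,
	  so the path may now be wider).

   Note that, as written, the RTAX_ADVMSS recomputation that follows the
   if statement below is not guarded by the same condition and runs for
   every route visited by fib6_clean_all().
 */

/*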
jumbo frame) 1887 */ 1888 /* 1889 If new MTU is less than route PMTU, this new MTU will be the 1890 lowest MTU in the path, update the route PMTU to reflect PMTU 1891 decreases; if new MTU is greater than route PMTU, and the 1892 old MTU is the lowest MTU in the path, update the route PMTU 1893 to reflect the increase. In this case if the other nodes' MTU 1894 also have the lowest MTU, TOO BIG MESSAGE will be lead to 1895 PMTU discouvery. 1896 */ 1897 if (rt->rt6i_dev == arg->dev && 1898 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1899 (dst_mtu(&rt->u.dst) > arg->mtu || 1900 (dst_mtu(&rt->u.dst) < arg->mtu && 1901 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) 1902 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1903 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); 1904 return 0; 1905 } 1906 1907 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 1908 { 1909 struct rt6_mtu_change_arg arg = { 1910 .dev = dev, 1911 .mtu = mtu, 1912 }; 1913 1914 fib6_clean_all(rt6_mtu_change_route, 0, &arg); 1915 } 1916 1917 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { 1918 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 1919 [RTA_OIF] = { .type = NLA_U32 }, 1920 [RTA_IIF] = { .type = NLA_U32 }, 1921 [RTA_PRIORITY] = { .type = NLA_U32 }, 1922 [RTA_METRICS] = { .type = NLA_NESTED }, 1923 }; 1924 1925 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 1926 struct fib6_config *cfg) 1927 { 1928 struct rtmsg *rtm; 1929 struct nlattr *tb[RTA_MAX+1]; 1930 int err; 1931 1932 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 1933 if (err < 0) 1934 goto errout; 1935 1936 err = -EINVAL; 1937 rtm = nlmsg_data(nlh); 1938 memset(cfg, 0, sizeof(*cfg)); 1939 1940 cfg->fc_table = rtm->rtm_table; 1941 cfg->fc_dst_len = rtm->rtm_dst_len; 1942 cfg->fc_src_len = rtm->rtm_src_len; 1943 cfg->fc_flags = RTF_UP; 1944 cfg->fc_protocol = rtm->rtm_protocol; 1945 1946 if (rtm->rtm_type == RTN_UNREACHABLE) 1947 cfg->fc_flags |= RTF_REJECT; 1948 1949 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 1950 cfg->fc_nlinfo.nlh = nlh; 1951 1952 if (tb[RTA_GATEWAY]) { 1953 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 1954 cfg->fc_flags |= RTF_GATEWAY; 1955 } 1956 1957 if (tb[RTA_DST]) { 1958 int plen = (rtm->rtm_dst_len + 7) >> 3; 1959 1960 if (nla_len(tb[RTA_DST]) < plen) 1961 goto errout; 1962 1963 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 1964 } 1965 1966 if (tb[RTA_SRC]) { 1967 int plen = (rtm->rtm_src_len + 7) >> 3; 1968 1969 if (nla_len(tb[RTA_SRC]) < plen) 1970 goto errout; 1971 1972 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 1973 } 1974 1975 if (tb[RTA_OIF]) 1976 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 1977 1978 if (tb[RTA_PRIORITY]) 1979 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 1980 1981 if (tb[RTA_METRICS]) { 1982 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 1983 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 1984 } 1985 1986 if (tb[RTA_TABLE]) 1987 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 1988 1989 err = 0; 1990 errout: 1991 return err; 1992 } 1993 1994 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1995 { 1996 struct fib6_config cfg; 1997 int err; 1998 1999 err = rtm_to_fib6_config(skb, nlh, &cfg); 2000 if (err < 0) 2001 return err; 2002 2003 return ip6_route_del(&cfg); 2004 } 2005 2006 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2007 { 2008 struct fib6_config cfg; 2009 int err; 2010 2011 err = rtm_to_fib6_config(skb, nlh, &cfg); 2012 if (err < 0) 2013 return err; 2014 2015 return 
ip6_route_add(&cfg); 2016 } 2017 2018 static inline size_t rt6_nlmsg_size(void) 2019 { 2020 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2021 + nla_total_size(16) /* RTA_SRC */ 2022 + nla_total_size(16) /* RTA_DST */ 2023 + nla_total_size(16) /* RTA_GATEWAY */ 2024 + nla_total_size(16) /* RTA_PREFSRC */ 2025 + nla_total_size(4) /* RTA_TABLE */ 2026 + nla_total_size(4) /* RTA_IIF */ 2027 + nla_total_size(4) /* RTA_OIF */ 2028 + nla_total_size(4) /* RTA_PRIORITY */ 2029 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2030 + nla_total_size(sizeof(struct rta_cacheinfo)); 2031 } 2032 2033 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, 2034 struct in6_addr *dst, struct in6_addr *src, 2035 int iif, int type, u32 pid, u32 seq, 2036 int prefix, unsigned int flags) 2037 { 2038 struct rtmsg *rtm; 2039 struct nlmsghdr *nlh; 2040 long expires; 2041 u32 table; 2042 2043 if (prefix) { /* user wants prefix routes only */ 2044 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2045 /* success since this is not a prefix route */ 2046 return 1; 2047 } 2048 } 2049 2050 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2051 if (nlh == NULL) 2052 return -EMSGSIZE; 2053 2054 rtm = nlmsg_data(nlh); 2055 rtm->rtm_family = AF_INET6; 2056 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2057 rtm->rtm_src_len = rt->rt6i_src.plen; 2058 rtm->rtm_tos = 0; 2059 if (rt->rt6i_table) 2060 table = rt->rt6i_table->tb6_id; 2061 else 2062 table = RT6_TABLE_UNSPEC; 2063 rtm->rtm_table = table; 2064 NLA_PUT_U32(skb, RTA_TABLE, table); 2065 if (rt->rt6i_flags&RTF_REJECT) 2066 rtm->rtm_type = RTN_UNREACHABLE; 2067 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2068 rtm->rtm_type = RTN_LOCAL; 2069 else 2070 rtm->rtm_type = RTN_UNICAST; 2071 rtm->rtm_flags = 0; 2072 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2073 rtm->rtm_protocol = rt->rt6i_protocol; 2074 if (rt->rt6i_flags&RTF_DYNAMIC) 2075 rtm->rtm_protocol = RTPROT_REDIRECT; 2076 else if (rt->rt6i_flags & RTF_ADDRCONF) 2077 rtm->rtm_protocol = RTPROT_KERNEL; 2078 else if (rt->rt6i_flags&RTF_DEFAULT) 2079 rtm->rtm_protocol = RTPROT_RA; 2080 2081 if (rt->rt6i_flags&RTF_CACHE) 2082 rtm->rtm_flags |= RTM_F_CLONED; 2083 2084 if (dst) { 2085 NLA_PUT(skb, RTA_DST, 16, dst); 2086 rtm->rtm_dst_len = 128; 2087 } else if (rtm->rtm_dst_len) 2088 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2089 #ifdef CONFIG_IPV6_SUBTREES 2090 if (src) { 2091 NLA_PUT(skb, RTA_SRC, 16, src); 2092 rtm->rtm_src_len = 128; 2093 } else if (rtm->rtm_src_len) 2094 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2095 #endif 2096 if (iif) 2097 NLA_PUT_U32(skb, RTA_IIF, iif); 2098 else if (dst) { 2099 struct in6_addr saddr_buf; 2100 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) 2101 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2102 } 2103 2104 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2105 goto nla_put_failure; 2106 2107 if (rt->u.dst.neighbour) 2108 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2109 2110 if (rt->u.dst.dev) 2111 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2112 2113 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2114 2115 expires = rt->rt6i_expires ? 
rt->rt6i_expires - jiffies : 0; 2116 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2117 expires, rt->u.dst.error) < 0) 2118 goto nla_put_failure; 2119 2120 return nlmsg_end(skb, nlh); 2121 2122 nla_put_failure: 2123 nlmsg_cancel(skb, nlh); 2124 return -EMSGSIZE; 2125 } 2126 2127 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2128 { 2129 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2130 int prefix; 2131 2132 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2133 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2134 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2135 } else 2136 prefix = 0; 2137 2138 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2139 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2140 prefix, NLM_F_MULTI); 2141 } 2142 2143 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2144 { 2145 struct nlattr *tb[RTA_MAX+1]; 2146 struct rt6_info *rt; 2147 struct sk_buff *skb; 2148 struct rtmsg *rtm; 2149 struct flowi fl; 2150 int err, iif = 0; 2151 2152 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2153 if (err < 0) 2154 goto errout; 2155 2156 err = -EINVAL; 2157 memset(&fl, 0, sizeof(fl)); 2158 2159 if (tb[RTA_SRC]) { 2160 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2161 goto errout; 2162 2163 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2164 } 2165 2166 if (tb[RTA_DST]) { 2167 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2168 goto errout; 2169 2170 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2171 } 2172 2173 if (tb[RTA_IIF]) 2174 iif = nla_get_u32(tb[RTA_IIF]); 2175 2176 if (tb[RTA_OIF]) 2177 fl.oif = nla_get_u32(tb[RTA_OIF]); 2178 2179 if (iif) { 2180 struct net_device *dev; 2181 dev = __dev_get_by_index(iif); 2182 if (!dev) { 2183 err = -ENODEV; 2184 goto errout; 2185 } 2186 } 2187 2188 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2189 if (skb == NULL) { 2190 err = -ENOBUFS; 2191 goto errout; 2192 } 2193 2194 /* Reserve room for dummy headers, this skb can pass 2195 through good chunk of routing engine. 
	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
	skb->dst = &rt->u.dst;

	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
errout:
	return err;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	u32 pid = 0, seq = 0;
	struct nlmsghdr *nlh = NULL;
	int err = -ENOBUFS;

	if (info) {
		pid = info->pid;
		nlh = info->nlh;
		if (nlh)
			seq = nlh->nlmsg_seq;
	}

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (skb == NULL)
		goto errout;

	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
errout:
	if (err < 0)
		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;

	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	if (arg->len >= arg->length)
		return 0;

	arg->len += sprintf(arg->buffer + arg->len,
			    NIP6_SEQFMT " %02x ",
			    NIP6(rt->rt6i_dst.addr),
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	arg->len += sprintf(arg->buffer + arg->len,
			    NIP6_SEQFMT " %02x ",
			    NIP6(rt->rt6i_src.addr),
			    rt->rt6i_src.plen);
#else
	arg->len += sprintf(arg->buffer + arg->len,
			    "00000000000000000000000000000000 00 ");
#endif

	if (rt->rt6i_nexthop) {
		arg->len += sprintf(arg->buffer + arg->len,
				    NIP6_SEQFMT,
				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
	} else {
		arg->len += sprintf(arg->buffer + arg->len,
				    "00000000000000000000000000000000");
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
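/*
 * /proc/net/ipv6_route emits one RT6_INFO_LEN-byte record per route;
 * the offset arithmetic below relies on that fixed record size.
 */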
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg = {
		.buffer = buffer,
		.offset = offset,
		.length = length,
	};

	fib6_clean_all(rt6_info_route, 0, &arg);

	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		   rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		   rt6_stats.fib_rt_cache,
		   atomic_read(&ip6_dst_ops.entries),
		   rt6_stats.fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static int flush_delay;

static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}

ctl_table ipv6_route_table[] = {
	{
		.ctl_name	= NET_IPV6_ROUTE_FLUSH,
		.procname	= "flush",
		.data		= &flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &ipv6_sysctl_rtcache_flush
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_THRESH,
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MAX_SIZE,
		.procname	= "max_size",
		.data		= &ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	= "gc_min_interval",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	= "gc_timeout",
		.data		= &ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	= "gc_interval",
		.data		= &ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	= "gc_elasticity",
		.data		= &ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	= "mtu_expires",
		.data		= &ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	= "min_adv_mss",
		.data		= &ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
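	/*
	 * gc_min_interval is exported a second time below, in milliseconds;
	 * both sysctl entries update the same ip6_rt_gc_min_interval value.
	 */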
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	= "gc_min_interval_ms",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_ms_jiffies,
		.strategy	= &sysctl_ms_jiffies,
	},
	{ .ctl_name = 0 }
};

#endif

void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
	fib6_init();
#ifdef CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
#ifdef CONFIG_XFRM
	xfrm6_init();
#endif
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	fib6_rules_init();
#endif
}

void ip6_route_cleanup(void)
{
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	fib6_rules_cleanup();
#endif
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}