/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/netlink.h>
#include <linux/if_arp.h>

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
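
/*
 * When CLONE_OFFLINK_ROUTE is 0, ip6_route_input()/ip6_route_output()
 * use gatewayed (off-link) and RTF_NONEXTHOP routes directly; only
 * connected routes are copied on write into per-destination RTF_CACHE
 * entries.  Set it to 1 to clone off-link routes as well.
 */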

#define CLONE_OFFLINK_ROUTE 0

#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2

static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif

static struct dst_ops ip6_dst_ops = {
	.family			= AF_INET6,
	.protocol		= __constant_htons(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.entry_size		= sizeof(struct rt6_info),
};

struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};

/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);


/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES &&
		time_after(jiffies, rt->rt6i_expires));
}

/*
 *	Route lookup. Any rt6_lock is implied.
 */

static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
						    int oif,
						    int strict)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (oif) {
		for (sprt = rt; sprt; sprt = sprt->u.next) {
			struct net_device *dev = sprt->rt6i_dev;
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (strict && oif)
						continue;
					if (local && (!oif ||
					    local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		}

		if (local)
			return local;

		if (strict)
			return &ip6_null_entry;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static int inline rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static int inline rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m = 0;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
		read_unlock_bh(&neigh->lock);
	}
	return m;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_SELECT_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (n > 1)
		m |= 16;
	else if (!n && strict & RT6_SELECT_F_REACHABLE)
		return -1;
	return m;
}

static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
				   int strict)
{
	struct rt6_info *match = NULL, *last = NULL;
	struct rt6_info *rt, *rt0 = *head;
	u32 metric;
	int mpri = -1;

	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
		  __FUNCTION__, head, head ? *head : NULL, oif);

	for (rt = rt0, metric = rt0->rt6i_metric;
	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
	     rt = rt->u.next) {
		int m;

		if (rt6_check_expired(rt))
			continue;

		last = rt;

		m = rt6_score_route(rt, oif, strict);
		if (m < 0)
			continue;

		if (m > mpri) {
			rt6_probe(match);
			match = rt;
			mpri = m;
		} else {
			rt6_probe(rt);
		}
	}

	if (!match &&
	    (strict & RT6_SELECT_F_REACHABLE) &&
	    last && last != rt0) {
		/* no entries matched; do round-robin */
		static DEFINE_SPINLOCK(lock);
		spin_lock(&lock);
		*head = rt0->u.next;
		rt0->u.next = last->u.next;
		last->u.next = rt0;
		spin_unlock(&lock);
	}

	RT6_TRACE("%s() => %p, score=%d\n",
		  __FUNCTION__, match, mpri);

	return (match ? match : &ip6_null_entry);
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = htonl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif

struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	rt = rt6_device_match(fn->leaf, oif, strict);
	dst_hold(&rt->u.dst);
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);
	return NULL;
}
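
/* Note on rt6_lookup() above: the returned entry has had dst_hold()
 * taken on it and must be released by the caller with dst_release();
 * NULL is returned when the lookup only matched an error entry such
 * as ip6_null_entry.
 */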

/* ip6_ins_rt is called with FREE rt6_lock.
   It takes the new route entry; if the addition fails for any reason,
   the route is freed.  In any case, if the caller does not hold a
   reference, the route may be destroyed.
 */

int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);
	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
	write_unlock_bh(&rt6_lock);

	return err;
}

static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);

	}

	return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort);
	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		if (rt->rt6i_flags & RTF_REJECT)
			rt->u.dst.error = ort->u.dst.error;
		rt->u.dst.flags |= DST_HOST;
		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
	}
	return rt;
}

#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}


void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;
	int reachable = RT6_SELECT_F_REACHABLE;

	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;

relookup:
	read_lock_bh(&rt6_lock);

restart_2:
	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
	BACKTRACK();
	if (rt == &ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
	return;
}

struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;
	int reachable = RT6_SELECT_F_REACHABLE;

	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;

relookup:
	read_lock_bh(&rt6_lock);

restart_2:
	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
	BACKTRACK();
	if (rt == &ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}


/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
		return dst;

	return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			ip6_del_rt(rt, NULL, NULL, NULL);
		else
			dst_release(dst);
	}
	return NULL;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

	rt = (struct rt6_info *) skb->dst;
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->u.dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		}
		dst->metrics[RTAX_MTU-1] = mtu;
	}
}
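
/* Note on ip6_rt_update_pmtu() above: when a peer reports an MTU below
 * IPV6_MIN_MTU, the cached path MTU stays at 1280 and RTAX_FEATURE_ALLFRAG
 * is set instead, so a fragment header is added to every packet on the
 * route (see also the Packet Too Big handling in rt6_pmtu_discovery()).
 */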

/* Protected by rt6_lock.  */
static struct dst_entry *ndisc_dst_gc_list;
static int ipv6_get_mtu(struct net_device *dev);

static inline unsigned int ipv6_advmss(unsigned int mtu)
{
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < ip6_rt_min_advmss)
		mtu = ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	 = dev;
	rt->rt6i_idev	 = idev;
	rt->rt6i_nexthop = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}

int ndisc_dst_gc(int *more)
{
	struct dst_entry *dst, *next, **pprev;
	int freed;

	next = NULL;
	pprev = &ndisc_dst_gc_list;
	freed = 0;
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
			freed++;
		} else {
			pprev = &dst->next;
			(*more)++;
		}
	}

	return freed;
}

static int ip6_dst_gc(void)
{
	static unsigned expire = 30*HZ;
	static unsigned long last_gc;
	unsigned long now = jiffies;

	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
		goto out;

	expire++;
	fib6_run_gc(expire);
	last_gc = now;
	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
		expire = ip6_rt_gc_timeout>>1;

out:
	expire -= expire>>ip6_rt_gc_elasticity;
	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
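
/* Note on ip6_dst_gc() above: 'expire' grows by one jiffy on every run
 * that reaches fib6_run_gc(), is reset to ip6_rt_gc_timeout/2 once the
 * number of entries drops below gc_thresh, and decays by
 * expire >> ip6_rt_gc_elasticity on every call, including the early
 * exits taken while gc_min_interval has not yet elapsed.
 */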

/* Clean host part of a prefix.  Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

static int ipv6_get_mtu(struct net_device *dev)
{
	int mtu = IPV6_MIN_MTU;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		mtu = idev->cnf.mtu6;
		in6_dev_put(idev);
	}
	return mtu;
}

int ipv6_get_hoplimit(struct net_device *dev)
{
	int hoplimit = ipv6_devconf.hop_limit;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		hoplimit = idev->cnf.hop_limit;
		in6_dev_put(idev);
	}
	return hoplimit;
}

/*
 *
 */

int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly prohibits using non-link-local
			   addresses as a nexthop address.
			   Otherwise, the router will not be able to send
			   redirects.  It is very good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	err = fib6_del(rt, nlh, _rtattr, req);
	dst_release(&rt->u.dst);

	write_unlock_bh(&rt6_lock);

	return err;
}

static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, _rtattr, req);
		}
	}
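
	/* No route matched: drop the lock and return -ESRCH. */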
	read_unlock_bh(&rt6_lock);

	return err;
}

/*
 *	Handle redirects
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	int strict;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */
	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
restart:
	for (rt = fn->leaf; rt; rt = rt->u.next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * It seems the previous statement is not true.  It could
		 * be a node which regards us as on-link (f.e. proxy ndisc).
		 * But then the router serving it might decide that we
		 * should know the truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (neigh->dev != rt->rt6i_dev)
			continue;
		if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
			continue;
		break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	else if (strict) {
		while ((fn = fn->parent) != NULL) {
			if (fn->fn_flags & RTN_ROOT)
				break;
			if (fn->fn_flags & RTN_RTINFO)
				goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);

	if (!rt) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		return;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
		goto out;

	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		return;
	}

out:
	dst_release(&rt->u.dst);
	return;
}

/*
 *	Handle ICMP "packet too big" messages,
 *	i.e. Path MTU discovery
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC 2460, when a node receives a Packet Too
		 * Big message reporting a PMTU less than the IPv6 Minimum
		 * Link MTU (1280), the PMTU is set to that minimum and a
		 * fragment header should always be included afterwards.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route.  If it is static, it would be better
	   not to override it but to add a new one, so that
	   when the cache entry expires the old pmtu
	   would come back automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is a connected route. Action: COW.
	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

		/* According to RFC 1981, a PMTU increase should not be
		 * detected in less than 5 minutes, and the recommended
		 * timer is 10 minutes.  Here the route expiration time
		 * is set to ip6_rt_mtu_expires, which is 10 minutes.
		 * After 10 minutes the decreased pmtu expires and PMTU
		 * increase detection happens automatically.
		 */
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}
out:
	dst_release(&rt->u.dst);
}

/*
 *	Misc support functions
 */

static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt) {
		rt->u.dst.input = ort->u.dst.input;
		rt->u.dst.output = ort->u.dst.output;

		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
		rt->u.dst.dev = ort->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->u.dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;

	write_lock_bh(&rt6_lock);
	fn = fib6_locate(&ip6_routing_table, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->u.dst);
		break;
	}
out:
	write_unlock_bh(&rt6_lock);
	return rt;
}

static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct in6_rtmsg rtmsg;

	memset(&rtmsg, 0, sizeof(rtmsg));
	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
	ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
	rtmsg.rtmsg_dst_len = prefixlen;
	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
	rtmsg.rtmsg_metric = 1024;
	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		rtmsg.rtmsg_flags |= RTF_DEFAULT;
	rtmsg.rtmsg_ifindex = ifindex;

	ip6_route_add(&rtmsg, NULL, NULL, NULL);

	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
}
#endif

struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_node *fn;

	fn = &ip6_routing_table;

	write_lock_bh(&rt6_lock);
	for (rt = fn->leaf; rt; rt=rt->u.next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	write_unlock_bh(&rt6_lock);
	return rt;
}

struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct in6_rtmsg rtmsg;

	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
	rtmsg.rtmsg_metric = 1024;
	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
			    RTF_PREF(pref);

	rtmsg.rtmsg_ifindex = dev->ifindex;

	ip6_route_add(&rtmsg, NULL, NULL, NULL);
	return rt6_get_dflt_router(gwaddr, dev);
}

void rt6_purge_dflt_routers(void)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->u.dst);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}

int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}

static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
	    rt != &ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}

struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* After an administrative MTU increase there is no way to
	   discover an IPv6 PMTU increase, so the PMTU has to be
	   updated here.  Since RFC 1981 doesn't cover administrative
	   MTU increases, updating the PMTU on an increase is a MUST
	   (e.g. for jumbo frames).
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will
	   be the lowest MTU in the path; update the route PMTU to
	   reflect the PMTU decrease.  If the new MTU is greater than
	   the route PMTU and the old MTU was the lowest MTU in the
	   path, update the route PMTU to reflect the increase.  In
	   this case, if any other node along the path has a lower MTU,
	   a Packet Too Big message will trigger PMTU discovery again.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
	    (dst_mtu(&rt->u.dst) > arg->mtu ||
	     (dst_mtu(&rt->u.dst) < arg->mtu &&
	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
	return 0;
}

void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg;

	arg.dev = dev;
	arg.mtu = mtu;
	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
	read_unlock_bh(&rt6_lock);
}

static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
			      struct in6_rtmsg *rtmsg)
{
	memset(rtmsg, 0, sizeof(*rtmsg));

	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
	rtmsg->rtmsg_src_len = r->rtm_src_len;
	rtmsg->rtmsg_flags = RTF_UP;
	if (r->rtm_type == RTN_UNREACHABLE)
		rtmsg->rtmsg_flags |= RTF_REJECT;

	if (rta[RTA_GATEWAY-1]) {
		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
		rtmsg->rtmsg_flags |= RTF_GATEWAY;
	}
	if (rta[RTA_DST-1]) {
		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
	}
	if (rta[RTA_SRC-1]) {
		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
	}
	if (rta[RTA_OIF-1]) {
		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
	}
	if (rta[RTA_PRIORITY-1]) {
		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
	}
	return 0;
}

int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
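	/* Map internal route flags onto rtnetlink types and protocols:
	 * RTF_REJECT -> RTN_UNREACHABLE, loopback device -> RTN_LOCAL,
	 * RTF_DYNAMIC -> RTPROT_REDIRECT, RTF_ADDRCONF -> RTPROT_KERNEL,
	 * RTF_DEFAULT -> RTPROT_RA, RTF_CACHE -> RTM_F_CLONED.
	 */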
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
			     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
			     prefix, NLM_F_MULTI);
}

static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->u.next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;
			return 1;
		}
		BUG_TRAP(res!=0);
	}
	w->leaf = NULL;
	return 0;
}

static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	cb->done = (void*)cb->args[1];
	cb->args[1] = 0;
}

static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done ? cb->done(cb) : 0;
}

int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kzalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that the dump is complete, but the skb can
	   still contain data.
	   res > 0 means the dump is not complete, but the frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}

int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers; this skb can pass
	   through a good chunk of the routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
			struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
	u32 pid = current->pid;
	u32 seq = 0;

	if (req)
		pid = req->pid;
	if (nlh)
		seq = nlh->nlmsg_seq;

	skb = alloc_skb(size, gfp_any());
	if (!skb) {
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
		return;
	}
	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
		kfree_skb(skb);
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	if (arg->len >= arg->length)
		return 0;

	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}

static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		   rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		   rt6_stats.fib_rt_cache,
		   atomic_read(&ip6_dst_ops.entries),
		   rt6_stats.fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}

static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static int flush_delay;

static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}

ctl_table ipv6_route_table[] = {
	{
		.ctl_name	= NET_IPV6_ROUTE_FLUSH,
		.procname	= "flush",
		.data		= &flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &ipv6_sysctl_rtcache_flush
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_THRESH,
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MAX_SIZE,
		.procname	= "max_size",
		.data		= &ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	= "gc_min_interval",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	= "gc_timeout",
		.data		= &ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	= "gc_interval",
		.data		= &ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	= "gc_elasticity",
		.data		= &ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	= "mtu_expires",
		.data		= &ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	= "min_adv_mss",
		.data		= &ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	= "gc_min_interval_ms",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_ms_jiffies,
		.strategy	= &sysctl_ms_jiffies,
	},
	{ .ctl_name = 0 }
};

#endif

void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						    sizeof(struct rt6_info),
						    0, SLAB_HWCACHE_ALIGN,
						    NULL, NULL);
	if (!ip6_dst_ops.kmem_cachep)
		panic("cannot create ip6_dst_cache");

	fib6_init();
#ifdef CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
#ifdef CONFIG_XFRM
	xfrm6_init();
#endif
}

void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}