/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 */

#include <linux/capability.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/netlink.h>
#include <linux/if_arp.h>

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

#define CLONE_OFFLINK_ROUTE 0

#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2

static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif

static struct dst_ops ip6_dst_ops = {
	.family			= AF_INET6,
	.protocol		= __constant_htons(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.entry_size		= sizeof(struct rt6_info),
};

struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};

/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);


/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES &&
		time_after(jiffies, rt->rt6i_expires));
}

/*
 *	Route lookup. Any rt6_lock is implied.
 */

static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
						    int oif,
						    int strict)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (oif) {
		for (sprt = rt; sprt; sprt = sprt->u.next) {
			struct net_device *dev = sprt->rt6i_dev;
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (strict && oif)
						continue;
					if (local && (!oif ||
					    local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		}

		if (local)
			return local;

		if (strict)
			return &ip6_null_entry;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m = 0;
	if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 1;
		read_unlock_bh(&neigh->lock);
	}
	return m;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_SELECT_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (rt6_check_neigh(rt))
		m |= 16;
	else if (strict & RT6_SELECT_F_REACHABLE)
		return -1;
	return m;
}
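
/*
 * rt6_select() scans the routes that share the metric of the list head
 * and keeps the one with the best rt6_score_route() value:
 *
 *	bits 0-1: interface match from rt6_check_dev()
 *		  (2 = oif matches or no oif given, 1 = loopback copy)
 *	bits 2-3: decoded router preference (CONFIG_IPV6_ROUTER_PREF only)
 *	bit  4:   nexthop neighbour is currently in a NUD_VALID state
 *
 * If RT6_SELECT_F_REACHABLE is set and no candidate scores at all, the
 * list head is rotated behind the last examined entry, so the next
 * lookup round-robins to a different router.
 */
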
static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
				   int strict)
{
	struct rt6_info *match = NULL, *last = NULL;
	struct rt6_info *rt, *rt0 = *head;
	u32 metric;
	int mpri = -1;

	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
		  __FUNCTION__, head, head ? *head : NULL, oif);

	for (rt = rt0, metric = rt0->rt6i_metric;
	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
	     rt = rt->u.next) {
		int m;

		if (rt6_check_expired(rt))
			continue;

		last = rt;

		m = rt6_score_route(rt, oif, strict);
		if (m < 0)
			continue;

		if (m > mpri) {
			rt6_probe(match);
			match = rt;
			mpri = m;
		} else {
			rt6_probe(rt);
		}
	}

	if (!match &&
	    (strict & RT6_SELECT_F_REACHABLE) &&
	    last && last != rt0) {
		/* no entries matched; do round-robin */
		static spinlock_t lock = SPIN_LOCK_UNLOCKED;
		spin_lock(&lock);
		*head = rt0->u.next;
		rt0->u.next = last->u.next;
		last->u.next = rt0;
		spin_unlock(&lock);
	}

	RT6_TRACE("%s() => %p, score=%d\n",
		  __FUNCTION__, match, mpri);

	return (match ? match : &ip6_null_entry);
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* lifetime arrives in network byte order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif

struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	rt = rt6_device_match(fn->leaf, oif, strict);
	dst_hold(&rt->u.dst);
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);
	return NULL;
}
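
/*
 * Note: rt6_lookup() returns its result with the reference and use
 * counts already bumped (or NULL if only an error route was found);
 * callers must drop the reference with dst_release().
 */
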
/* ip6_ins_rt is called with FREE rt6_lock.
   It takes a new route entry; if the addition fails for any reason the
   route is freed.  In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */

int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
	       void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);
	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
	write_unlock_bh(&rt6_lock);

	return err;
}

static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);

	}

	return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort);
	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		if (rt->rt6i_flags & RTF_REJECT)
			rt->u.dst.error = ort->u.dst.error;
		rt->u.dst.flags |= DST_HOST;
		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
	}
	return rt;
}

#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
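
/*
 * BACKTRACK() is shared by the two lookup paths below: when the
 * selected route turned out to be ip6_null_entry it climbs towards the
 * tree root and restarts the selection at the first ancestor node that
 * carries route information (RTN_RTINFO), giving up once the root is
 * reached.
 */
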
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;
	int reachable = RT6_SELECT_F_REACHABLE;

	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;

relookup:
	read_lock_bh(&rt6_lock);

restart_2:
	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
	BACKTRACK();
	if (rt == &ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
	return;
}

struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;
	int reachable = RT6_SELECT_F_REACHABLE;

	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;

relookup:
	read_lock_bh(&rt6_lock);

restart_2:
	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
	BACKTRACK();
	if (rt == &ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}


/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
		return dst;

	return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			ip6_del_rt(rt, NULL, NULL, NULL);
		else
			dst_release(dst);
	}
	return NULL;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

	rt = (struct rt6_info *) skb->dst;
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->u.dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		}
		dst->metrics[RTAX_MTU-1] = mtu;
	}
}
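
/*
 * ip6_dst_check() above keeps a cached dst only while the serial number
 * of its fib6_node still matches the caller's cookie; ip6_link_failure()
 * invalidates a default route by forcing that serial number to -1, so
 * later checks fail and the route is looked up afresh.
 */
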
/* Protected by rt6_lock.  */
static struct dst_entry *ndisc_dst_gc_list;
static int ipv6_get_mtu(struct net_device *dev);

static inline unsigned int ipv6_advmss(unsigned int mtu)
{
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < ip6_rt_min_advmss)
		mtu = ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	 = dev;
	rt->rt6i_idev	 = idev;
	rt->rt6i_nexthop = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}

int ndisc_dst_gc(int *more)
{
	struct dst_entry *dst, *next, **pprev;
	int freed;

	next = NULL;
	pprev = &ndisc_dst_gc_list;
	freed = 0;
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
			freed++;
		} else {
			pprev = &dst->next;
			(*more)++;
		}
	}

	return freed;
}

static int ip6_dst_gc(void)
{
	static unsigned expire = 30*HZ;
	static unsigned long last_gc;
	unsigned long now = jiffies;

	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
		goto out;

	expire++;
	fib6_run_gc(expire);
	last_gc = now;
	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
		expire = ip6_rt_gc_timeout>>1;

out:
	expire -= expire>>ip6_rt_gc_elasticity;
	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
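
/*
 * ip6_dst_gc() adapts how aggressively it expires entries: "expire"
 * grows by one jiffy on every forced run, is reset to half of
 * ip6_rt_gc_timeout once the cache shrinks below gc_thresh, and decays
 * by expire >> ip6_rt_gc_elasticity on every call.
 */
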
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

static int ipv6_get_mtu(struct net_device *dev)
{
	int mtu = IPV6_MIN_MTU;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		mtu = idev->cnf.mtu6;
		in6_dev_put(idev);
	}
	return mtu;
}

int ipv6_get_hoplimit(struct net_device *dev)
{
	int hoplimit = ipv6_devconf.hop_limit;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		hoplimit = idev->cnf.hop_limit;
		in6_dev_put(idev);
	}
	return hoplimit;
}

/*
 *
 */

int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		  void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly forbids using non-link-local
			   addresses as nexthop addresses.
			   Otherwise, the router will not be able to send
			   redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	err = fib6_del(rt, nlh, _rtattr, req);
	dst_release(&rt->u.dst);

	write_unlock_bh(&rt6_lock);

	return err;
}

static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, _rtattr, req);
		}
	}
	read_unlock_bh(&rt6_lock);

	return err;
}
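
/*
 * Note: ip6_route_del() removes the first route under the located
 * prefix that also matches whichever of output interface, gateway and
 * metric the request specified; fields left at zero act as wildcards.
 */
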
/*
 *	Handle redirects
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	int strict;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */
	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
restart:
	for (rt = fn->leaf; rt; rt = rt->u.next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (neigh->dev != rt->rt6i_dev)
			continue;
		if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
			continue;
		break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	else if (strict) {
		while ((fn = fn->parent) != NULL) {
			if (fn->fn_flags & RTN_ROOT)
				break;
			if (fn->fn_flags & RTN_RTINFO)
				goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);

	if (!rt) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		return;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
		goto out;

	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		return;
	}

out:
	dst_release(&rt->u.dst);
	return;
}
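
/*
 * rt6_pmtu_discovery() below is driven by ICMPv6 "packet too big"
 * messages; unlike ip6_rt_update_pmtu() earlier, which only touches an
 * already cached host entry, it may first have to create one (COW for
 * connected routes, clone for gatewayed or NONEXTHOP ones) to hold the
 * learned MTU.
 */
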
/*
 *	Handle ICMP "packet too big" messages,
 *	i.e. Path MTU discovery
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC 2460, when a node receives a Too Big
		 * message reporting a PMTU below the IPv6 Minimum Link MTU,
		 * the PMTU is set to that minimum (1280) and a fragment
		 * header must always be included from then on.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

		/* According to RFC 1981, a PMTU increase shouldn't be
		 * detected within 5 minutes; the recommended timer is
		 * 10 minutes.  The route expiration time is therefore set
		 * to ip6_rt_mtu_expires, which is 10 minutes.  After that
		 * the decreased pmtu expires and PMTU increase detection
		 * happens automatically.
		 */
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}
out:
	dst_release(&rt->u.dst);
}

/*
 *	Misc support functions
 */

static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt) {
		rt->u.dst.input = ort->u.dst.input;
		rt->u.dst.output = ort->u.dst.output;

		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
		rt->u.dst.dev = ort->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->u.dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;

	write_lock_bh(&rt6_lock);
	fn = fib6_locate(&ip6_routing_table, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->u.dst);
		break;
	}
out:
	write_unlock_bh(&rt6_lock);
	return rt;
}

static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct in6_rtmsg rtmsg;

	memset(&rtmsg, 0, sizeof(rtmsg));
	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
	ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
	rtmsg.rtmsg_dst_len = prefixlen;
	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
	rtmsg.rtmsg_metric = 1024;
	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		rtmsg.rtmsg_flags |= RTF_DEFAULT;
	rtmsg.rtmsg_ifindex = ifindex;

	ip6_route_add(&rtmsg, NULL, NULL, NULL);

	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
}
#endif

struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_node *fn;

	fn = &ip6_routing_table;

	write_lock_bh(&rt6_lock);
	for (rt = fn->leaf; rt; rt=rt->u.next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	write_unlock_bh(&rt6_lock);
	return rt;
}

struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct in6_rtmsg rtmsg;

	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
	rtmsg.rtmsg_metric = 1024;
	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
			    RTF_PREF(pref);

	rtmsg.rtmsg_ifindex = dev->ifindex;

	ip6_route_add(&rtmsg, NULL, NULL, NULL);
	return rt6_get_dflt_router(gwaddr, dev);
}

void rt6_purge_dflt_routers(void)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->u.dst);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}

int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	};

	return -EINVAL;
}
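
/*
 * Besides rtnetlink, routes can be added and removed through the
 * SIOCADDRT/SIOCDELRT ioctls handled above, which take a struct
 * in6_rtmsg and require CAP_NET_ADMIN.  A minimal userspace sketch
 * (illustrative only; interface name and prefix are made up, error
 * handling omitted):
 *
 *	struct in6_rtmsg rtm;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	memset(&rtm, 0, sizeof(rtm));
 *	inet_pton(AF_INET6, "2001:db8::", &rtm.rtmsg_dst);
 *	rtm.rtmsg_dst_len = 32;
 *	rtm.rtmsg_metric  = 1;
 *	rtm.rtmsg_flags   = RTF_UP;
 *	rtm.rtmsg_ifindex = if_nametoindex("eth0");
 *	ioctl(fd, SIOCADDRT, &rtm);
 */
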
/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}
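
/*
 * addrconf_dst_alloc() above builds the routes for the host's own
 * addresses: /128 entries flagged RTF_LOCAL (or RTF_ANYCAST) whose
 * device is loopback_dev, so locally destined traffic is delivered
 * through ip6_input().
 */
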
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
	    rt != &ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}

struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For an administrative MTU increase there is no way to discover
	   an IPv6 PMTU increase, so the PMTU must be updated here.
	   Since RFC 1981 does not cover administrative MTU increases
	   (e.g. jumbo frames), updating the PMTU here is a MUST.
	 */
	/*
	   If the new MTU is less than the route PMTU, this new MTU will be
	   the lowest MTU in the path; update the route PMTU to reflect the
	   decrease.  If the new MTU is greater than the route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU to
	   reflect the increase.  In this case, if the other nodes' MTUs are
	   also the lowest in the path, a Too Big message will trigger PMTU
	   discovery again.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
	    (dst_mtu(&rt->u.dst) > arg->mtu ||
	     (dst_mtu(&rt->u.dst) < arg->mtu &&
	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
	return 0;
}

void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg;

	arg.dev = dev;
	arg.mtu = mtu;
	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
	read_unlock_bh(&rt6_lock);
}

static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
			      struct in6_rtmsg *rtmsg)
{
	memset(rtmsg, 0, sizeof(*rtmsg));

	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
	rtmsg->rtmsg_src_len = r->rtm_src_len;
	rtmsg->rtmsg_flags = RTF_UP;
	if (r->rtm_type == RTN_UNREACHABLE)
		rtmsg->rtmsg_flags |= RTF_REJECT;

	if (rta[RTA_GATEWAY-1]) {
		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
		rtmsg->rtmsg_flags |= RTF_GATEWAY;
	}
	if (rta[RTA_DST-1]) {
		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
	}
	if (rta[RTA_SRC-1]) {
		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
	}
	if (rta[RTA_OIF-1]) {
		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
	}
	if (rta[RTA_PRIORITY-1]) {
		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
	}
	return 0;
}

int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
			     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
			     prefix, NLM_F_MULTI);
}

static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->u.next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;
			return 1;
		}
		BUG_TRAP(res!=0);
	}
	w->leaf = NULL;
	return 0;
}

static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	cb->done = (void*)cb->args[1];
	cb->args[1] = 0;
}

static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done ? cb->done(cb) : 0;
}
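
/*
 * inet6_dump_fib() below keeps its tree walker in cb->args[0] so that a
 * dump which overflows one skb can be resumed on the next recvmsg()
 * call; fib6_dump_node() suspends the walk by remembering the leaf that
 * did not fit, and fib6_dump_done() tears the walker down again.
 */
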
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kzalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}

int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers; this skb can pass
	   through a good chunk of the routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
		     struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
	u32 pid = current->pid;
	u32 seq = 0;

	if (req)
		pid = req->pid;
	if (nlh)
		seq = nlh->nlmsg_seq;

	skb = alloc_skb(size, gfp_any());
	if (!skb) {
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
		return;
	}
	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
		kfree_skb(skb);
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
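
/*
 * rt6_info_route() below formats each route as one fixed-width
 * /proc/net/ipv6_route line: destination (32 hex digits) and prefix
 * length, source and prefix length, nexthop, then metric, reference
 * count, use count and flags as 8-digit hex words, followed by the
 * device name.
 */
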
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	if (arg->len >= arg->length)
		return 0;

	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}

static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		   rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		   rt6_stats.fib_rt_cache,
		   atomic_read(&ip6_dst_ops.entries),
		   rt6_stats.fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}

static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static int flush_delay;

static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}
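
/*
 * The "flush" entry in the table below is write-only (mode 0200) and is
 * normally visible as /proc/sys/net/ipv6/route/flush (path assumed from
 * the standard sysctl layout); writing an integer to it makes
 * ipv6_sysctl_rtcache_flush() above run fib6_run_gc() immediately.
 *
 * Illustrative shell usage:
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 */
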
ctl_table ipv6_route_table[] = {
	{
		.ctl_name	= NET_IPV6_ROUTE_FLUSH,
		.procname	= "flush",
		.data		= &flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &ipv6_sysctl_rtcache_flush
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_THRESH,
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MAX_SIZE,
		.procname	= "max_size",
		.data		= &ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	= "gc_min_interval",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	= "gc_timeout",
		.data		= &ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	= "gc_interval",
		.data		= &ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	= "gc_elasticity",
		.data		= &ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	= "mtu_expires",
		.data		= &ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	= "min_adv_mss",
		.data		= &ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	= "gc_min_interval_ms",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_ms_jiffies,
		.strategy	= &sysctl_ms_jiffies,
	},
	{ .ctl_name = 0 }
};

#endif

void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						    sizeof(struct rt6_info),
						    0, SLAB_HWCACHE_ALIGN,
						    NULL, NULL);
	if (!ip6_dst_ops.kmem_cachep)
		panic("cannot create ip6_dst_cache");

	fib6_init();
#ifdef CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
#ifdef CONFIG_XFRM
	xfrm6_init();
#endif
}

void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}