1 /* 2 * Linux INET6 implementation 3 * Forwarding Information Database 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $ 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 /* 17 * Changes: 18 * Yuji SEKIYA @USAGI: Support default route on router node; 19 * remove ip6_null_entry from the top of 20 * routing table. 21 */ 22 #include <linux/config.h> 23 #include <linux/errno.h> 24 #include <linux/types.h> 25 #include <linux/net.h> 26 #include <linux/route.h> 27 #include <linux/netdevice.h> 28 #include <linux/in6.h> 29 #include <linux/init.h> 30 31 #ifdef CONFIG_PROC_FS 32 #include <linux/proc_fs.h> 33 #endif 34 35 #include <net/ipv6.h> 36 #include <net/ndisc.h> 37 #include <net/addrconf.h> 38 39 #include <net/ip6_fib.h> 40 #include <net/ip6_route.h> 41 42 #define RT6_DEBUG 2 43 44 #if RT6_DEBUG >= 3 45 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 46 #else 47 #define RT6_TRACE(x...) do { ; } while (0) 48 #endif 49 50 struct rt6_statistics rt6_stats; 51 52 static kmem_cache_t * fib6_node_kmem __read_mostly; 53 54 enum fib_walk_state_t 55 { 56 #ifdef CONFIG_IPV6_SUBTREES 57 FWS_S, 58 #endif 59 FWS_L, 60 FWS_R, 61 FWS_C, 62 FWS_U 63 }; 64 65 struct fib6_cleaner_t 66 { 67 struct fib6_walker_t w; 68 int (*func)(struct rt6_info *, void *arg); 69 void *arg; 70 }; 71 72 DEFINE_RWLOCK(fib6_walker_lock); 73 74 75 #ifdef CONFIG_IPV6_SUBTREES 76 #define FWS_INIT FWS_S 77 #define SUBTREE(fn) ((fn)->subtree) 78 #else 79 #define FWS_INIT FWS_L 80 #define SUBTREE(fn) NULL 81 #endif 82 83 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); 84 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); 85 86 /* 87 * A routing update causes an increase of the serial number on the 88 * affected subtree. This allows for cached routes to be asynchronously 89 * tested when modifications are made to the destination cache as a 90 * result of redirects, path MTU changes, etc. 91 */ 92 93 static __u32 rt_sernum; 94 95 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); 96 97 struct fib6_walker_t fib6_walker_list = { 98 .prev = &fib6_walker_list, 99 .next = &fib6_walker_list, 100 }; 101 102 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) 103 104 static __inline__ u32 fib6_new_sernum(void) 105 { 106 u32 n = ++rt_sernum; 107 if ((__s32)n <= 0) 108 rt_sernum = n = 1; 109 return n; 110 } 111 112 /* 113 * Auxiliary address test functions for the radix tree. 114 * 115 * These assume a 32bit processor (although it will work on 116 * 64bit processors) 117 */ 118 119 /* 120 * test bit 121 */ 122 123 static __inline__ int addr_bit_set(void *token, int fn_bit) 124 { 125 __u32 *addr = token; 126 127 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; 128 } 129 130 static __inline__ struct fib6_node * node_alloc(void) 131 { 132 struct fib6_node *fn; 133 134 if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL) 135 memset(fn, 0, sizeof(struct fib6_node)); 136 137 return fn; 138 } 139 140 static __inline__ void node_free(struct fib6_node * fn) 141 { 142 kmem_cache_free(fib6_node_kmem, fn); 143 } 144 145 static __inline__ void rt6_release(struct rt6_info *rt) 146 { 147 if (atomic_dec_and_test(&rt->rt6i_ref)) 148 dst_free(&rt->u.dst); 149 } 150 151 152 /* 153 * Routing Table 154 * 155 * return the appropriate node for a routing tree "add" operation 156 * by either creating and inserting or by returning an existing 157 * node. 158 */ 159 160 static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, 161 int addrlen, int plen, 162 int offset) 163 { 164 struct fib6_node *fn, *in, *ln; 165 struct fib6_node *pn = NULL; 166 struct rt6key *key; 167 int bit; 168 int dir = 0; 169 __u32 sernum = fib6_new_sernum(); 170 171 RT6_TRACE("fib6_add_1\n"); 172 173 /* insert node in tree */ 174 175 fn = root; 176 177 do { 178 key = (struct rt6key *)((u8 *)fn->leaf + offset); 179 180 /* 181 * Prefix match 182 */ 183 if (plen < fn->fn_bit || 184 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 185 goto insert_above; 186 187 /* 188 * Exact match ? 189 */ 190 191 if (plen == fn->fn_bit) { 192 /* clean up an intermediate node */ 193 if ((fn->fn_flags & RTN_RTINFO) == 0) { 194 rt6_release(fn->leaf); 195 fn->leaf = NULL; 196 } 197 198 fn->fn_sernum = sernum; 199 200 return fn; 201 } 202 203 /* 204 * We have more bits to go 205 */ 206 207 /* Try to walk down on tree. */ 208 fn->fn_sernum = sernum; 209 dir = addr_bit_set(addr, fn->fn_bit); 210 pn = fn; 211 fn = dir ? fn->right: fn->left; 212 } while (fn); 213 214 /* 215 * We walked to the bottom of tree. 216 * Create new leaf node without children. 217 */ 218 219 ln = node_alloc(); 220 221 if (ln == NULL) 222 return NULL; 223 ln->fn_bit = plen; 224 225 ln->parent = pn; 226 ln->fn_sernum = sernum; 227 228 if (dir) 229 pn->right = ln; 230 else 231 pn->left = ln; 232 233 return ln; 234 235 236 insert_above: 237 /* 238 * split since we don't have a common prefix anymore or 239 * we have a less significant route. 240 * we've to insert an intermediate node on the list 241 * this new node will point to the one we need to create 242 * and the current 243 */ 244 245 pn = fn->parent; 246 247 /* find 1st bit in difference between the 2 addrs. 248 249 See comment in __ipv6_addr_diff: bit may be an invalid value, 250 but if it is >= plen, the value is ignored in any case. 251 */ 252 253 bit = __ipv6_addr_diff(addr, &key->addr, addrlen); 254 255 /* 256 * (intermediate)[in] 257 * / \ 258 * (new leaf node)[ln] (old node)[fn] 259 */ 260 if (plen > bit) { 261 in = node_alloc(); 262 ln = node_alloc(); 263 264 if (in == NULL || ln == NULL) { 265 if (in) 266 node_free(in); 267 if (ln) 268 node_free(ln); 269 return NULL; 270 } 271 272 /* 273 * new intermediate node. 274 * RTN_RTINFO will 275 * be off since that an address that chooses one of 276 * the branches would not match less specific routes 277 * in the other branch 278 */ 279 280 in->fn_bit = bit; 281 282 in->parent = pn; 283 in->leaf = fn->leaf; 284 atomic_inc(&in->leaf->rt6i_ref); 285 286 in->fn_sernum = sernum; 287 288 /* update parent pointer */ 289 if (dir) 290 pn->right = in; 291 else 292 pn->left = in; 293 294 ln->fn_bit = plen; 295 296 ln->parent = in; 297 fn->parent = in; 298 299 ln->fn_sernum = sernum; 300 301 if (addr_bit_set(addr, bit)) { 302 in->right = ln; 303 in->left = fn; 304 } else { 305 in->left = ln; 306 in->right = fn; 307 } 308 } else { /* plen <= bit */ 309 310 /* 311 * (new leaf node)[ln] 312 * / \ 313 * (old node)[fn] NULL 314 */ 315 316 ln = node_alloc(); 317 318 if (ln == NULL) 319 return NULL; 320 321 ln->fn_bit = plen; 322 323 ln->parent = pn; 324 325 ln->fn_sernum = sernum; 326 327 if (dir) 328 pn->right = ln; 329 else 330 pn->left = ln; 331 332 if (addr_bit_set(&key->addr, plen)) 333 ln->right = fn; 334 else 335 ln->left = fn; 336 337 fn->parent = ln; 338 } 339 return ln; 340 } 341 342 /* 343 * Insert routing information in a node. 344 */ 345 346 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 347 struct nlmsghdr *nlh, struct netlink_skb_parms *req) 348 { 349 struct rt6_info *iter = NULL; 350 struct rt6_info **ins; 351 352 ins = &fn->leaf; 353 354 if (fn->fn_flags&RTN_TL_ROOT && 355 fn->leaf == &ip6_null_entry && 356 !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){ 357 fn->leaf = rt; 358 rt->u.next = NULL; 359 goto out; 360 } 361 362 for (iter = fn->leaf; iter; iter=iter->u.next) { 363 /* 364 * Search for duplicates 365 */ 366 367 if (iter->rt6i_metric == rt->rt6i_metric) { 368 /* 369 * Same priority level 370 */ 371 372 if (iter->rt6i_dev == rt->rt6i_dev && 373 iter->rt6i_idev == rt->rt6i_idev && 374 ipv6_addr_equal(&iter->rt6i_gateway, 375 &rt->rt6i_gateway)) { 376 if (!(iter->rt6i_flags&RTF_EXPIRES)) 377 return -EEXIST; 378 iter->rt6i_expires = rt->rt6i_expires; 379 if (!(rt->rt6i_flags&RTF_EXPIRES)) { 380 iter->rt6i_flags &= ~RTF_EXPIRES; 381 iter->rt6i_expires = 0; 382 } 383 return -EEXIST; 384 } 385 } 386 387 if (iter->rt6i_metric > rt->rt6i_metric) 388 break; 389 390 ins = &iter->u.next; 391 } 392 393 /* 394 * insert node 395 */ 396 397 out: 398 rt->u.next = iter; 399 *ins = rt; 400 rt->rt6i_node = fn; 401 atomic_inc(&rt->rt6i_ref); 402 inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); 403 rt6_stats.fib_rt_entries++; 404 405 if ((fn->fn_flags & RTN_RTINFO) == 0) { 406 rt6_stats.fib_route_nodes++; 407 fn->fn_flags |= RTN_RTINFO; 408 } 409 410 return 0; 411 } 412 413 static __inline__ void fib6_start_gc(struct rt6_info *rt) 414 { 415 if (ip6_fib_timer.expires == 0 && 416 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) 417 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 418 } 419 420 void fib6_force_start_gc(void) 421 { 422 if (ip6_fib_timer.expires == 0) 423 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 424 } 425 426 /* 427 * Add routing information to the routing tree. 428 * <destination addr>/<source addr> 429 * with source addr info in sub-trees 430 */ 431 432 int fib6_add(struct fib6_node *root, struct rt6_info *rt, 433 struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) 434 { 435 struct fib6_node *fn; 436 int err = -ENOMEM; 437 438 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), 439 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst)); 440 441 if (fn == NULL) 442 goto out; 443 444 #ifdef CONFIG_IPV6_SUBTREES 445 if (rt->rt6i_src.plen) { 446 struct fib6_node *sn; 447 448 if (fn->subtree == NULL) { 449 struct fib6_node *sfn; 450 451 /* 452 * Create subtree. 453 * 454 * fn[main tree] 455 * | 456 * sfn[subtree root] 457 * \ 458 * sn[new leaf node] 459 */ 460 461 /* Create subtree root node */ 462 sfn = node_alloc(); 463 if (sfn == NULL) 464 goto st_failure; 465 466 sfn->leaf = &ip6_null_entry; 467 atomic_inc(&ip6_null_entry.rt6i_ref); 468 sfn->fn_flags = RTN_ROOT; 469 sfn->fn_sernum = fib6_new_sernum(); 470 471 /* Now add the first leaf node to new subtree */ 472 473 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 474 sizeof(struct in6_addr), rt->rt6i_src.plen, 475 offsetof(struct rt6_info, rt6i_src)); 476 477 if (sn == NULL) { 478 /* If it is failed, discard just allocated 479 root, and then (in st_failure) stale node 480 in main tree. 481 */ 482 node_free(sfn); 483 goto st_failure; 484 } 485 486 /* Now link new subtree to main tree */ 487 sfn->parent = fn; 488 fn->subtree = sfn; 489 if (fn->leaf == NULL) { 490 fn->leaf = rt; 491 atomic_inc(&rt->rt6i_ref); 492 } 493 } else { 494 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 495 sizeof(struct in6_addr), rt->rt6i_src.plen, 496 offsetof(struct rt6_info, rt6i_src)); 497 498 if (sn == NULL) 499 goto st_failure; 500 } 501 502 fn = sn; 503 } 504 #endif 505 506 err = fib6_add_rt2node(fn, rt, nlh, req); 507 508 if (err == 0) { 509 fib6_start_gc(rt); 510 if (!(rt->rt6i_flags&RTF_CACHE)) 511 fib6_prune_clones(fn, rt); 512 } 513 514 out: 515 if (err) 516 dst_free(&rt->u.dst); 517 return err; 518 519 #ifdef CONFIG_IPV6_SUBTREES 520 /* Subtree creation failed, probably main tree node 521 is orphan. If it is, shoot it. 522 */ 523 st_failure: 524 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 525 fib6_repair_tree(fn); 526 dst_free(&rt->u.dst); 527 return err; 528 #endif 529 } 530 531 /* 532 * Routing tree lookup 533 * 534 */ 535 536 struct lookup_args { 537 int offset; /* key offset on rt6_info */ 538 struct in6_addr *addr; /* search key */ 539 }; 540 541 static struct fib6_node * fib6_lookup_1(struct fib6_node *root, 542 struct lookup_args *args) 543 { 544 struct fib6_node *fn; 545 int dir; 546 547 /* 548 * Descend on a tree 549 */ 550 551 fn = root; 552 553 for (;;) { 554 struct fib6_node *next; 555 556 dir = addr_bit_set(args->addr, fn->fn_bit); 557 558 next = dir ? fn->right : fn->left; 559 560 if (next) { 561 fn = next; 562 continue; 563 } 564 565 break; 566 } 567 568 while ((fn->fn_flags & RTN_ROOT) == 0) { 569 #ifdef CONFIG_IPV6_SUBTREES 570 if (fn->subtree) { 571 struct fib6_node *st; 572 struct lookup_args *narg; 573 574 narg = args + 1; 575 576 if (narg->addr) { 577 st = fib6_lookup_1(fn->subtree, narg); 578 579 if (st && !(st->fn_flags & RTN_ROOT)) 580 return st; 581 } 582 } 583 #endif 584 585 if (fn->fn_flags & RTN_RTINFO) { 586 struct rt6key *key; 587 588 key = (struct rt6key *) ((u8 *) fn->leaf + 589 args->offset); 590 591 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) 592 return fn; 593 } 594 595 fn = fn->parent; 596 } 597 598 return NULL; 599 } 600 601 struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, 602 struct in6_addr *saddr) 603 { 604 struct lookup_args args[2]; 605 struct fib6_node *fn; 606 607 args[0].offset = offsetof(struct rt6_info, rt6i_dst); 608 args[0].addr = daddr; 609 610 #ifdef CONFIG_IPV6_SUBTREES 611 args[1].offset = offsetof(struct rt6_info, rt6i_src); 612 args[1].addr = saddr; 613 #endif 614 615 fn = fib6_lookup_1(root, args); 616 617 if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) 618 fn = root; 619 620 return fn; 621 } 622 623 /* 624 * Get node with specified destination prefix (and source prefix, 625 * if subtrees are used) 626 */ 627 628 629 static struct fib6_node * fib6_locate_1(struct fib6_node *root, 630 struct in6_addr *addr, 631 int plen, int offset) 632 { 633 struct fib6_node *fn; 634 635 for (fn = root; fn ; ) { 636 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); 637 638 /* 639 * Prefix match 640 */ 641 if (plen < fn->fn_bit || 642 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 643 return NULL; 644 645 if (plen == fn->fn_bit) 646 return fn; 647 648 /* 649 * We have more bits to go 650 */ 651 if (addr_bit_set(addr, fn->fn_bit)) 652 fn = fn->right; 653 else 654 fn = fn->left; 655 } 656 return NULL; 657 } 658 659 struct fib6_node * fib6_locate(struct fib6_node *root, 660 struct in6_addr *daddr, int dst_len, 661 struct in6_addr *saddr, int src_len) 662 { 663 struct fib6_node *fn; 664 665 fn = fib6_locate_1(root, daddr, dst_len, 666 offsetof(struct rt6_info, rt6i_dst)); 667 668 #ifdef CONFIG_IPV6_SUBTREES 669 if (src_len) { 670 BUG_TRAP(saddr!=NULL); 671 if (fn == NULL) 672 fn = fn->subtree; 673 if (fn) 674 fn = fib6_locate_1(fn, saddr, src_len, 675 offsetof(struct rt6_info, rt6i_src)); 676 } 677 #endif 678 679 if (fn && fn->fn_flags&RTN_RTINFO) 680 return fn; 681 682 return NULL; 683 } 684 685 686 /* 687 * Deletion 688 * 689 */ 690 691 static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) 692 { 693 if (fn->fn_flags&RTN_ROOT) 694 return &ip6_null_entry; 695 696 while(fn) { 697 if(fn->left) 698 return fn->left->leaf; 699 700 if(fn->right) 701 return fn->right->leaf; 702 703 fn = SUBTREE(fn); 704 } 705 return NULL; 706 } 707 708 /* 709 * Called to trim the tree of intermediate nodes when possible. "fn" 710 * is the node we want to try and remove. 711 */ 712 713 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) 714 { 715 int children; 716 int nstate; 717 struct fib6_node *child, *pn; 718 struct fib6_walker_t *w; 719 int iter = 0; 720 721 for (;;) { 722 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); 723 iter++; 724 725 BUG_TRAP(!(fn->fn_flags&RTN_RTINFO)); 726 BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT)); 727 BUG_TRAP(fn->leaf==NULL); 728 729 children = 0; 730 child = NULL; 731 if (fn->right) child = fn->right, children |= 1; 732 if (fn->left) child = fn->left, children |= 2; 733 734 if (children == 3 || SUBTREE(fn) 735 #ifdef CONFIG_IPV6_SUBTREES 736 /* Subtree root (i.e. fn) may have one child */ 737 || (children && fn->fn_flags&RTN_ROOT) 738 #endif 739 ) { 740 fn->leaf = fib6_find_prefix(fn); 741 #if RT6_DEBUG >= 2 742 if (fn->leaf==NULL) { 743 BUG_TRAP(fn->leaf); 744 fn->leaf = &ip6_null_entry; 745 } 746 #endif 747 atomic_inc(&fn->leaf->rt6i_ref); 748 return fn->parent; 749 } 750 751 pn = fn->parent; 752 #ifdef CONFIG_IPV6_SUBTREES 753 if (SUBTREE(pn) == fn) { 754 BUG_TRAP(fn->fn_flags&RTN_ROOT); 755 SUBTREE(pn) = NULL; 756 nstate = FWS_L; 757 } else { 758 BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); 759 #endif 760 if (pn->right == fn) pn->right = child; 761 else if (pn->left == fn) pn->left = child; 762 #if RT6_DEBUG >= 2 763 else BUG_TRAP(0); 764 #endif 765 if (child) 766 child->parent = pn; 767 nstate = FWS_R; 768 #ifdef CONFIG_IPV6_SUBTREES 769 } 770 #endif 771 772 read_lock(&fib6_walker_lock); 773 FOR_WALKERS(w) { 774 if (child == NULL) { 775 if (w->root == fn) { 776 w->root = w->node = NULL; 777 RT6_TRACE("W %p adjusted by delroot 1\n", w); 778 } else if (w->node == fn) { 779 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); 780 w->node = pn; 781 w->state = nstate; 782 } 783 } else { 784 if (w->root == fn) { 785 w->root = child; 786 RT6_TRACE("W %p adjusted by delroot 2\n", w); 787 } 788 if (w->node == fn) { 789 w->node = child; 790 if (children&2) { 791 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 792 w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; 793 } else { 794 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 795 w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; 796 } 797 } 798 } 799 } 800 read_unlock(&fib6_walker_lock); 801 802 node_free(fn); 803 if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) 804 return pn; 805 806 rt6_release(pn->leaf); 807 pn->leaf = NULL; 808 fn = pn; 809 } 810 } 811 812 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 813 struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) 814 { 815 struct fib6_walker_t *w; 816 struct rt6_info *rt = *rtp; 817 818 RT6_TRACE("fib6_del_route\n"); 819 820 /* Unlink it */ 821 *rtp = rt->u.next; 822 rt->rt6i_node = NULL; 823 rt6_stats.fib_rt_entries--; 824 rt6_stats.fib_discarded_routes++; 825 826 /* Adjust walkers */ 827 read_lock(&fib6_walker_lock); 828 FOR_WALKERS(w) { 829 if (w->state == FWS_C && w->leaf == rt) { 830 RT6_TRACE("walker %p adjusted by delroute\n", w); 831 w->leaf = rt->u.next; 832 if (w->leaf == NULL) 833 w->state = FWS_U; 834 } 835 } 836 read_unlock(&fib6_walker_lock); 837 838 rt->u.next = NULL; 839 840 if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT) 841 fn->leaf = &ip6_null_entry; 842 843 /* If it was last route, expunge its radix tree node */ 844 if (fn->leaf == NULL) { 845 fn->fn_flags &= ~RTN_RTINFO; 846 rt6_stats.fib_route_nodes--; 847 fn = fib6_repair_tree(fn); 848 } 849 850 if (atomic_read(&rt->rt6i_ref) != 1) { 851 /* This route is used as dummy address holder in some split 852 * nodes. It is not leaked, but it still holds other resources, 853 * which must be released in time. So, scan ascendant nodes 854 * and replace dummy references to this route with references 855 * to still alive ones. 856 */ 857 while (fn) { 858 if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) { 859 fn->leaf = fib6_find_prefix(fn); 860 atomic_inc(&fn->leaf->rt6i_ref); 861 rt6_release(rt); 862 } 863 fn = fn->parent; 864 } 865 /* No more references are possible at this point. */ 866 if (atomic_read(&rt->rt6i_ref) != 1) BUG(); 867 } 868 869 inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); 870 rt6_release(rt); 871 } 872 873 int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) 874 { 875 struct fib6_node *fn = rt->rt6i_node; 876 struct rt6_info **rtp; 877 878 #if RT6_DEBUG >= 2 879 if (rt->u.dst.obsolete>0) { 880 BUG_TRAP(fn==NULL); 881 return -ENOENT; 882 } 883 #endif 884 if (fn == NULL || rt == &ip6_null_entry) 885 return -ENOENT; 886 887 BUG_TRAP(fn->fn_flags&RTN_RTINFO); 888 889 if (!(rt->rt6i_flags&RTF_CACHE)) 890 fib6_prune_clones(fn, rt); 891 892 /* 893 * Walk the leaf entries looking for ourself 894 */ 895 896 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { 897 if (*rtp == rt) { 898 fib6_del_route(fn, rtp, nlh, _rtattr, req); 899 return 0; 900 } 901 } 902 return -ENOENT; 903 } 904 905 /* 906 * Tree traversal function. 907 * 908 * Certainly, it is not interrupt safe. 909 * However, it is internally reenterable wrt itself and fib6_add/fib6_del. 910 * It means, that we can modify tree during walking 911 * and use this function for garbage collection, clone pruning, 912 * cleaning tree when a device goes down etc. etc. 913 * 914 * It guarantees that every node will be traversed, 915 * and that it will be traversed only once. 916 * 917 * Callback function w->func may return: 918 * 0 -> continue walking. 919 * positive value -> walking is suspended (used by tree dumps, 920 * and probably by gc, if it will be split to several slices) 921 * negative value -> terminate walking. 922 * 923 * The function itself returns: 924 * 0 -> walk is complete. 925 * >0 -> walk is incomplete (i.e. suspended) 926 * <0 -> walk is terminated by an error. 927 */ 928 929 int fib6_walk_continue(struct fib6_walker_t *w) 930 { 931 struct fib6_node *fn, *pn; 932 933 for (;;) { 934 fn = w->node; 935 if (fn == NULL) 936 return 0; 937 938 if (w->prune && fn != w->root && 939 fn->fn_flags&RTN_RTINFO && w->state < FWS_C) { 940 w->state = FWS_C; 941 w->leaf = fn->leaf; 942 } 943 switch (w->state) { 944 #ifdef CONFIG_IPV6_SUBTREES 945 case FWS_S: 946 if (SUBTREE(fn)) { 947 w->node = SUBTREE(fn); 948 continue; 949 } 950 w->state = FWS_L; 951 #endif 952 case FWS_L: 953 if (fn->left) { 954 w->node = fn->left; 955 w->state = FWS_INIT; 956 continue; 957 } 958 w->state = FWS_R; 959 case FWS_R: 960 if (fn->right) { 961 w->node = fn->right; 962 w->state = FWS_INIT; 963 continue; 964 } 965 w->state = FWS_C; 966 w->leaf = fn->leaf; 967 case FWS_C: 968 if (w->leaf && fn->fn_flags&RTN_RTINFO) { 969 int err = w->func(w); 970 if (err) 971 return err; 972 continue; 973 } 974 w->state = FWS_U; 975 case FWS_U: 976 if (fn == w->root) 977 return 0; 978 pn = fn->parent; 979 w->node = pn; 980 #ifdef CONFIG_IPV6_SUBTREES 981 if (SUBTREE(pn) == fn) { 982 BUG_TRAP(fn->fn_flags&RTN_ROOT); 983 w->state = FWS_L; 984 continue; 985 } 986 #endif 987 if (pn->left == fn) { 988 w->state = FWS_R; 989 continue; 990 } 991 if (pn->right == fn) { 992 w->state = FWS_C; 993 w->leaf = w->node->leaf; 994 continue; 995 } 996 #if RT6_DEBUG >= 2 997 BUG_TRAP(0); 998 #endif 999 } 1000 } 1001 } 1002 1003 int fib6_walk(struct fib6_walker_t *w) 1004 { 1005 int res; 1006 1007 w->state = FWS_INIT; 1008 w->node = w->root; 1009 1010 fib6_walker_link(w); 1011 res = fib6_walk_continue(w); 1012 if (res <= 0) 1013 fib6_walker_unlink(w); 1014 return res; 1015 } 1016 1017 static int fib6_clean_node(struct fib6_walker_t *w) 1018 { 1019 int res; 1020 struct rt6_info *rt; 1021 struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; 1022 1023 for (rt = w->leaf; rt; rt = rt->u.next) { 1024 res = c->func(rt, c->arg); 1025 if (res < 0) { 1026 w->leaf = rt; 1027 res = fib6_del(rt, NULL, NULL, NULL); 1028 if (res) { 1029 #if RT6_DEBUG >= 2 1030 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); 1031 #endif 1032 continue; 1033 } 1034 return 0; 1035 } 1036 BUG_TRAP(res==0); 1037 } 1038 w->leaf = rt; 1039 return 0; 1040 } 1041 1042 /* 1043 * Convenient frontend to tree walker. 1044 * 1045 * func is called on each route. 1046 * It may return -1 -> delete this route. 1047 * 0 -> continue walking 1048 * 1049 * prune==1 -> only immediate children of node (certainly, 1050 * ignoring pure split nodes) will be scanned. 1051 */ 1052 1053 void fib6_clean_tree(struct fib6_node *root, 1054 int (*func)(struct rt6_info *, void *arg), 1055 int prune, void *arg) 1056 { 1057 struct fib6_cleaner_t c; 1058 1059 c.w.root = root; 1060 c.w.func = fib6_clean_node; 1061 c.w.prune = prune; 1062 c.func = func; 1063 c.arg = arg; 1064 1065 fib6_walk(&c.w); 1066 } 1067 1068 static int fib6_prune_clone(struct rt6_info *rt, void *arg) 1069 { 1070 if (rt->rt6i_flags & RTF_CACHE) { 1071 RT6_TRACE("pruning clone %p\n", rt); 1072 return -1; 1073 } 1074 1075 return 0; 1076 } 1077 1078 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt) 1079 { 1080 fib6_clean_tree(fn, fib6_prune_clone, 1, rt); 1081 } 1082 1083 /* 1084 * Garbage collection 1085 */ 1086 1087 static struct fib6_gc_args 1088 { 1089 int timeout; 1090 int more; 1091 } gc_args; 1092 1093 static int fib6_age(struct rt6_info *rt, void *arg) 1094 { 1095 unsigned long now = jiffies; 1096 1097 /* 1098 * check addrconf expiration here. 1099 * Routes are expired even if they are in use. 1100 * 1101 * Also age clones. Note, that clones are aged out 1102 * only if they are not in use now. 1103 */ 1104 1105 if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { 1106 if (time_after(now, rt->rt6i_expires)) { 1107 RT6_TRACE("expiring %p\n", rt); 1108 return -1; 1109 } 1110 gc_args.more++; 1111 } else if (rt->rt6i_flags & RTF_CACHE) { 1112 if (atomic_read(&rt->u.dst.__refcnt) == 0 && 1113 time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { 1114 RT6_TRACE("aging clone %p\n", rt); 1115 return -1; 1116 } else if ((rt->rt6i_flags & RTF_GATEWAY) && 1117 (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { 1118 RT6_TRACE("purging route %p via non-router but gateway\n", 1119 rt); 1120 return -1; 1121 } 1122 gc_args.more++; 1123 } 1124 1125 return 0; 1126 } 1127 1128 static DEFINE_SPINLOCK(fib6_gc_lock); 1129 1130 void fib6_run_gc(unsigned long dummy) 1131 { 1132 if (dummy != ~0UL) { 1133 spin_lock_bh(&fib6_gc_lock); 1134 gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval; 1135 } else { 1136 local_bh_disable(); 1137 if (!spin_trylock(&fib6_gc_lock)) { 1138 mod_timer(&ip6_fib_timer, jiffies + HZ); 1139 local_bh_enable(); 1140 return; 1141 } 1142 gc_args.timeout = ip6_rt_gc_interval; 1143 } 1144 gc_args.more = 0; 1145 1146 1147 write_lock_bh(&rt6_lock); 1148 ndisc_dst_gc(&gc_args.more); 1149 fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); 1150 write_unlock_bh(&rt6_lock); 1151 1152 if (gc_args.more) 1153 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 1154 else { 1155 del_timer(&ip6_fib_timer); 1156 ip6_fib_timer.expires = 0; 1157 } 1158 spin_unlock_bh(&fib6_gc_lock); 1159 } 1160 1161 void __init fib6_init(void) 1162 { 1163 fib6_node_kmem = kmem_cache_create("fib6_nodes", 1164 sizeof(struct fib6_node), 1165 0, SLAB_HWCACHE_ALIGN, 1166 NULL, NULL); 1167 if (!fib6_node_kmem) 1168 panic("cannot create fib6_nodes cache"); 1169 } 1170 1171 void fib6_gc_cleanup(void) 1172 { 1173 del_timer(&ip6_fib_timer); 1174 kmem_cache_destroy(fib6_node_kmem); 1175 } 1176