1 /* 2 * This program is free software; you can redistribute it and/or 3 * modify it under the terms of the GNU General Public License 4 * as published by the Free Software Foundation; either version 5 * 2 of the License, or (at your option) any later version. 6 * 7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 8 * & Swedish University of Agricultural Sciences. 9 * 10 * Jens Laas <jens.laas@data.slu.se> Swedish University of 11 * Agricultural Sciences. 12 * 13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 14 * 15 * This work is based on the LPC-trie which is originally descibed in: 16 * 17 * An experimental study of compression methods for dynamic tries 18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 20 * 21 * 22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 24 * 25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ 26 * 27 * 28 * Code from fib_hash has been reused which includes the following header: 29 * 30 * 31 * INET An implementation of the TCP/IP protocol suite for the LINUX 32 * operating system. INET is implemented using the BSD Socket 33 * interface as the means of communication with the user level. 34 * 35 * IPv4 FIB: lookup engine and maintenance routines. 36 * 37 * 38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 39 * 40 * This program is free software; you can redistribute it and/or 41 * modify it under the terms of the GNU General Public License 42 * as published by the Free Software Foundation; either version 43 * 2 of the License, or (at your option) any later version. 44 */ 45 46 #define VERSION "0.323" 47 48 #include <linux/config.h> 49 #include <asm/uaccess.h> 50 #include <asm/system.h> 51 #include <asm/bitops.h> 52 #include <linux/types.h> 53 #include <linux/kernel.h> 54 #include <linux/sched.h> 55 #include <linux/mm.h> 56 #include <linux/string.h> 57 #include <linux/socket.h> 58 #include <linux/sockios.h> 59 #include <linux/errno.h> 60 #include <linux/in.h> 61 #include <linux/inet.h> 62 #include <linux/netdevice.h> 63 #include <linux/if_arp.h> 64 #include <linux/proc_fs.h> 65 #include <linux/skbuff.h> 66 #include <linux/netlink.h> 67 #include <linux/init.h> 68 #include <linux/list.h> 69 #include <net/ip.h> 70 #include <net/protocol.h> 71 #include <net/route.h> 72 #include <net/tcp.h> 73 #include <net/sock.h> 74 #include <net/ip_fib.h> 75 #include "fib_lookup.h" 76 77 #undef CONFIG_IP_FIB_TRIE_STATS 78 #define MAX_CHILDS 16384 79 80 #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n))) 81 #define KEYLENGTH (8*sizeof(t_key)) 82 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) 83 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) 84 85 static DEFINE_RWLOCK(fib_lock); 86 87 typedef unsigned int t_key; 88 89 #define T_TNODE 0 90 #define T_LEAF 1 91 #define NODE_TYPE_MASK 0x1UL 92 #define NODE_PARENT(_node) \ 93 ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) 94 #define NODE_SET_PARENT(_node, _ptr) \ 95 ((_node)->_parent = (((unsigned long)(_ptr)) | \ 96 ((_node)->_parent & NODE_TYPE_MASK))) 97 #define NODE_INIT_PARENT(_node, _type) \ 98 ((_node)->_parent = (_type)) 99 #define NODE_TYPE(_node) \ 100 ((_node)->_parent & NODE_TYPE_MASK) 101 102 #define IS_TNODE(n) (!(n->_parent & T_LEAF)) 103 #define IS_LEAF(n) (n->_parent & T_LEAF) 104 105 struct node { 106 t_key key; 107 unsigned long _parent; 108 }; 109 110 struct leaf { 111 t_key key; 112 unsigned long _parent; 113 struct hlist_head list; 114 }; 115 116 struct leaf_info { 117 struct hlist_node hlist; 118 int plen; 119 struct list_head falh; 120 }; 121 122 struct tnode { 123 t_key key; 124 unsigned long _parent; 125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 127 unsigned short full_children; /* KEYLENGTH bits needed */ 128 unsigned short empty_children; /* KEYLENGTH bits needed */ 129 struct node *child[0]; 130 }; 131 132 #ifdef CONFIG_IP_FIB_TRIE_STATS 133 struct trie_use_stats { 134 unsigned int gets; 135 unsigned int backtrack; 136 unsigned int semantic_match_passed; 137 unsigned int semantic_match_miss; 138 unsigned int null_node_hit; 139 }; 140 #endif 141 142 struct trie_stat { 143 unsigned int totdepth; 144 unsigned int maxdepth; 145 unsigned int tnodes; 146 unsigned int leaves; 147 unsigned int nullpointers; 148 unsigned int nodesizes[MAX_CHILDS]; 149 }; 150 151 struct trie { 152 struct node *trie; 153 #ifdef CONFIG_IP_FIB_TRIE_STATS 154 struct trie_use_stats stats; 155 #endif 156 int size; 157 unsigned int revision; 158 }; 159 160 static int trie_debug = 0; 161 162 static int tnode_full(struct tnode *tn, struct node *n); 163 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 164 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 165 static int tnode_child_length(struct tnode *tn); 166 static struct node *resize(struct trie *t, struct tnode *tn); 167 static struct tnode *inflate(struct trie *t, struct tnode *tn); 168 static struct tnode *halve(struct trie *t, struct tnode *tn); 169 static void tnode_free(struct tnode *tn); 170 static void trie_dump_seq(struct seq_file *seq, struct trie *t); 171 extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); 172 extern int fib_detect_death(struct fib_info *fi, int order, 173 struct fib_info **last_resort, int *last_idx, int *dflt); 174 175 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, 176 struct nlmsghdr *n, struct netlink_skb_parms *req); 177 178 static kmem_cache_t *fn_alias_kmem; 179 static struct trie *trie_local = NULL, *trie_main = NULL; 180 181 static void trie_bug(char *err) 182 { 183 printk("Trie Bug: %s\n", err); 184 BUG(); 185 } 186 187 static inline struct node *tnode_get_child(struct tnode *tn, int i) 188 { 189 if (i >= 1<<tn->bits) 190 trie_bug("tnode_get_child"); 191 192 return tn->child[i]; 193 } 194 195 static inline int tnode_child_length(struct tnode *tn) 196 { 197 return 1<<tn->bits; 198 } 199 200 /* 201 _________________________________________________________________ 202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 203 ---------------------------------------------------------------- 204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 205 206 _________________________________________________________________ 207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 208 ----------------------------------------------------------------- 209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 210 211 tp->pos = 7 212 tp->bits = 3 213 n->pos = 15 214 n->bits=4 215 KEYLENGTH=32 216 */ 217 218 static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 219 { 220 if (offset < KEYLENGTH) 221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 222 else 223 return 0; 224 } 225 226 static inline int tkey_equals(t_key a, t_key b) 227 { 228 return a == b; 229 } 230 231 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 232 { 233 if (bits == 0 || offset >= KEYLENGTH) 234 return 1; 235 bits = bits > KEYLENGTH ? KEYLENGTH : bits; 236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 237 } 238 239 static inline int tkey_mismatch(t_key a, int offset, t_key b) 240 { 241 t_key diff = a ^ b; 242 int i = offset; 243 244 if(!diff) 245 return 0; 246 while((diff << i) >> (KEYLENGTH-1) == 0) 247 i++; 248 return i; 249 } 250 251 /* Candiate for fib_semantics */ 252 253 static void fn_free_alias(struct fib_alias *fa) 254 { 255 fib_release_info(fa->fa_info); 256 kmem_cache_free(fn_alias_kmem, fa); 257 } 258 259 /* 260 To understand this stuff, an understanding of keys and all their bits is 261 necessary. Every node in the trie has a key associated with it, but not 262 all of the bits in that key are significant. 263 264 Consider a node 'n' and its parent 'tp'. 265 266 If n is a leaf, every bit in its key is significant. Its presence is 267 necessitaded by path compression, since during a tree traversal (when 268 searching for a leaf - unless we are doing an insertion) we will completely 269 ignore all skipped bits we encounter. Thus we need to verify, at the end of 270 a potentially successful search, that we have indeed been walking the 271 correct key path. 272 273 Note that we can never "miss" the correct key in the tree if present by 274 following the wrong path. Path compression ensures that segments of the key 275 that are the same for all keys with a given prefix are skipped, but the 276 skipped part *is* identical for each node in the subtrie below the skipped 277 bit! trie_insert() in this implementation takes care of that - note the 278 call to tkey_sub_equals() in trie_insert(). 279 280 if n is an internal node - a 'tnode' here, the various parts of its key 281 have many different meanings. 282 283 Example: 284 _________________________________________________________________ 285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 286 ----------------------------------------------------------------- 287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 288 289 _________________________________________________________________ 290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 291 ----------------------------------------------------------------- 292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 293 294 tp->pos = 7 295 tp->bits = 3 296 n->pos = 15 297 n->bits=4 298 299 First, let's just ignore the bits that come before the parent tp, that is 300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do 301 not use them for anything. 302 303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 304 index into the parent's child array. That is, they will be used to find 305 'n' among tp's children. 306 307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 308 for the node n. 309 310 All the bits we have seen so far are significant to the node n. The rest 311 of the bits are really not needed or indeed known in n->key. 312 313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 314 n's child array, and will of course be different for each child. 315 316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 317 at this point. 318 319 */ 320 321 static void check_tnode(struct tnode *tn) 322 { 323 if(tn && tn->pos+tn->bits > 32) { 324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits); 325 } 326 } 327 328 static int halve_threshold = 25; 329 static int inflate_threshold = 50; 330 331 static struct leaf *leaf_new(void) 332 { 333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 334 if(l) { 335 NODE_INIT_PARENT(l, T_LEAF); 336 INIT_HLIST_HEAD(&l->list); 337 } 338 return l; 339 } 340 341 static struct leaf_info *leaf_info_new(int plen) 342 { 343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 344 li->plen = plen; 345 INIT_LIST_HEAD(&li->falh); 346 return li; 347 } 348 349 static inline void free_leaf(struct leaf *l) 350 { 351 kfree(l); 352 } 353 354 static inline void free_leaf_info(struct leaf_info *li) 355 { 356 kfree(li); 357 } 358 359 static struct tnode* tnode_new(t_key key, int pos, int bits) 360 { 361 int nchildren = 1<<bits; 362 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 363 struct tnode *tn = kmalloc(sz, GFP_KERNEL); 364 365 if(tn) { 366 memset(tn, 0, sz); 367 NODE_INIT_PARENT(tn, T_TNODE); 368 tn->pos = pos; 369 tn->bits = bits; 370 tn->key = key; 371 tn->full_children = 0; 372 tn->empty_children = 1<<bits; 373 } 374 if(trie_debug > 0) 375 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), 376 (unsigned int) (sizeof(struct node) * 1<<bits)); 377 return tn; 378 } 379 380 static void tnode_free(struct tnode *tn) 381 { 382 if(!tn) { 383 trie_bug("tnode_free\n"); 384 } 385 if(IS_LEAF(tn)) { 386 free_leaf((struct leaf *)tn); 387 if(trie_debug > 0 ) 388 printk("FL %p \n", tn); 389 } 390 else if(IS_TNODE(tn)) { 391 kfree(tn); 392 if(trie_debug > 0 ) 393 printk("FT %p \n", tn); 394 } 395 else { 396 trie_bug("tnode_free\n"); 397 } 398 } 399 400 /* 401 * Check whether a tnode 'n' is "full", i.e. it is an internal node 402 * and no bits are skipped. See discussion in dyntree paper p. 6 403 */ 404 405 static inline int tnode_full(struct tnode *tn, struct node *n) 406 { 407 if(n == NULL || IS_LEAF(n)) 408 return 0; 409 410 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 411 } 412 413 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 414 { 415 tnode_put_child_reorg(tn, i, n, -1); 416 } 417 418 /* 419 * Add a child at position i overwriting the old value. 420 * Update the value of full_children and empty_children. 421 */ 422 423 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 424 { 425 struct node *chi; 426 int isfull; 427 428 if(i >= 1<<tn->bits) { 429 printk("bits=%d, i=%d\n", tn->bits, i); 430 trie_bug("tnode_put_child_reorg bits"); 431 } 432 write_lock_bh(&fib_lock); 433 chi = tn->child[i]; 434 435 /* update emptyChildren */ 436 if (n == NULL && chi != NULL) 437 tn->empty_children++; 438 else if (n != NULL && chi == NULL) 439 tn->empty_children--; 440 441 /* update fullChildren */ 442 if (wasfull == -1) 443 wasfull = tnode_full(tn, chi); 444 445 isfull = tnode_full(tn, n); 446 if (wasfull && !isfull) 447 tn->full_children--; 448 449 else if (!wasfull && isfull) 450 tn->full_children++; 451 if(n) 452 NODE_SET_PARENT(n, tn); 453 454 tn->child[i] = n; 455 write_unlock_bh(&fib_lock); 456 } 457 458 static struct node *resize(struct trie *t, struct tnode *tn) 459 { 460 int i; 461 462 if (!tn) 463 return NULL; 464 465 if(trie_debug) 466 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 467 tn, inflate_threshold, halve_threshold); 468 469 /* No children */ 470 if (tn->empty_children == tnode_child_length(tn)) { 471 tnode_free(tn); 472 return NULL; 473 } 474 /* One child */ 475 if (tn->empty_children == tnode_child_length(tn) - 1) 476 for (i = 0; i < tnode_child_length(tn); i++) { 477 478 write_lock_bh(&fib_lock); 479 if (tn->child[i] != NULL) { 480 481 /* compress one level */ 482 struct node *n = tn->child[i]; 483 if(n) 484 NODE_INIT_PARENT(n, NODE_TYPE(n)); 485 486 write_unlock_bh(&fib_lock); 487 tnode_free(tn); 488 return n; 489 } 490 write_unlock_bh(&fib_lock); 491 } 492 /* 493 * Double as long as the resulting node has a number of 494 * nonempty nodes that are above the threshold. 495 */ 496 497 /* 498 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 499 * the Helsinki University of Technology and Matti Tikkanen of Nokia 500 * Telecommunications, page 6: 501 * "A node is doubled if the ratio of non-empty children to all 502 * children in the *doubled* node is at least 'high'." 503 * 504 * 'high' in this instance is the variable 'inflate_threshold'. It 505 * is expressed as a percentage, so we multiply it with 506 * tnode_child_length() and instead of multiplying by 2 (since the 507 * child array will be doubled by inflate()) and multiplying 508 * the left-hand side by 100 (to handle the percentage thing) we 509 * multiply the left-hand side by 50. 510 * 511 * The left-hand side may look a bit weird: tnode_child_length(tn) 512 * - tn->empty_children is of course the number of non-null children 513 * in the current node. tn->full_children is the number of "full" 514 * children, that is non-null tnodes with a skip value of 0. 515 * All of those will be doubled in the resulting inflated tnode, so 516 * we just count them one extra time here. 517 * 518 * A clearer way to write this would be: 519 * 520 * to_be_doubled = tn->full_children; 521 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 522 * tn->full_children; 523 * 524 * new_child_length = tnode_child_length(tn) * 2; 525 * 526 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 527 * new_child_length; 528 * if (new_fill_factor >= inflate_threshold) 529 * 530 * ...and so on, tho it would mess up the while() loop. 531 * 532 * anyway, 533 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 534 * inflate_threshold 535 * 536 * avoid a division: 537 * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 538 * inflate_threshold * new_child_length 539 * 540 * expand not_to_be_doubled and to_be_doubled, and shorten: 541 * 100 * (tnode_child_length(tn) - tn->empty_children + 542 * tn->full_children ) >= inflate_threshold * new_child_length 543 * 544 * expand new_child_length: 545 * 100 * (tnode_child_length(tn) - tn->empty_children + 546 * tn->full_children ) >= 547 * inflate_threshold * tnode_child_length(tn) * 2 548 * 549 * shorten again: 550 * 50 * (tn->full_children + tnode_child_length(tn) - 551 * tn->empty_children ) >= inflate_threshold * 552 * tnode_child_length(tn) 553 * 554 */ 555 556 check_tnode(tn); 557 558 while ((tn->full_children > 0 && 559 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 560 inflate_threshold * tnode_child_length(tn))) { 561 562 tn = inflate(t, tn); 563 } 564 565 check_tnode(tn); 566 567 /* 568 * Halve as long as the number of empty children in this 569 * node is above threshold. 570 */ 571 while (tn->bits > 1 && 572 100 * (tnode_child_length(tn) - tn->empty_children) < 573 halve_threshold * tnode_child_length(tn)) 574 575 tn = halve(t, tn); 576 577 /* Only one child remains */ 578 579 if (tn->empty_children == tnode_child_length(tn) - 1) 580 for (i = 0; i < tnode_child_length(tn); i++) { 581 582 write_lock_bh(&fib_lock); 583 if (tn->child[i] != NULL) { 584 /* compress one level */ 585 struct node *n = tn->child[i]; 586 587 if(n) 588 NODE_INIT_PARENT(n, NODE_TYPE(n)); 589 590 write_unlock_bh(&fib_lock); 591 tnode_free(tn); 592 return n; 593 } 594 write_unlock_bh(&fib_lock); 595 } 596 597 return (struct node *) tn; 598 } 599 600 static struct tnode *inflate(struct trie *t, struct tnode *tn) 601 { 602 struct tnode *inode; 603 struct tnode *oldtnode = tn; 604 int olen = tnode_child_length(tn); 605 int i; 606 607 if(trie_debug) 608 printk("In inflate\n"); 609 610 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 611 612 if (!tn) 613 trie_bug("tnode_new failed"); 614 615 for(i = 0; i < olen; i++) { 616 struct node *node = tnode_get_child(oldtnode, i); 617 618 /* An empty child */ 619 if (node == NULL) 620 continue; 621 622 /* A leaf or an internal node with skipped bits */ 623 624 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 625 tn->pos + tn->bits - 1) { 626 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, 627 1) == 0) 628 put_child(t, tn, 2*i, node); 629 else 630 put_child(t, tn, 2*i+1, node); 631 continue; 632 } 633 634 /* An internal node with two children */ 635 inode = (struct tnode *) node; 636 637 if (inode->bits == 1) { 638 put_child(t, tn, 2*i, inode->child[0]); 639 put_child(t, tn, 2*i+1, inode->child[1]); 640 641 tnode_free(inode); 642 } 643 644 /* An internal node with more than two children */ 645 else { 646 struct tnode *left, *right; 647 int size, j; 648 649 /* We will replace this node 'inode' with two new 650 * ones, 'left' and 'right', each with half of the 651 * original children. The two new nodes will have 652 * a position one bit further down the key and this 653 * means that the "significant" part of their keys 654 * (see the discussion near the top of this file) 655 * will differ by one bit, which will be "0" in 656 * left's key and "1" in right's key. Since we are 657 * moving the key position by one step, the bit that 658 * we are moving away from - the bit at position 659 * (inode->pos) - is the one that will differ between 660 * left and right. So... we synthesize that bit in the 661 * two new keys. 662 * The mask 'm' below will be a single "one" bit at 663 * the position (inode->pos) 664 */ 665 666 t_key m = TKEY_GET_MASK(inode->pos, 1); 667 668 /* Use the old key, but set the new significant 669 * bit to zero. 670 */ 671 left = tnode_new(inode->key&(~m), inode->pos + 1, 672 inode->bits - 1); 673 674 if(!left) 675 trie_bug("tnode_new failed"); 676 677 678 /* Use the old key, but set the new significant 679 * bit to one. 680 */ 681 right = tnode_new(inode->key|m, inode->pos + 1, 682 inode->bits - 1); 683 684 if(!right) 685 trie_bug("tnode_new failed"); 686 687 size = tnode_child_length(left); 688 for(j = 0; j < size; j++) { 689 put_child(t, left, j, inode->child[j]); 690 put_child(t, right, j, inode->child[j + size]); 691 } 692 put_child(t, tn, 2*i, resize(t, left)); 693 put_child(t, tn, 2*i+1, resize(t, right)); 694 695 tnode_free(inode); 696 } 697 } 698 tnode_free(oldtnode); 699 return tn; 700 } 701 702 static struct tnode *halve(struct trie *t, struct tnode *tn) 703 { 704 struct tnode *oldtnode = tn; 705 struct node *left, *right; 706 int i; 707 int olen = tnode_child_length(tn); 708 709 if(trie_debug) printk("In halve\n"); 710 711 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 712 713 if(!tn) 714 trie_bug("tnode_new failed"); 715 716 for(i = 0; i < olen; i += 2) { 717 left = tnode_get_child(oldtnode, i); 718 right = tnode_get_child(oldtnode, i+1); 719 720 /* At least one of the children is empty */ 721 if (left == NULL) { 722 if (right == NULL) /* Both are empty */ 723 continue; 724 put_child(t, tn, i/2, right); 725 } else if (right == NULL) 726 put_child(t, tn, i/2, left); 727 728 /* Two nonempty children */ 729 else { 730 struct tnode *newBinNode = 731 tnode_new(left->key, tn->pos + tn->bits, 1); 732 733 if(!newBinNode) 734 trie_bug("tnode_new failed"); 735 736 put_child(t, newBinNode, 0, left); 737 put_child(t, newBinNode, 1, right); 738 put_child(t, tn, i/2, resize(t, newBinNode)); 739 } 740 } 741 tnode_free(oldtnode); 742 return tn; 743 } 744 745 static void *trie_init(struct trie *t) 746 { 747 if(t) { 748 t->size = 0; 749 t->trie = NULL; 750 t->revision = 0; 751 #ifdef CONFIG_IP_FIB_TRIE_STATS 752 memset(&t->stats, 0, sizeof(struct trie_use_stats)); 753 #endif 754 } 755 return t; 756 } 757 758 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) 759 { 760 struct hlist_node *node; 761 struct leaf_info *li; 762 763 hlist_for_each_entry(li, node, head, hlist) { 764 765 if ( li->plen == plen ) 766 return li; 767 } 768 return NULL; 769 } 770 771 static inline struct list_head * get_fa_head(struct leaf *l, int plen) 772 { 773 struct list_head *fa_head=NULL; 774 struct leaf_info *li = find_leaf_info(&l->list, plen); 775 776 if(li) 777 fa_head = &li->falh; 778 779 return fa_head; 780 } 781 782 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 783 { 784 struct leaf_info *li=NULL, *last=NULL; 785 struct hlist_node *node, *tmp; 786 787 write_lock_bh(&fib_lock); 788 789 if(hlist_empty(head)) 790 hlist_add_head(&new->hlist, head); 791 else { 792 hlist_for_each_entry_safe(li, node, tmp, head, hlist) { 793 794 if (new->plen > li->plen) 795 break; 796 797 last = li; 798 } 799 if(last) 800 hlist_add_after(&last->hlist, &new->hlist); 801 else 802 hlist_add_before(&new->hlist, &li->hlist); 803 } 804 write_unlock_bh(&fib_lock); 805 } 806 807 static struct leaf * 808 fib_find_node(struct trie *t, u32 key) 809 { 810 int pos; 811 struct tnode *tn; 812 struct node *n; 813 814 pos = 0; 815 n=t->trie; 816 817 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 818 tn = (struct tnode *) n; 819 820 check_tnode(tn); 821 822 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 823 pos=tn->pos + tn->bits; 824 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 825 } 826 else 827 break; 828 } 829 /* Case we have found a leaf. Compare prefixes */ 830 831 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 832 struct leaf *l = (struct leaf *) n; 833 return l; 834 } 835 return NULL; 836 } 837 838 static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 839 { 840 int i = 0; 841 int wasfull; 842 t_key cindex, key; 843 struct tnode *tp = NULL; 844 845 if(!tn) 846 BUG(); 847 848 key = tn->key; 849 i = 0; 850 851 while (tn != NULL && NODE_PARENT(tn) != NULL) { 852 853 if( i > 10 ) { 854 printk("Rebalance tn=%p \n", tn); 855 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn)); 856 857 printk("Rebalance tp=%p \n", tp); 858 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp)); 859 } 860 861 if( i > 12 ) BUG(); 862 i++; 863 864 tp = NODE_PARENT(tn); 865 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 866 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 867 tn = (struct tnode *) resize (t, (struct tnode *)tn); 868 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 869 870 if(!NODE_PARENT(tn)) 871 break; 872 873 tn = NODE_PARENT(tn); 874 } 875 /* Handle last (top) tnode */ 876 if (IS_TNODE(tn)) 877 tn = (struct tnode*) resize(t, (struct tnode *)tn); 878 879 return (struct node*) tn; 880 } 881 882 static struct list_head * 883 fib_insert_node(struct trie *t, u32 key, int plen) 884 { 885 int pos, newpos; 886 struct tnode *tp = NULL, *tn = NULL; 887 struct node *n; 888 struct leaf *l; 889 int missbit; 890 struct list_head *fa_head=NULL; 891 struct leaf_info *li; 892 t_key cindex; 893 894 pos = 0; 895 n=t->trie; 896 897 /* If we point to NULL, stop. Either the tree is empty and we should 898 * just put a new leaf in if, or we have reached an empty child slot, 899 * and we should just put our new leaf in that. 900 * If we point to a T_TNODE, check if it matches our key. Note that 901 * a T_TNODE might be skipping any number of bits - its 'pos' need 902 * not be the parent's 'pos'+'bits'! 903 * 904 * If it does match the current key, get pos/bits from it, extract 905 * the index from our key, push the T_TNODE and walk the tree. 906 * 907 * If it doesn't, we have to replace it with a new T_TNODE. 908 * 909 * If we point to a T_LEAF, it might or might not have the same key 910 * as we do. If it does, just change the value, update the T_LEAF's 911 * value, and return it. 912 * If it doesn't, we need to replace it with a T_TNODE. 913 */ 914 915 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 916 tn = (struct tnode *) n; 917 918 check_tnode(tn); 919 920 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 921 tp = tn; 922 pos=tn->pos + tn->bits; 923 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 924 925 if(n && NODE_PARENT(n) != tn) { 926 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 927 BUG(); 928 } 929 } 930 else 931 break; 932 } 933 934 /* 935 * n ----> NULL, LEAF or TNODE 936 * 937 * tp is n's (parent) ----> NULL or TNODE 938 */ 939 940 if(tp && IS_LEAF(tp)) 941 BUG(); 942 943 t->revision++; 944 945 /* Case 1: n is a leaf. Compare prefixes */ 946 947 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 948 struct leaf *l = ( struct leaf *) n; 949 950 li = leaf_info_new(plen); 951 952 if(! li) 953 BUG(); 954 955 fa_head = &li->falh; 956 insert_leaf_info(&l->list, li); 957 goto done; 958 } 959 t->size++; 960 l = leaf_new(); 961 962 if(! l) 963 BUG(); 964 965 l->key = key; 966 li = leaf_info_new(plen); 967 968 if(! li) 969 BUG(); 970 971 fa_head = &li->falh; 972 insert_leaf_info(&l->list, li); 973 974 /* Case 2: n is NULL, and will just insert a new leaf */ 975 if (t->trie && n == NULL) { 976 977 NODE_SET_PARENT(l, tp); 978 979 if (!tp) 980 BUG(); 981 982 else { 983 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 984 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 985 } 986 } 987 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 988 else { 989 /* 990 * Add a new tnode here 991 * first tnode need some special handling 992 */ 993 994 if (tp) 995 pos=tp->pos+tp->bits; 996 else 997 pos=0; 998 if(n) { 999 newpos = tkey_mismatch(key, pos, n->key); 1000 tn = tnode_new(n->key, newpos, 1); 1001 } 1002 else { 1003 newpos = 0; 1004 tn = tnode_new(key, newpos, 1); /* First tnode */ 1005 } 1006 if(!tn) 1007 trie_bug("tnode_pfx_new failed"); 1008 1009 NODE_SET_PARENT(tn, tp); 1010 1011 missbit=tkey_extract_bits(key, newpos, 1); 1012 put_child(t, tn, missbit, (struct node *)l); 1013 put_child(t, tn, 1-missbit, n); 1014 1015 if(tp) { 1016 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1017 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 1018 } 1019 else { 1020 t->trie = (struct node*) tn; /* First tnode */ 1021 tp = tn; 1022 } 1023 } 1024 if(tp && tp->pos+tp->bits > 32) { 1025 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1026 tp, tp->pos, tp->bits, key, plen); 1027 } 1028 /* Rebalance the trie */ 1029 t->trie = trie_rebalance(t, tp); 1030 done:; 1031 return fa_head; 1032 } 1033 1034 static int 1035 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, 1036 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) 1037 { 1038 struct trie *t = (struct trie *) tb->tb_data; 1039 struct fib_alias *fa, *new_fa; 1040 struct list_head *fa_head=NULL; 1041 struct fib_info *fi; 1042 int plen = r->rtm_dst_len; 1043 int type = r->rtm_type; 1044 u8 tos = r->rtm_tos; 1045 u32 key, mask; 1046 int err; 1047 struct leaf *l; 1048 1049 if (plen > 32) 1050 return -EINVAL; 1051 1052 key = 0; 1053 if (rta->rta_dst) 1054 memcpy(&key, rta->rta_dst, 4); 1055 1056 key = ntohl(key); 1057 1058 if(trie_debug) 1059 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); 1060 1061 mask = ntohl( inet_make_mask(plen) ); 1062 1063 if(key & ~mask) 1064 return -EINVAL; 1065 1066 key = key & mask; 1067 1068 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) 1069 goto err; 1070 1071 l = fib_find_node(t, key); 1072 fa = NULL; 1073 1074 if(l) { 1075 fa_head = get_fa_head(l, plen); 1076 fa = fib_find_alias(fa_head, tos, fi->fib_priority); 1077 } 1078 1079 /* Now fa, if non-NULL, points to the first fib alias 1080 * with the same keys [prefix,tos,priority], if such key already 1081 * exists or to the node before which we will insert new one. 1082 * 1083 * If fa is NULL, we will need to allocate a new one and 1084 * insert to the head of f. 1085 * 1086 * If f is NULL, no fib node matched the destination key 1087 * and we need to allocate a new one of those as well. 1088 */ 1089 1090 if (fa && 1091 fa->fa_info->fib_priority == fi->fib_priority) { 1092 struct fib_alias *fa_orig; 1093 1094 err = -EEXIST; 1095 if (nlhdr->nlmsg_flags & NLM_F_EXCL) 1096 goto out; 1097 1098 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { 1099 struct fib_info *fi_drop; 1100 u8 state; 1101 1102 write_lock_bh(&fib_lock); 1103 1104 fi_drop = fa->fa_info; 1105 fa->fa_info = fi; 1106 fa->fa_type = type; 1107 fa->fa_scope = r->rtm_scope; 1108 state = fa->fa_state; 1109 fa->fa_state &= ~FA_S_ACCESSED; 1110 1111 write_unlock_bh(&fib_lock); 1112 1113 fib_release_info(fi_drop); 1114 if (state & FA_S_ACCESSED) 1115 rt_cache_flush(-1); 1116 1117 goto succeeded; 1118 } 1119 /* Error if we find a perfect match which 1120 * uses the same scope, type, and nexthop 1121 * information. 1122 */ 1123 fa_orig = fa; 1124 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 1125 if (fa->fa_tos != tos) 1126 break; 1127 if (fa->fa_info->fib_priority != fi->fib_priority) 1128 break; 1129 if (fa->fa_type == type && 1130 fa->fa_scope == r->rtm_scope && 1131 fa->fa_info == fi) { 1132 goto out; 1133 } 1134 } 1135 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) 1136 fa = fa_orig; 1137 } 1138 err = -ENOENT; 1139 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) 1140 goto out; 1141 1142 err = -ENOBUFS; 1143 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); 1144 if (new_fa == NULL) 1145 goto out; 1146 1147 new_fa->fa_info = fi; 1148 new_fa->fa_tos = tos; 1149 new_fa->fa_type = type; 1150 new_fa->fa_scope = r->rtm_scope; 1151 new_fa->fa_state = 0; 1152 #if 0 1153 new_fa->dst = NULL; 1154 #endif 1155 /* 1156 * Insert new entry to the list. 1157 */ 1158 1159 if(!fa_head) 1160 fa_head = fib_insert_node(t, key, plen); 1161 1162 write_lock_bh(&fib_lock); 1163 1164 list_add_tail(&new_fa->fa_list, 1165 (fa ? &fa->fa_list : fa_head)); 1166 1167 write_unlock_bh(&fib_lock); 1168 1169 rt_cache_flush(-1); 1170 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); 1171 succeeded: 1172 return 0; 1173 out: 1174 fib_release_info(fi); 1175 err:; 1176 return err; 1177 } 1178 1179 static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, 1180 struct fib_result *res, int *err) 1181 { 1182 int i; 1183 t_key mask; 1184 struct leaf_info *li; 1185 struct hlist_head *hhead = &l->list; 1186 struct hlist_node *node; 1187 1188 hlist_for_each_entry(li, node, hhead, hlist) { 1189 1190 i = li->plen; 1191 mask = ntohl(inet_make_mask(i)); 1192 if (l->key != (key & mask)) 1193 continue; 1194 1195 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { 1196 *plen = i; 1197 #ifdef CONFIG_IP_FIB_TRIE_STATS 1198 t->stats.semantic_match_passed++; 1199 #endif 1200 return 1; 1201 } 1202 #ifdef CONFIG_IP_FIB_TRIE_STATS 1203 t->stats.semantic_match_miss++; 1204 #endif 1205 } 1206 return 0; 1207 } 1208 1209 static int 1210 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1211 { 1212 struct trie *t = (struct trie *) tb->tb_data; 1213 int plen, ret = 0; 1214 struct node *n; 1215 struct tnode *pn; 1216 int pos, bits; 1217 t_key key=ntohl(flp->fl4_dst); 1218 int chopped_off; 1219 t_key cindex = 0; 1220 int current_prefix_length = KEYLENGTH; 1221 n = t->trie; 1222 1223 read_lock(&fib_lock); 1224 if(!n) 1225 goto failed; 1226 1227 #ifdef CONFIG_IP_FIB_TRIE_STATS 1228 t->stats.gets++; 1229 #endif 1230 1231 /* Just a leaf? */ 1232 if (IS_LEAF(n)) { 1233 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) ) 1234 goto found; 1235 goto failed; 1236 } 1237 pn = (struct tnode *) n; 1238 chopped_off = 0; 1239 1240 while (pn) { 1241 1242 pos = pn->pos; 1243 bits = pn->bits; 1244 1245 if(!chopped_off) 1246 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); 1247 1248 n = tnode_get_child(pn, cindex); 1249 1250 if (n == NULL) { 1251 #ifdef CONFIG_IP_FIB_TRIE_STATS 1252 t->stats.null_node_hit++; 1253 #endif 1254 goto backtrace; 1255 } 1256 1257 if (IS_TNODE(n)) { 1258 #define HL_OPTIMIZE 1259 #ifdef HL_OPTIMIZE 1260 struct tnode *cn = (struct tnode *)n; 1261 t_key node_prefix, key_prefix, pref_mismatch; 1262 int mp; 1263 1264 /* 1265 * It's a tnode, and we can do some extra checks here if we 1266 * like, to avoid descending into a dead-end branch. 1267 * This tnode is in the parent's child array at index 1268 * key[p_pos..p_pos+p_bits] but potentially with some bits 1269 * chopped off, so in reality the index may be just a 1270 * subprefix, padded with zero at the end. 1271 * We can also take a look at any skipped bits in this 1272 * tnode - everything up to p_pos is supposed to be ok, 1273 * and the non-chopped bits of the index (se previous 1274 * paragraph) are also guaranteed ok, but the rest is 1275 * considered unknown. 1276 * 1277 * The skipped bits are key[pos+bits..cn->pos]. 1278 */ 1279 1280 /* If current_prefix_length < pos+bits, we are already doing 1281 * actual prefix matching, which means everything from 1282 * pos+(bits-chopped_off) onward must be zero along some 1283 * branch of this subtree - otherwise there is *no* valid 1284 * prefix present. Here we can only check the skipped 1285 * bits. Remember, since we have already indexed into the 1286 * parent's child array, we know that the bits we chopped of 1287 * *are* zero. 1288 */ 1289 1290 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 1291 1292 if (current_prefix_length < pos+bits) { 1293 if (tkey_extract_bits(cn->key, current_prefix_length, 1294 cn->pos - current_prefix_length) != 0 || 1295 !(cn->child[0])) 1296 goto backtrace; 1297 } 1298 1299 /* 1300 * If chopped_off=0, the index is fully validated and we 1301 * only need to look at the skipped bits for this, the new, 1302 * tnode. What we actually want to do is to find out if 1303 * these skipped bits match our key perfectly, or if we will 1304 * have to count on finding a matching prefix further down, 1305 * because if we do, we would like to have some way of 1306 * verifying the existence of such a prefix at this point. 1307 */ 1308 1309 /* The only thing we can do at this point is to verify that 1310 * any such matching prefix can indeed be a prefix to our 1311 * key, and if the bits in the node we are inspecting that 1312 * do not match our key are not ZERO, this cannot be true. 1313 * Thus, find out where there is a mismatch (before cn->pos) 1314 * and verify that all the mismatching bits are zero in the 1315 * new tnode's key. 1316 */ 1317 1318 /* Note: We aren't very concerned about the piece of the key 1319 * that precede pn->pos+pn->bits, since these have already been 1320 * checked. The bits after cn->pos aren't checked since these are 1321 * by definition "unknown" at this point. Thus, what we want to 1322 * see is if we are about to enter the "prefix matching" state, 1323 * and in that case verify that the skipped bits that will prevail 1324 * throughout this subtree are zero, as they have to be if we are 1325 * to find a matching prefix. 1326 */ 1327 1328 node_prefix = MASK_PFX(cn->key, cn->pos); 1329 key_prefix = MASK_PFX(key, cn->pos); 1330 pref_mismatch = key_prefix^node_prefix; 1331 mp = 0; 1332 1333 /* In short: If skipped bits in this node do not match the search 1334 * key, enter the "prefix matching" state.directly. 1335 */ 1336 if (pref_mismatch) { 1337 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1338 mp++; 1339 pref_mismatch = pref_mismatch <<1; 1340 } 1341 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 1342 1343 if (key_prefix != 0) 1344 goto backtrace; 1345 1346 if (current_prefix_length >= cn->pos) 1347 current_prefix_length=mp; 1348 } 1349 #endif 1350 pn = (struct tnode *)n; /* Descend */ 1351 chopped_off = 0; 1352 continue; 1353 } 1354 if (IS_LEAF(n)) { 1355 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) 1356 goto found; 1357 } 1358 backtrace: 1359 chopped_off++; 1360 1361 /* As zero don't change the child key (cindex) */ 1362 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { 1363 chopped_off++; 1364 } 1365 1366 /* Decrease current_... with bits chopped off */ 1367 if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1368 current_prefix_length = pn->pos + pn->bits - chopped_off; 1369 1370 /* 1371 * Either we do the actual chop off according or if we have 1372 * chopped off all bits in this tnode walk up to our parent. 1373 */ 1374 1375 if(chopped_off <= pn->bits) 1376 cindex &= ~(1 << (chopped_off-1)); 1377 else { 1378 if( NODE_PARENT(pn) == NULL) 1379 goto failed; 1380 1381 /* Get Child's index */ 1382 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); 1383 pn = NODE_PARENT(pn); 1384 chopped_off = 0; 1385 1386 #ifdef CONFIG_IP_FIB_TRIE_STATS 1387 t->stats.backtrack++; 1388 #endif 1389 goto backtrace; 1390 } 1391 } 1392 failed: 1393 ret = 1; 1394 found: 1395 read_unlock(&fib_lock); 1396 return ret; 1397 } 1398 1399 static int trie_leaf_remove(struct trie *t, t_key key) 1400 { 1401 t_key cindex; 1402 struct tnode *tp = NULL; 1403 struct node *n = t->trie; 1404 struct leaf *l; 1405 1406 if(trie_debug) 1407 printk("entering trie_leaf_remove(%p)\n", n); 1408 1409 /* Note that in the case skipped bits, those bits are *not* checked! 1410 * When we finish this, we will have NULL or a T_LEAF, and the 1411 * T_LEAF may or may not match our key. 1412 */ 1413 1414 while (n != NULL && IS_TNODE(n)) { 1415 struct tnode *tn = (struct tnode *) n; 1416 check_tnode(tn); 1417 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); 1418 1419 if(n && NODE_PARENT(n) != tn) { 1420 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 1421 BUG(); 1422 } 1423 } 1424 l = (struct leaf *) n; 1425 1426 if(!n || !tkey_equals(l->key, key)) 1427 return 0; 1428 1429 /* 1430 * Key found. 1431 * Remove the leaf and rebalance the tree 1432 */ 1433 1434 t->revision++; 1435 t->size--; 1436 1437 tp = NODE_PARENT(n); 1438 tnode_free((struct tnode *) n); 1439 1440 if(tp) { 1441 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1442 put_child(t, (struct tnode *)tp, cindex, NULL); 1443 t->trie = trie_rebalance(t, tp); 1444 } 1445 else 1446 t->trie = NULL; 1447 1448 return 1; 1449 } 1450 1451 static int 1452 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, 1453 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) 1454 { 1455 struct trie *t = (struct trie *) tb->tb_data; 1456 u32 key, mask; 1457 int plen = r->rtm_dst_len; 1458 u8 tos = r->rtm_tos; 1459 struct fib_alias *fa, *fa_to_delete; 1460 struct list_head *fa_head; 1461 struct leaf *l; 1462 1463 if (plen > 32) 1464 return -EINVAL; 1465 1466 key = 0; 1467 if (rta->rta_dst) 1468 memcpy(&key, rta->rta_dst, 4); 1469 1470 key = ntohl(key); 1471 mask = ntohl( inet_make_mask(plen) ); 1472 1473 if(key & ~mask) 1474 return -EINVAL; 1475 1476 key = key & mask; 1477 l = fib_find_node(t, key); 1478 1479 if(!l) 1480 return -ESRCH; 1481 1482 fa_head = get_fa_head(l, plen); 1483 fa = fib_find_alias(fa_head, tos, 0); 1484 1485 if (!fa) 1486 return -ESRCH; 1487 1488 if (trie_debug) 1489 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 1490 1491 fa_to_delete = NULL; 1492 fa_head = fa->fa_list.prev; 1493 list_for_each_entry(fa, fa_head, fa_list) { 1494 struct fib_info *fi = fa->fa_info; 1495 1496 if (fa->fa_tos != tos) 1497 break; 1498 1499 if ((!r->rtm_type || 1500 fa->fa_type == r->rtm_type) && 1501 (r->rtm_scope == RT_SCOPE_NOWHERE || 1502 fa->fa_scope == r->rtm_scope) && 1503 (!r->rtm_protocol || 1504 fi->fib_protocol == r->rtm_protocol) && 1505 fib_nh_match(r, nlhdr, rta, fi) == 0) { 1506 fa_to_delete = fa; 1507 break; 1508 } 1509 } 1510 1511 if (fa_to_delete) { 1512 int kill_li = 0; 1513 struct leaf_info *li; 1514 1515 fa = fa_to_delete; 1516 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); 1517 1518 l = fib_find_node(t, key); 1519 li = find_leaf_info(&l->list, plen); 1520 1521 write_lock_bh(&fib_lock); 1522 1523 list_del(&fa->fa_list); 1524 1525 if(list_empty(fa_head)) { 1526 hlist_del(&li->hlist); 1527 kill_li = 1; 1528 } 1529 write_unlock_bh(&fib_lock); 1530 1531 if(kill_li) 1532 free_leaf_info(li); 1533 1534 if(hlist_empty(&l->list)) 1535 trie_leaf_remove(t, key); 1536 1537 if (fa->fa_state & FA_S_ACCESSED) 1538 rt_cache_flush(-1); 1539 1540 fn_free_alias(fa); 1541 return 0; 1542 } 1543 return -ESRCH; 1544 } 1545 1546 static int trie_flush_list(struct trie *t, struct list_head *head) 1547 { 1548 struct fib_alias *fa, *fa_node; 1549 int found = 0; 1550 1551 list_for_each_entry_safe(fa, fa_node, head, fa_list) { 1552 struct fib_info *fi = fa->fa_info; 1553 1554 if (fi && (fi->fib_flags&RTNH_F_DEAD)) { 1555 1556 write_lock_bh(&fib_lock); 1557 list_del(&fa->fa_list); 1558 write_unlock_bh(&fib_lock); 1559 1560 fn_free_alias(fa); 1561 found++; 1562 } 1563 } 1564 return found; 1565 } 1566 1567 static int trie_flush_leaf(struct trie *t, struct leaf *l) 1568 { 1569 int found = 0; 1570 struct hlist_head *lih = &l->list; 1571 struct hlist_node *node, *tmp; 1572 struct leaf_info *li = NULL; 1573 1574 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1575 1576 found += trie_flush_list(t, &li->falh); 1577 1578 if (list_empty(&li->falh)) { 1579 1580 write_lock_bh(&fib_lock); 1581 hlist_del(&li->hlist); 1582 write_unlock_bh(&fib_lock); 1583 1584 free_leaf_info(li); 1585 } 1586 } 1587 return found; 1588 } 1589 1590 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 1591 { 1592 struct node *c = (struct node *) thisleaf; 1593 struct tnode *p; 1594 int idx; 1595 1596 if(c == NULL) { 1597 if(t->trie == NULL) 1598 return NULL; 1599 1600 if (IS_LEAF(t->trie)) /* trie w. just a leaf */ 1601 return (struct leaf *) t->trie; 1602 1603 p = (struct tnode*) t->trie; /* Start */ 1604 } 1605 else 1606 p = (struct tnode *) NODE_PARENT(c); 1607 while (p) { 1608 int pos, last; 1609 1610 /* Find the next child of the parent */ 1611 if(c) 1612 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 1613 else 1614 pos = 0; 1615 1616 last = 1 << p->bits; 1617 for(idx = pos; idx < last ; idx++) { 1618 if( p->child[idx]) { 1619 1620 /* Decend if tnode */ 1621 1622 while (IS_TNODE(p->child[idx])) { 1623 p = (struct tnode*) p->child[idx]; 1624 idx = 0; 1625 1626 /* Rightmost non-NULL branch */ 1627 if( p && IS_TNODE(p) ) 1628 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++; 1629 1630 /* Done with this tnode? */ 1631 if( idx >= (1 << p->bits) || p->child[idx] == NULL ) 1632 goto up; 1633 } 1634 return (struct leaf*) p->child[idx]; 1635 } 1636 } 1637 up: 1638 /* No more children go up one step */ 1639 c = (struct node*) p; 1640 p = (struct tnode *) NODE_PARENT(p); 1641 } 1642 return NULL; /* Ready. Root of trie */ 1643 } 1644 1645 static int fn_trie_flush(struct fib_table *tb) 1646 { 1647 struct trie *t = (struct trie *) tb->tb_data; 1648 struct leaf *ll = NULL, *l = NULL; 1649 int found = 0, h; 1650 1651 t->revision++; 1652 1653 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1654 found += trie_flush_leaf(t, l); 1655 1656 if (ll && hlist_empty(&ll->list)) 1657 trie_leaf_remove(t, ll->key); 1658 ll = l; 1659 } 1660 1661 if (ll && hlist_empty(&ll->list)) 1662 trie_leaf_remove(t, ll->key); 1663 1664 if(trie_debug) 1665 printk("trie_flush found=%d\n", found); 1666 return found; 1667 } 1668 1669 static int trie_last_dflt=-1; 1670 1671 static void 1672 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1673 { 1674 struct trie *t = (struct trie *) tb->tb_data; 1675 int order, last_idx; 1676 struct fib_info *fi = NULL; 1677 struct fib_info *last_resort; 1678 struct fib_alias *fa = NULL; 1679 struct list_head *fa_head; 1680 struct leaf *l; 1681 1682 last_idx = -1; 1683 last_resort = NULL; 1684 order = -1; 1685 1686 read_lock(&fib_lock); 1687 1688 l = fib_find_node(t, 0); 1689 if(!l) 1690 goto out; 1691 1692 fa_head = get_fa_head(l, 0); 1693 if(!fa_head) 1694 goto out; 1695 1696 if (list_empty(fa_head)) 1697 goto out; 1698 1699 list_for_each_entry(fa, fa_head, fa_list) { 1700 struct fib_info *next_fi = fa->fa_info; 1701 1702 if (fa->fa_scope != res->scope || 1703 fa->fa_type != RTN_UNICAST) 1704 continue; 1705 1706 if (next_fi->fib_priority > res->fi->fib_priority) 1707 break; 1708 if (!next_fi->fib_nh[0].nh_gw || 1709 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1710 continue; 1711 fa->fa_state |= FA_S_ACCESSED; 1712 1713 if (fi == NULL) { 1714 if (next_fi != res->fi) 1715 break; 1716 } else if (!fib_detect_death(fi, order, &last_resort, 1717 &last_idx, &trie_last_dflt)) { 1718 if (res->fi) 1719 fib_info_put(res->fi); 1720 res->fi = fi; 1721 atomic_inc(&fi->fib_clntref); 1722 trie_last_dflt = order; 1723 goto out; 1724 } 1725 fi = next_fi; 1726 order++; 1727 } 1728 if (order <= 0 || fi == NULL) { 1729 trie_last_dflt = -1; 1730 goto out; 1731 } 1732 1733 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { 1734 if (res->fi) 1735 fib_info_put(res->fi); 1736 res->fi = fi; 1737 atomic_inc(&fi->fib_clntref); 1738 trie_last_dflt = order; 1739 goto out; 1740 } 1741 if (last_idx >= 0) { 1742 if (res->fi) 1743 fib_info_put(res->fi); 1744 res->fi = last_resort; 1745 if (last_resort) 1746 atomic_inc(&last_resort->fib_clntref); 1747 } 1748 trie_last_dflt = last_idx; 1749 out:; 1750 read_unlock(&fib_lock); 1751 } 1752 1753 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 1754 struct sk_buff *skb, struct netlink_callback *cb) 1755 { 1756 int i, s_i; 1757 struct fib_alias *fa; 1758 1759 u32 xkey=htonl(key); 1760 1761 s_i=cb->args[3]; 1762 i = 0; 1763 1764 list_for_each_entry(fa, fah, fa_list) { 1765 if (i < s_i) { 1766 i++; 1767 continue; 1768 } 1769 if (fa->fa_info->fib_nh == NULL) { 1770 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); 1771 i++; 1772 continue; 1773 } 1774 if (fa->fa_info == NULL) { 1775 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); 1776 i++; 1777 continue; 1778 } 1779 1780 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1781 cb->nlh->nlmsg_seq, 1782 RTM_NEWROUTE, 1783 tb->tb_id, 1784 fa->fa_type, 1785 fa->fa_scope, 1786 &xkey, 1787 plen, 1788 fa->fa_tos, 1789 fa->fa_info, 0) < 0) { 1790 cb->args[3] = i; 1791 return -1; 1792 } 1793 i++; 1794 } 1795 cb->args[3]=i; 1796 return skb->len; 1797 } 1798 1799 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 1800 struct netlink_callback *cb) 1801 { 1802 int h, s_h; 1803 struct list_head *fa_head; 1804 struct leaf *l = NULL; 1805 s_h=cb->args[2]; 1806 1807 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1808 1809 if (h < s_h) 1810 continue; 1811 if (h > s_h) 1812 memset(&cb->args[3], 0, 1813 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1814 1815 fa_head = get_fa_head(l, plen); 1816 1817 if(!fa_head) 1818 continue; 1819 1820 if(list_empty(fa_head)) 1821 continue; 1822 1823 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 1824 cb->args[2]=h; 1825 return -1; 1826 } 1827 } 1828 cb->args[2]=h; 1829 return skb->len; 1830 } 1831 1832 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 1833 { 1834 int m, s_m; 1835 struct trie *t = (struct trie *) tb->tb_data; 1836 1837 s_m = cb->args[1]; 1838 1839 read_lock(&fib_lock); 1840 for (m=0; m<=32; m++) { 1841 1842 if (m < s_m) 1843 continue; 1844 if (m > s_m) 1845 memset(&cb->args[2], 0, 1846 sizeof(cb->args) - 2*sizeof(cb->args[0])); 1847 1848 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1849 cb->args[1] = m; 1850 goto out; 1851 } 1852 } 1853 read_unlock(&fib_lock); 1854 cb->args[1] = m; 1855 return skb->len; 1856 out: 1857 read_unlock(&fib_lock); 1858 return -1; 1859 } 1860 1861 /* Fix more generic FIB names for init later */ 1862 1863 #ifdef CONFIG_IP_MULTIPLE_TABLES 1864 struct fib_table * fib_hash_init(int id) 1865 #else 1866 struct fib_table * __init fib_hash_init(int id) 1867 #endif 1868 { 1869 struct fib_table *tb; 1870 struct trie *t; 1871 1872 if (fn_alias_kmem == NULL) 1873 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1874 sizeof(struct fib_alias), 1875 0, SLAB_HWCACHE_ALIGN, 1876 NULL, NULL); 1877 1878 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 1879 GFP_KERNEL); 1880 if (tb == NULL) 1881 return NULL; 1882 1883 tb->tb_id = id; 1884 tb->tb_lookup = fn_trie_lookup; 1885 tb->tb_insert = fn_trie_insert; 1886 tb->tb_delete = fn_trie_delete; 1887 tb->tb_flush = fn_trie_flush; 1888 tb->tb_select_default = fn_trie_select_default; 1889 tb->tb_dump = fn_trie_dump; 1890 memset(tb->tb_data, 0, sizeof(struct trie)); 1891 1892 t = (struct trie *) tb->tb_data; 1893 1894 trie_init(t); 1895 1896 if (id == RT_TABLE_LOCAL) 1897 trie_local=t; 1898 else if (id == RT_TABLE_MAIN) 1899 trie_main=t; 1900 1901 if (id == RT_TABLE_LOCAL) 1902 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); 1903 1904 return tb; 1905 } 1906 1907 /* Trie dump functions */ 1908 1909 static void putspace_seq(struct seq_file *seq, int n) 1910 { 1911 while (n--) seq_printf(seq, " "); 1912 } 1913 1914 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) 1915 { 1916 while (bits--) 1917 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); 1918 } 1919 1920 static void printnode_seq(struct seq_file *seq, int indent, struct node *n, 1921 int pend, int cindex, int bits) 1922 { 1923 putspace_seq(seq, indent); 1924 if (IS_LEAF(n)) 1925 seq_printf(seq, "|"); 1926 else 1927 seq_printf(seq, "+"); 1928 if (bits) { 1929 seq_printf(seq, "%d/", cindex); 1930 printbin_seq(seq, cindex, bits); 1931 seq_printf(seq, ": "); 1932 } 1933 else 1934 seq_printf(seq, "<root>: "); 1935 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); 1936 1937 if (IS_LEAF(n)) 1938 seq_printf(seq, "key=%d.%d.%d.%d\n", 1939 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); 1940 else { 1941 int plen=((struct tnode *)n)->pos; 1942 t_key prf=MASK_PFX(n->key, plen); 1943 seq_printf(seq, "key=%d.%d.%d.%d/%d\n", 1944 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); 1945 } 1946 if (IS_LEAF(n)) { 1947 struct leaf *l=(struct leaf *)n; 1948 struct fib_alias *fa; 1949 int i; 1950 for (i=32; i>=0; i--) 1951 if(find_leaf_info(&l->list, i)) { 1952 1953 struct list_head *fa_head = get_fa_head(l, i); 1954 1955 if(!fa_head) 1956 continue; 1957 1958 if(list_empty(fa_head)) 1959 continue; 1960 1961 putspace_seq(seq, indent+2); 1962 seq_printf(seq, "{/%d...dumping}\n", i); 1963 1964 1965 list_for_each_entry(fa, fa_head, fa_list) { 1966 putspace_seq(seq, indent+2); 1967 if (fa->fa_info->fib_nh == NULL) { 1968 seq_printf(seq, "Error _fib_nh=NULL\n"); 1969 continue; 1970 } 1971 if (fa->fa_info == NULL) { 1972 seq_printf(seq, "Error fa_info=NULL\n"); 1973 continue; 1974 } 1975 1976 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", 1977 fa->fa_type, 1978 fa->fa_scope, 1979 fa->fa_tos); 1980 } 1981 } 1982 } 1983 else if (IS_TNODE(n)) { 1984 struct tnode *tn=(struct tnode *)n; 1985 putspace_seq(seq, indent); seq_printf(seq, "| "); 1986 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); 1987 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); 1988 seq_printf(seq, "}\n"); 1989 putspace_seq(seq, indent); seq_printf(seq, "| "); 1990 seq_printf(seq, "{pos=%d", tn->pos); 1991 seq_printf(seq, " (skip=%d bits)", tn->pos - pend); 1992 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); 1993 putspace_seq(seq, indent); seq_printf(seq, "| "); 1994 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); 1995 } 1996 } 1997 1998 static void trie_dump_seq(struct seq_file *seq, struct trie *t) 1999 { 2000 struct node *n=t->trie; 2001 int cindex=0; 2002 int indent=1; 2003 int pend=0; 2004 int depth = 0; 2005 2006 read_lock(&fib_lock); 2007 2008 seq_printf(seq, "------ trie_dump of t=%p ------\n", t); 2009 if (n) { 2010 printnode_seq(seq, indent, n, pend, cindex, 0); 2011 if (IS_TNODE(n)) { 2012 struct tnode *tn=(struct tnode *)n; 2013 pend = tn->pos+tn->bits; 2014 putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); 2015 indent += 3; 2016 depth++; 2017 2018 while (tn && cindex < (1 << tn->bits)) { 2019 if (tn->child[cindex]) { 2020 2021 /* Got a child */ 2022 2023 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); 2024 if (IS_LEAF(tn->child[cindex])) { 2025 cindex++; 2026 2027 } 2028 else { 2029 /* 2030 * New tnode. Decend one level 2031 */ 2032 2033 depth++; 2034 n=tn->child[cindex]; 2035 tn=(struct tnode *)n; 2036 pend=tn->pos+tn->bits; 2037 putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); 2038 indent+=3; 2039 cindex=0; 2040 } 2041 } 2042 else 2043 cindex++; 2044 2045 /* 2046 * Test if we are done 2047 */ 2048 2049 while (cindex >= (1 << tn->bits)) { 2050 2051 /* 2052 * Move upwards and test for root 2053 * pop off all traversed nodes 2054 */ 2055 2056 if (NODE_PARENT(tn) == NULL) { 2057 tn = NULL; 2058 n = NULL; 2059 break; 2060 } 2061 else { 2062 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2063 tn = NODE_PARENT(tn); 2064 cindex++; 2065 n=(struct node *)tn; 2066 pend=tn->pos+tn->bits; 2067 indent-=3; 2068 depth--; 2069 } 2070 } 2071 } 2072 } 2073 else n = NULL; 2074 } 2075 else seq_printf(seq, "------ trie is empty\n"); 2076 2077 read_unlock(&fib_lock); 2078 } 2079 2080 static struct trie_stat *trie_stat_new(void) 2081 { 2082 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); 2083 int i; 2084 2085 if(s) { 2086 s->totdepth = 0; 2087 s->maxdepth = 0; 2088 s->tnodes = 0; 2089 s->leaves = 0; 2090 s->nullpointers = 0; 2091 2092 for(i=0; i< MAX_CHILDS; i++) 2093 s->nodesizes[i] = 0; 2094 } 2095 return s; 2096 } 2097 2098 static struct trie_stat *trie_collect_stats(struct trie *t) 2099 { 2100 struct node *n=t->trie; 2101 struct trie_stat *s = trie_stat_new(); 2102 int cindex = 0; 2103 int indent = 1; 2104 int pend = 0; 2105 int depth = 0; 2106 2107 read_lock(&fib_lock); 2108 2109 if (s) { 2110 if (n) { 2111 if (IS_TNODE(n)) { 2112 struct tnode *tn = (struct tnode *)n; 2113 pend=tn->pos+tn->bits; 2114 indent += 3; 2115 s->nodesizes[tn->bits]++; 2116 depth++; 2117 2118 while (tn && cindex < (1 << tn->bits)) { 2119 if (tn->child[cindex]) { 2120 /* Got a child */ 2121 2122 if (IS_LEAF(tn->child[cindex])) { 2123 cindex++; 2124 2125 /* stats */ 2126 if (depth > s->maxdepth) 2127 s->maxdepth = depth; 2128 s->totdepth += depth; 2129 s->leaves++; 2130 } 2131 2132 else { 2133 /* 2134 * New tnode. Decend one level 2135 */ 2136 2137 s->tnodes++; 2138 s->nodesizes[tn->bits]++; 2139 depth++; 2140 2141 n = tn->child[cindex]; 2142 tn = (struct tnode *)n; 2143 pend = tn->pos+tn->bits; 2144 2145 indent += 3; 2146 cindex = 0; 2147 } 2148 } 2149 else { 2150 cindex++; 2151 s->nullpointers++; 2152 } 2153 2154 /* 2155 * Test if we are done 2156 */ 2157 2158 while (cindex >= (1 << tn->bits)) { 2159 2160 /* 2161 * Move upwards and test for root 2162 * pop off all traversed nodes 2163 */ 2164 2165 2166 if (NODE_PARENT(tn) == NULL) { 2167 tn = NULL; 2168 n = NULL; 2169 break; 2170 } 2171 else { 2172 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2173 tn = NODE_PARENT(tn); 2174 cindex++; 2175 n = (struct node *)tn; 2176 pend=tn->pos+tn->bits; 2177 indent -= 3; 2178 depth--; 2179 } 2180 } 2181 } 2182 } 2183 else n = NULL; 2184 } 2185 } 2186 2187 read_unlock(&fib_lock); 2188 return s; 2189 } 2190 2191 #ifdef CONFIG_PROC_FS 2192 2193 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) 2194 { 2195 return NULL; 2196 } 2197 2198 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) 2199 { 2200 return NULL; 2201 } 2202 2203 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) 2204 { 2205 void *v = NULL; 2206 2207 if (ip_fib_main_table) 2208 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; 2209 return v; 2210 } 2211 2212 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2213 { 2214 ++*pos; 2215 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); 2216 } 2217 2218 static void fib_triestat_seq_stop(struct seq_file *seq, void *v) 2219 { 2220 2221 } 2222 2223 /* 2224 * This outputs /proc/net/fib_triestats 2225 * 2226 * It always works in backward compatibility mode. 2227 * The format of the file is not supposed to be changed. 2228 */ 2229 2230 static void collect_and_show(struct trie *t, struct seq_file *seq) 2231 { 2232 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ 2233 int i, max, pointers; 2234 struct trie_stat *stat; 2235 int avdepth; 2236 2237 stat = trie_collect_stats(t); 2238 2239 bytes=0; 2240 seq_printf(seq, "trie=%p\n", t); 2241 2242 if (stat) { 2243 if (stat->leaves) 2244 avdepth=stat->totdepth*100 / stat->leaves; 2245 else 2246 avdepth=0; 2247 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2248 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); 2249 2250 seq_printf(seq, "Leaves: %d\n", stat->leaves); 2251 bytes += sizeof(struct leaf) * stat->leaves; 2252 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); 2253 bytes += sizeof(struct tnode) * stat->tnodes; 2254 2255 max = MAX_CHILDS-1; 2256 2257 while (max >= 0 && stat->nodesizes[max] == 0) 2258 max--; 2259 pointers = 0; 2260 2261 for (i = 1; i <= max; i++) 2262 if (stat->nodesizes[i] != 0) { 2263 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2264 pointers += (1<<i) * stat->nodesizes[i]; 2265 } 2266 seq_printf(seq, "\n"); 2267 seq_printf(seq, "Pointers: %d\n", pointers); 2268 bytes += sizeof(struct node *) * pointers; 2269 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2270 seq_printf(seq, "Total size: %d kB\n", bytes / 1024); 2271 2272 kfree(stat); 2273 } 2274 2275 #ifdef CONFIG_IP_FIB_TRIE_STATS 2276 seq_printf(seq, "Counters:\n---------\n"); 2277 seq_printf(seq,"gets = %d\n", t->stats.gets); 2278 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 2279 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2280 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2281 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2282 #ifdef CLEAR_STATS 2283 memset(&(t->stats), 0, sizeof(t->stats)); 2284 #endif 2285 #endif /* CONFIG_IP_FIB_TRIE_STATS */ 2286 } 2287 2288 static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2289 { 2290 char bf[128]; 2291 2292 if (v == SEQ_START_TOKEN) { 2293 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2294 sizeof(struct leaf), sizeof(struct tnode)); 2295 if (trie_local) 2296 collect_and_show(trie_local, seq); 2297 2298 if (trie_main) 2299 collect_and_show(trie_main, seq); 2300 } 2301 else { 2302 snprintf(bf, sizeof(bf), 2303 "*\t%08X\t%08X", 200, 400); 2304 2305 seq_printf(seq, "%-127s\n", bf); 2306 } 2307 return 0; 2308 } 2309 2310 static struct seq_operations fib_triestat_seq_ops = { 2311 .start = fib_triestat_seq_start, 2312 .next = fib_triestat_seq_next, 2313 .stop = fib_triestat_seq_stop, 2314 .show = fib_triestat_seq_show, 2315 }; 2316 2317 static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2318 { 2319 struct seq_file *seq; 2320 int rc = -ENOMEM; 2321 2322 rc = seq_open(file, &fib_triestat_seq_ops); 2323 if (rc) 2324 goto out_kfree; 2325 2326 seq = file->private_data; 2327 out: 2328 return rc; 2329 out_kfree: 2330 goto out; 2331 } 2332 2333 static struct file_operations fib_triestat_seq_fops = { 2334 .owner = THIS_MODULE, 2335 .open = fib_triestat_seq_open, 2336 .read = seq_read, 2337 .llseek = seq_lseek, 2338 .release = seq_release_private, 2339 }; 2340 2341 int __init fib_stat_proc_init(void) 2342 { 2343 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) 2344 return -ENOMEM; 2345 return 0; 2346 } 2347 2348 void __init fib_stat_proc_exit(void) 2349 { 2350 proc_net_remove("fib_triestat"); 2351 } 2352 2353 static struct fib_alias *fib_trie_get_first(struct seq_file *seq) 2354 { 2355 return NULL; 2356 } 2357 2358 static struct fib_alias *fib_trie_get_next(struct seq_file *seq) 2359 { 2360 return NULL; 2361 } 2362 2363 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2364 { 2365 void *v = NULL; 2366 2367 if (ip_fib_main_table) 2368 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; 2369 return v; 2370 } 2371 2372 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2373 { 2374 ++*pos; 2375 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); 2376 } 2377 2378 static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2379 { 2380 2381 } 2382 2383 /* 2384 * This outputs /proc/net/fib_trie. 2385 * 2386 * It always works in backward compatibility mode. 2387 * The format of the file is not supposed to be changed. 2388 */ 2389 2390 static int fib_trie_seq_show(struct seq_file *seq, void *v) 2391 { 2392 char bf[128]; 2393 2394 if (v == SEQ_START_TOKEN) { 2395 if (trie_local) 2396 trie_dump_seq(seq, trie_local); 2397 2398 if (trie_main) 2399 trie_dump_seq(seq, trie_main); 2400 } 2401 2402 else { 2403 snprintf(bf, sizeof(bf), 2404 "*\t%08X\t%08X", 200, 400); 2405 seq_printf(seq, "%-127s\n", bf); 2406 } 2407 2408 return 0; 2409 } 2410 2411 static struct seq_operations fib_trie_seq_ops = { 2412 .start = fib_trie_seq_start, 2413 .next = fib_trie_seq_next, 2414 .stop = fib_trie_seq_stop, 2415 .show = fib_trie_seq_show, 2416 }; 2417 2418 static int fib_trie_seq_open(struct inode *inode, struct file *file) 2419 { 2420 struct seq_file *seq; 2421 int rc = -ENOMEM; 2422 2423 rc = seq_open(file, &fib_trie_seq_ops); 2424 if (rc) 2425 goto out_kfree; 2426 2427 seq = file->private_data; 2428 out: 2429 return rc; 2430 out_kfree: 2431 goto out; 2432 } 2433 2434 static struct file_operations fib_trie_seq_fops = { 2435 .owner = THIS_MODULE, 2436 .open = fib_trie_seq_open, 2437 .read = seq_read, 2438 .llseek = seq_lseek, 2439 .release = seq_release_private, 2440 }; 2441 2442 int __init fib_proc_init(void) 2443 { 2444 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) 2445 return -ENOMEM; 2446 return 0; 2447 } 2448 2449 void __init fib_proc_exit(void) 2450 { 2451 proc_net_remove("fib_trie"); 2452 } 2453 2454 #endif /* CONFIG_PROC_FS */ 2455