1 /* 2 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 * 5 * Copyright (c) 1988, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)radix.c 8.5 (Berkeley) 5/19/95 33 * $FreeBSD: /repoman/r/ncvs/src/sys/net/radix.c,v 1.36.2.1 2005/01/31 23:26:23 34 * imp Exp $ 35 */ 36 37 #pragma ident "%Z%%M% %I% %E% SMI" 38 39 /* 40 * Routines to build and maintain radix trees for routing lookups. 41 */ 42 #include <sys/types.h> 43 44 #ifndef _RADIX_H_ 45 #include <sys/param.h> 46 #ifdef _KERNEL 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/systm.h> 50 #include <sys/cmn_err.h> 51 #else 52 #include <assert.h> 53 #define ASSERT assert 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <syslog.h> 57 #include <strings.h> 58 #endif /* _KERNEL */ 59 #include <net/radix.h> 60 #include <inet/ip_ftable.h> 61 #endif 62 63 #ifndef _KERNEL 64 void 65 panic(const char *str) 66 { 67 fprintf(stderr, "Panic - %s\n", str); 68 abort(); 69 } 70 #endif /* _KERNEL */ 71 72 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); 73 static struct radix_node 74 *rn_insert(void *, struct radix_node_head *, int *, 75 struct radix_node [2]), 76 *rn_newpair(void *, int, struct radix_node[2]), 77 *rn_search(void *, struct radix_node *), 78 *rn_search_m(void *, struct radix_node *, void *), 79 *rn_lookup(void *, void *, struct radix_node_head *), 80 *rn_match(void *, struct radix_node_head *), 81 *rn_match_args(void *, struct radix_node_head *, match_leaf_t *, 82 void *), 83 *rn_addmask(void *, int, int), 84 *rn_addroute(void *, void *, struct radix_node_head *, 85 struct radix_node [2]), 86 *rn_delete(void *, void *, struct radix_node_head *); 87 static boolean_t rn_refines(void *, void *); 88 89 #define MAX_KEYLEN 16 90 static int max_keylen = MAX_KEYLEN; 91 92 #ifdef _KERNEL 93 static struct kmem_cache *radix_mask_cache; /* for rn_mkfreelist */ 94 static struct kmem_cache *radix_node_cache; 95 #else 96 static char *radix_mask_cache, *radix_node_cache; /* dummy vars. never inited */ 97 #endif /* _KERNEL */ 98 99 static struct radix_mask *rn_mkfreelist; 100 static struct radix_node_head *mask_rnhead; 101 /* 102 * Work area -- the following point to 2 buffers of size max_keylen, 103 * allocated in this order in a block of memory malloc'ed by rn_init. 104 * A third buffer of size MAX_KEYLEN is allocated from the stack. 105 */ 106 static char *rn_zeros, *rn_ones; 107 108 #define MKGet(m) R_Malloc(m, radix_mask_cache, sizeof (struct radix_mask)) 109 #define MKFree(m) Free(m, radix_mask_cache) 110 #define rn_masktop (mask_rnhead->rnh_treetop) 111 112 static boolean_t rn_lexobetter(void *m_arg, void *n_arg); 113 static struct radix_mask * 114 rn_new_radix_mask(struct radix_node *tt, 115 struct radix_mask *next); 116 static boolean_t 117 rn_satisfies_leaf(char *trial, struct radix_node *leaf, 118 int skip, match_leaf_t *rn_leaf_fn, void *rn_leaf_arg); 119 120 #define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg)) 121 122 /* 123 * The data structure for the keys is a radix tree with one way 124 * branching removed. The index rn_bit at an internal node n represents a bit 125 * position to be tested. The tree is arranged so that all descendants 126 * of a node n have keys whose bits all agree up to position rn_bit - 1. 127 * (We say the index of n is rn_bit.) 128 * 129 * There is at least one descendant which has a one bit at position rn_bit, 130 * and at least one with a zero there. 131 * 132 * A route is determined by a pair of key and mask. We require that the 133 * bit-wise logical and of the key and mask to be the key. 134 * We define the index of a route associated with the mask to be 135 * the first bit number in the mask where 0 occurs (with bit number 0 136 * representing the highest order bit). 137 * 138 * We say a mask is normal if every bit is 0, past the index of the mask. 139 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 140 * and m is a normal mask, then the route applies to every descendant of n. 141 * If the index(m) < rn_bit, this implies the trailing last few bits of k 142 * before bit b are all 0, (and hence consequently true of every descendant 143 * of n), so the route applies to all descendants of the node as well. 144 * 145 * Similar logic shows that a non-normal mask m such that 146 * index(m) <= index(n) could potentially apply to many children of n. 147 * Thus, for each non-host route, we attach its mask to a list at an internal 148 * node as high in the tree as we can go. 149 * 150 * The present version of the code makes use of normal routes in short- 151 * circuiting an explict mask and compare operation when testing whether 152 * a key satisfies a normal route, and also in remembering the unique leaf 153 * that governs a subtree. 154 */ 155 156 /* 157 * Most of the functions in this code assume that the key/mask arguments 158 * are sockaddr-like structures, where the first byte is an uchar_t 159 * indicating the size of the entire structure. 160 * 161 * To make the assumption more explicit, we use the LEN() macro to access 162 * this field. It is safe to pass an expression with side effects 163 * to LEN() as the argument is evaluated only once. 164 */ 165 #define LEN(x) (*(const uchar_t *)(x)) 166 167 168 /* 169 * Search a node in the tree matching the key. 170 */ 171 static struct radix_node * 172 rn_search(v_arg, head) 173 void *v_arg; 174 struct radix_node *head; 175 { 176 struct radix_node *x; 177 caddr_t v; 178 179 for (x = head, v = v_arg; x->rn_bit >= 0; ) { 180 if (x->rn_bmask & v[x->rn_offset]) 181 x = x->rn_right; 182 else 183 x = x->rn_left; 184 } 185 return (x); 186 } 187 188 /* 189 * Same as above, but with an additional mask. 190 */ 191 static struct radix_node * 192 rn_search_m(v_arg, head, m_arg) 193 struct radix_node *head; 194 void *v_arg, *m_arg; 195 { 196 struct radix_node *x; 197 caddr_t v = v_arg, m = m_arg; 198 199 for (x = head; x->rn_bit >= 0; ) { 200 if ((x->rn_bmask & m[x->rn_offset]) && 201 (x->rn_bmask & v[x->rn_offset])) 202 x = x->rn_right; 203 else 204 x = x->rn_left; 205 } 206 return (x); 207 } 208 209 /* 210 * Returns true if there are no bits set in n_arg that are zero in 211 * m_arg and the masks aren't equal. In other words, it returns true 212 * when m_arg is a finer-granularity netmask -- it represents a subset 213 * of the destinations implied by n_arg. 214 */ 215 static boolean_t 216 rn_refines(m_arg, n_arg) 217 void *m_arg, *n_arg; 218 { 219 caddr_t m = m_arg, n = n_arg; 220 caddr_t lim = n + LEN(n), lim2 = lim; 221 int longer = LEN(n++) - (int)LEN(m++); 222 boolean_t masks_are_equal = B_TRUE; 223 224 if (longer > 0) 225 lim -= longer; 226 while (n < lim) { 227 if (*n & ~(*m)) 228 return (0); 229 if (*n++ != *m++) 230 masks_are_equal = B_FALSE; 231 } 232 while (n < lim2) 233 if (*n++) 234 return (B_FALSE); 235 if (masks_are_equal && (longer < 0)) 236 for (lim2 = m - longer; m < lim2; ) 237 if (*m++) 238 return (B_TRUE); 239 return (!masks_are_equal); 240 } 241 242 static struct radix_node * 243 rn_lookup(v_arg, m_arg, head) 244 void *v_arg, *m_arg; 245 struct radix_node_head *head; 246 { 247 struct radix_node *x; 248 caddr_t netmask = NULL; 249 250 if (m_arg) { 251 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); 252 if (x == NULL) 253 return (NULL); 254 netmask = x->rn_key; 255 } 256 x = rn_match(v_arg, head); 257 if (x && netmask) { 258 while (x && x->rn_mask != netmask) 259 x = x->rn_dupedkey; 260 } 261 return (x); 262 } 263 264 /* 265 * Returns true if address 'trial' has no bits differing from the 266 * leaf's key when compared under the leaf's mask. In other words, 267 * returns true when 'trial' matches leaf. 268 * In addition, if a rn_leaf_fn is passed in, that is used to find 269 * a match on conditions defined by the caller of rn_match. This is 270 * used by the kernel ftable to match on IRE_MATCH_* conditions. 271 */ 272 static boolean_t 273 rn_satisfies_leaf(trial, leaf, skip, rn_leaf_fn, rn_leaf_arg) 274 caddr_t trial; 275 struct radix_node *leaf; 276 int skip; 277 match_leaf_t *rn_leaf_fn; 278 void *rn_leaf_arg; 279 { 280 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; 281 char *cplim; 282 int length = min(LEN(cp), LEN(cp2)); 283 284 if (cp3 == 0) 285 cp3 = rn_ones; 286 else 287 length = min(length, LEN(cp3)); 288 cplim = cp + length; 289 cp3 += skip; 290 cp2 += skip; 291 292 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) 293 if ((*cp ^ *cp2) & *cp3) 294 return (B_FALSE); 295 296 return (RN_MATCHF(leaf, rn_leaf_fn, rn_leaf_arg)); 297 } 298 299 static struct radix_node * 300 rn_match(v_arg, head) 301 void *v_arg; 302 struct radix_node_head *head; 303 { 304 return (rn_match_args(v_arg, head, NULL, NULL)); 305 } 306 307 static struct radix_node * 308 rn_match_args(v_arg, head, rn_leaf_fn, rn_leaf_arg) 309 void *v_arg; 310 struct radix_node_head *head; 311 match_leaf_t *rn_leaf_fn; 312 void *rn_leaf_arg; 313 { 314 caddr_t v = v_arg; 315 struct radix_node *t = head->rnh_treetop, *x; 316 caddr_t cp = v, cp2; 317 caddr_t cplim; 318 struct radix_node *saved_t, *top = t; 319 int off = t->rn_offset, vlen = LEN(cp), matched_off; 320 int test, b, rn_bit; 321 322 /* 323 * Open code rn_search(v, top) to avoid overhead of extra 324 * subroutine call. 325 */ 326 for (; t->rn_bit >= 0; ) { 327 if (t->rn_bmask & cp[t->rn_offset]) 328 t = t->rn_right; 329 else 330 t = t->rn_left; 331 } 332 /* 333 * See if we match exactly as a host destination 334 * or at least learn how many bits match, for normal mask finesse. 335 * 336 * It doesn't hurt us to limit how many bytes to check 337 * to the length of the mask, since if it matches we had a genuine 338 * match and the leaf we have is the most specific one anyway; 339 * if it didn't match with a shorter length it would fail 340 * with a long one. This wins big for class B&C netmasks which 341 * are probably the most common case... 342 */ 343 if (t->rn_mask) 344 vlen = LEN(t->rn_mask); 345 cp += off; cp2 = t->rn_key + off; cplim = v + vlen; 346 for (; cp < cplim; cp++, cp2++) 347 if (*cp != *cp2) 348 goto keydiff; 349 /* 350 * This extra grot is in case we are explicitly asked 351 * to look up the default. Ugh! 352 * 353 * Never return the root node itself, it seems to cause a 354 * lot of confusion. 355 */ 356 if (t->rn_flags & RNF_ROOT) 357 t = t->rn_dupedkey; 358 if (t == NULL || RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) { 359 return (t); 360 } else { 361 /* 362 * Although we found an exact match on the key, rn_leaf_fn 363 * is looking for some other criteria as well. Continue 364 * looking as if the exact match failed. 365 */ 366 if (t->rn_parent->rn_flags & RNF_ROOT) { 367 /* hit the top. have to give up */ 368 return (NULL); 369 } 370 b = 0; 371 goto keeplooking; 372 373 } 374 keydiff: 375 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 376 for (b = 7; (test >>= 1) > 0; ) 377 b--; 378 keeplooking: 379 matched_off = cp - v; 380 b += matched_off << 3; 381 rn_bit = -1 - b; 382 383 /* 384 * If there is a host route in a duped-key chain, it will be first. 385 */ 386 if ((saved_t = t)->rn_mask == 0) 387 t = t->rn_dupedkey; 388 for (; t != NULL; t = t->rn_dupedkey) { 389 /* 390 * Even if we don't match exactly as a host, 391 * we may match if the leaf we wound up at is 392 * a route to a net. 393 */ 394 395 if (t->rn_flags & RNF_NORMAL) { 396 if ((rn_bit <= t->rn_bit) && 397 RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) { 398 return (t); 399 } 400 } else if (rn_satisfies_leaf(v, t, matched_off, rn_leaf_fn, 401 rn_leaf_arg)) { 402 return (t); 403 } 404 } 405 t = saved_t; 406 /* start searching up the tree */ 407 do { 408 struct radix_mask *m; 409 410 t = t->rn_parent; 411 m = t->rn_mklist; 412 /* 413 * If non-contiguous masks ever become important 414 * we can restore the masking and open coding of 415 * the search and satisfaction test and put the 416 * calculation of "off" back before the "do". 417 */ 418 while (m) { 419 if (m->rm_flags & RNF_NORMAL) { 420 if ((rn_bit <= m->rm_bit) && 421 RN_MATCHF(m->rm_leaf, rn_leaf_fn, 422 rn_leaf_arg)) { 423 return (m->rm_leaf); 424 } 425 } else { 426 off = min(t->rn_offset, matched_off); 427 x = rn_search_m(v, t, m->rm_mask); 428 while (x != NULL && x->rn_mask != m->rm_mask) 429 x = x->rn_dupedkey; 430 if (x && rn_satisfies_leaf(v, x, off, 431 rn_leaf_fn, rn_leaf_arg)) { 432 return (x); 433 } 434 } 435 m = m->rm_mklist; 436 } 437 } while (t != top); 438 return (0); 439 } 440 441 /* 442 * Whenever we add a new leaf to the tree, we also add a parent node, 443 * so we allocate them as an array of two elements: the first one must be 444 * the leaf (see RNTORT() in route.c), the second one is the parent. 445 * This routine initializes the relevant fields of the nodes, so that 446 * the leaf is the left child of the parent node, and both nodes have 447 * (almost) all all fields filled as appropriate. 448 * The function returns a pointer to the parent node. 449 */ 450 451 static struct radix_node * 452 rn_newpair(v, b, nodes) 453 void *v; 454 int b; 455 struct radix_node nodes[2]; 456 { 457 struct radix_node *tt = nodes, *t = tt + 1; 458 459 t->rn_bit = b; 460 t->rn_bmask = 0x80 >> (b & 7); 461 t->rn_left = tt; 462 t->rn_offset = b >> 3; 463 464 /* 465 * t->rn_parent, r->rn_right, tt->rn_mask, tt->rn_dupedkey 466 * and tt->rn_bmask must have been zeroed by caller. 467 */ 468 tt->rn_bit = -1; 469 tt->rn_key = v; 470 tt->rn_parent = t; 471 tt->rn_flags = t->rn_flags = RNF_ACTIVE; 472 tt->rn_mklist = t->rn_mklist = 0; 473 return (t); 474 } 475 476 static struct radix_node * 477 rn_insert(v_arg, head, dupentry, nodes) 478 void *v_arg; 479 struct radix_node_head *head; 480 int *dupentry; 481 struct radix_node nodes[2]; 482 { 483 caddr_t v = v_arg; 484 struct radix_node *top = head->rnh_treetop; 485 int head_off = top->rn_offset, vlen = (int)LEN(v); 486 struct radix_node *t = rn_search(v_arg, top); 487 caddr_t cp = v + head_off; 488 int b; 489 struct radix_node *tt; 490 491 /* 492 * Find first bit at which v and t->rn_key differ 493 */ 494 { 495 caddr_t cp2 = t->rn_key + head_off; 496 int cmp_res; 497 caddr_t cplim = v + vlen; 498 499 while (cp < cplim) 500 if (*cp2++ != *cp++) 501 goto on1; 502 *dupentry = 1; 503 return (t); 504 on1: 505 *dupentry = 0; 506 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 507 for (b = (cp - v) << 3; cmp_res; b--) 508 cmp_res >>= 1; 509 } 510 { 511 struct radix_node *p, *x = top; 512 cp = v; 513 do { 514 p = x; 515 if (cp[x->rn_offset] & x->rn_bmask) 516 x = x->rn_right; 517 else 518 x = x->rn_left; 519 } while (b > (unsigned)x->rn_bit); 520 /* x->rn_bit < b && x->rn_bit >= 0 */ 521 t = rn_newpair(v_arg, b, nodes); 522 tt = t->rn_left; 523 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 524 p->rn_left = t; 525 else 526 p->rn_right = t; 527 x->rn_parent = t; 528 t->rn_parent = p; 529 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 530 t->rn_right = x; 531 } else { 532 t->rn_right = tt; 533 t->rn_left = x; 534 } 535 } 536 return (tt); 537 } 538 539 static struct radix_node * 540 rn_addmask(n_arg, search, skip) 541 int search, skip; 542 void *n_arg; 543 { 544 caddr_t netmask = (caddr_t)n_arg; 545 struct radix_node *x; 546 caddr_t cp, cplim; 547 int b = 0, mlen, j; 548 int maskduplicated, m0, isnormal; 549 struct radix_node *saved_x; 550 int last_zeroed = 0; 551 char addmask_key[MAX_KEYLEN]; 552 553 if ((mlen = LEN(netmask)) > max_keylen) 554 mlen = max_keylen; 555 if (skip == 0) 556 skip = 1; 557 if (mlen <= skip) 558 return (mask_rnhead->rnh_nodes); 559 if (skip > 1) 560 bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 561 if ((m0 = mlen) > skip) 562 bcopy(netmask + skip, addmask_key + skip, mlen - skip); 563 /* 564 * Trim trailing zeroes. 565 */ 566 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0; ) 567 cp--; 568 mlen = cp - addmask_key; 569 if (mlen <= skip) { 570 if (m0 >= last_zeroed) 571 last_zeroed = mlen; 572 return (mask_rnhead->rnh_nodes); 573 } 574 if (m0 < last_zeroed) 575 bzero(addmask_key + m0, last_zeroed - m0); 576 *addmask_key = last_zeroed = mlen; 577 x = rn_search(addmask_key, rn_masktop); 578 if (bcmp(addmask_key, x->rn_key, mlen) != 0) 579 x = 0; 580 if (x || search) 581 return (x); 582 R_Zalloc(x, radix_node_cache, max_keylen + 2 * sizeof (*x)); 583 584 if ((saved_x = x) == 0) 585 return (0); 586 netmask = cp = (caddr_t)(x + 2); 587 bcopy(addmask_key, cp, mlen); 588 x = rn_insert(cp, mask_rnhead, &maskduplicated, x); 589 if (maskduplicated) { 590 #ifdef _KERNEL 591 cmn_err(CE_WARN, "rn_addmask: mask impossibly already in tree"); 592 #else 593 syslog(LOG_ERR, "rn_addmask: mask impossibly already in tree"); 594 #endif /* _KERNEL */ 595 Free(saved_x, radix_node_cache); 596 return (x); 597 } 598 /* 599 * Calculate index of mask, and check for normalcy. 600 * First find the first byte with a 0 bit, then if there are 601 * more bits left (remember we already trimmed the trailing 0's), 602 * the pattern must be one of those in normal_chars[], or we have 603 * a non-contiguous mask. 604 */ 605 cplim = netmask + mlen; 606 isnormal = 1; 607 for (cp = netmask + skip; (cp < cplim) && *(uchar_t *)cp == 0xff; ) 608 cp++; 609 if (cp != cplim) { 610 static uint8_t normal_chars[] = { 611 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; 612 613 for (j = 0x80; (j & *cp) != 0; j >>= 1) 614 b++; 615 if (*cp != normal_chars[b] || cp != (cplim - 1)) 616 isnormal = 0; 617 } 618 b += (cp - netmask) << 3; 619 x->rn_bit = -1 - b; 620 if (isnormal) 621 x->rn_flags |= RNF_NORMAL; 622 return (x); 623 } 624 625 /* arbitrary ordering for non-contiguous masks */ 626 static boolean_t 627 rn_lexobetter(m_arg, n_arg) 628 void *m_arg, *n_arg; 629 { 630 uchar_t *mp = m_arg, *np = n_arg, *lim; 631 632 if (LEN(mp) > LEN(np)) 633 /* not really, but need to check longer one first */ 634 return (B_TRUE); 635 if (LEN(mp) == LEN(np)) 636 for (lim = mp + LEN(mp); mp < lim; ) 637 if (*mp++ > *np++) 638 return (B_TRUE); 639 return (B_FALSE); 640 } 641 642 static struct radix_mask * 643 rn_new_radix_mask(tt, next) 644 struct radix_node *tt; 645 struct radix_mask *next; 646 { 647 struct radix_mask *m; 648 649 MKGet(m); 650 if (m == 0) { 651 #ifndef _KERNEL 652 syslog(LOG_ERR, "Mask for route not entered\n"); 653 #endif /* _KERNEL */ 654 return (0); 655 } 656 bzero(m, sizeof (*m)); 657 m->rm_bit = tt->rn_bit; 658 m->rm_flags = tt->rn_flags; 659 if (tt->rn_flags & RNF_NORMAL) 660 m->rm_leaf = tt; 661 else 662 m->rm_mask = tt->rn_mask; 663 m->rm_mklist = next; 664 tt->rn_mklist = m; 665 return (m); 666 } 667 668 static struct radix_node * 669 rn_addroute(v_arg, n_arg, head, treenodes) 670 void *v_arg, *n_arg; 671 struct radix_node_head *head; 672 struct radix_node treenodes[2]; 673 { 674 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; 675 struct radix_node *t, *x = 0, *tt; 676 struct radix_node *saved_tt, *top = head->rnh_treetop; 677 short b = 0, b_leaf = 0; 678 int keyduplicated; 679 caddr_t mmask; 680 struct radix_mask *m, **mp; 681 682 /* 683 * In dealing with non-contiguous masks, there may be 684 * many different routes which have the same mask. 685 * We will find it useful to have a unique pointer to 686 * the mask to speed avoiding duplicate references at 687 * nodes and possibly save time in calculating indices. 688 */ 689 if (netmask) { 690 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) 691 return (0); 692 b_leaf = x->rn_bit; 693 b = -1 - x->rn_bit; 694 netmask = x->rn_key; 695 } 696 /* 697 * Deal with duplicated keys: attach node to previous instance 698 */ 699 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); 700 if (keyduplicated) { 701 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 702 if (tt->rn_mask == netmask) 703 return (0); 704 if (netmask == 0 || 705 (tt->rn_mask && 706 /* index (netmask) > node */ 707 ((b_leaf < tt->rn_bit) || 708 rn_refines(netmask, tt->rn_mask) || 709 rn_lexobetter(netmask, tt->rn_mask)))) 710 break; 711 } 712 /* 713 * If the mask is not duplicated, we wouldn't 714 * find it among possible duplicate key entries 715 * anyway, so the above test doesn't hurt. 716 * 717 * We sort the masks for a duplicated key the same way as 718 * in a masklist -- most specific to least specific. 719 * This may require the unfortunate nuisance of relocating 720 * the head of the list. 721 * 722 * We also reverse, or doubly link the list through the 723 * parent pointer. 724 */ 725 if (tt == saved_tt) { 726 struct radix_node *xx = x; 727 /* link in at head of list */ 728 (tt = treenodes)->rn_dupedkey = t; 729 tt->rn_flags = t->rn_flags; 730 tt->rn_parent = x = t->rn_parent; 731 t->rn_parent = tt; /* parent */ 732 if (x->rn_left == t) 733 x->rn_left = tt; 734 else 735 x->rn_right = tt; 736 saved_tt = tt; x = xx; 737 } else { 738 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; 739 t->rn_dupedkey = tt; 740 /* Set rn_parent value for tt and tt->rn_dupedkey */ 741 tt->rn_parent = t; 742 if (tt->rn_dupedkey) 743 tt->rn_dupedkey->rn_parent = tt; 744 } 745 tt->rn_key = v; 746 tt->rn_bit = -1; 747 tt->rn_flags = RNF_ACTIVE; 748 } 749 /* 750 * Put mask in tree. 751 */ 752 if (netmask) { 753 tt->rn_mask = netmask; 754 tt->rn_bit = x->rn_bit; 755 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 756 } 757 t = saved_tt->rn_parent; 758 if (keyduplicated) 759 goto key_exists; 760 b_leaf = -1 - t->rn_bit; 761 if (t->rn_right == saved_tt) 762 x = t->rn_left; 763 else 764 x = t->rn_right; 765 /* Promote general routes from below */ 766 if (x->rn_bit < 0) { 767 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) 768 if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { 769 *mp = m = rn_new_radix_mask(x, 0); 770 if (m) 771 mp = &m->rm_mklist; 772 } 773 } else if (x->rn_mklist) { 774 /* 775 * Skip over masks whose index is > that of new node 776 */ 777 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) 778 if (m->rm_bit >= b_leaf) 779 break; 780 t->rn_mklist = m; *mp = 0; 781 } 782 key_exists: 783 /* Add new route to highest possible ancestor's list */ 784 if ((netmask == 0) || (b > t->rn_bit)) 785 return (tt); /* can't lift at all */ 786 b_leaf = tt->rn_bit; 787 do { 788 x = t; 789 t = t->rn_parent; 790 } while (b <= t->rn_bit && x != top); 791 /* 792 * Search through routes associated with node to 793 * insert new route according to index. 794 * Need same criteria as when sorting dupedkeys to avoid 795 * double loop on deletion. 796 */ 797 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { 798 if (m->rm_bit < b_leaf) 799 continue; 800 if (m->rm_bit > b_leaf) 801 break; 802 if (m->rm_flags & RNF_NORMAL) { 803 mmask = m->rm_leaf->rn_mask; 804 if (tt->rn_flags & RNF_NORMAL) { 805 #ifdef _KERNEL 806 cmn_err(CE_WARN, "Non-unique normal route, " 807 "mask not entered\n"); 808 #else 809 syslog(LOG_ERR, "Non-unique normal route, " 810 "mask not entered\n"); 811 #endif /* _KERNEL */ 812 return (tt); 813 } 814 } else 815 mmask = m->rm_mask; 816 if (mmask == netmask) { 817 m->rm_refs++; 818 tt->rn_mklist = m; 819 return (tt); 820 } 821 if (rn_refines(netmask, mmask) || 822 rn_lexobetter(netmask, mmask)) 823 break; 824 } 825 *mp = rn_new_radix_mask(tt, *mp); 826 return (tt); 827 } 828 829 static struct radix_node * 830 rn_delete(v_arg, netmask_arg, head) 831 void *v_arg, *netmask_arg; 832 struct radix_node_head *head; 833 { 834 struct radix_node *t, *p, *x, *tt; 835 struct radix_mask *m, *saved_m, **mp; 836 struct radix_node *dupedkey, *saved_tt, *top; 837 caddr_t v, netmask; 838 int b, head_off, vlen; 839 840 v = v_arg; 841 netmask = netmask_arg; 842 x = head->rnh_treetop; 843 tt = rn_search(v, x); 844 head_off = x->rn_offset; 845 vlen = LEN(v); 846 saved_tt = tt; 847 top = x; 848 if (tt == 0 || 849 bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) 850 return (0); 851 /* 852 * Delete our route from mask lists. 853 */ 854 if (netmask) { 855 if ((x = rn_addmask(netmask, 1, head_off)) == 0) 856 return (0); 857 netmask = x->rn_key; 858 while (tt->rn_mask != netmask) 859 if ((tt = tt->rn_dupedkey) == 0) 860 return (0); 861 } 862 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) 863 goto on1; 864 if (tt->rn_flags & RNF_NORMAL) { 865 if (m->rm_leaf != tt || m->rm_refs > 0) { 866 #ifdef _KERNEL 867 cmn_err(CE_WARN, 868 "rn_delete: inconsistent annotation\n"); 869 #else 870 syslog(LOG_ERR, "rn_delete: inconsistent annotation\n"); 871 #endif /* _KERNEL */ 872 return (0); /* dangling ref could cause disaster */ 873 } 874 } else { 875 if (m->rm_mask != tt->rn_mask) { 876 #ifdef _KERNEL 877 cmn_err(CE_WARN, 878 "rn_delete: inconsistent annotation 2\n"); 879 #else 880 syslog(LOG_ERR, 881 "rn_delete: inconsistent annotation 2\n"); 882 #endif /* _KERNEL */ 883 goto on1; 884 } 885 if (--m->rm_refs >= 0) 886 goto on1; 887 } 888 b = -1 - tt->rn_bit; 889 t = saved_tt->rn_parent; 890 if (b > t->rn_bit) 891 goto on1; /* Wasn't lifted at all */ 892 do { 893 x = t; 894 t = t->rn_parent; 895 } while (b <= t->rn_bit && x != top); 896 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) 897 if (m == saved_m) { 898 *mp = m->rm_mklist; 899 MKFree(m); 900 break; 901 } 902 if (m == 0) { 903 #ifdef _KERNEL 904 cmn_err(CE_WARN, "rn_delete: couldn't find our annotation\n"); 905 #else 906 syslog(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 907 #endif /* _KERNEL */ 908 if (tt->rn_flags & RNF_NORMAL) 909 return (0); /* Dangling ref to us */ 910 } 911 on1: 912 /* 913 * Eliminate us from tree 914 */ 915 if (tt->rn_flags & RNF_ROOT) 916 return (0); 917 t = tt->rn_parent; 918 dupedkey = saved_tt->rn_dupedkey; 919 if (dupedkey) { 920 /* 921 * Here, tt is the deletion target and 922 * saved_tt is the head of the dupekey chain. 923 */ 924 if (tt == saved_tt) { 925 /* remove from head of chain */ 926 x = dupedkey; x->rn_parent = t; 927 if (t->rn_left == tt) 928 t->rn_left = x; 929 else 930 t->rn_right = x; 931 } else { 932 /* find node in front of tt on the chain */ 933 for (x = p = saved_tt; p && p->rn_dupedkey != tt; ) 934 p = p->rn_dupedkey; 935 if (p) { 936 p->rn_dupedkey = tt->rn_dupedkey; 937 if (tt->rn_dupedkey) /* parent */ 938 tt->rn_dupedkey->rn_parent = p; 939 /* parent */ 940 } else 941 #ifdef _KERNEL 942 cmn_err(CE_WARN, 943 "rn_delete: couldn't find us\n"); 944 #else 945 syslog(LOG_ERR, 946 "rn_delete: couldn't find us\n"); 947 #endif /* _KERNEL */ 948 } 949 t = tt + 1; 950 if (t->rn_flags & RNF_ACTIVE) { 951 *++x = *t; 952 p = t->rn_parent; 953 if (p->rn_left == t) 954 p->rn_left = x; 955 else 956 p->rn_right = x; 957 x->rn_left->rn_parent = x; 958 x->rn_right->rn_parent = x; 959 } 960 goto out; 961 } 962 if (t->rn_left == tt) 963 x = t->rn_right; 964 else 965 x = t->rn_left; 966 p = t->rn_parent; 967 if (p->rn_right == t) 968 p->rn_right = x; 969 else 970 p->rn_left = x; 971 x->rn_parent = p; 972 /* 973 * Demote routes attached to us. 974 */ 975 if (t->rn_mklist) { 976 if (x->rn_bit >= 0) { 977 for (mp = &x->rn_mklist; (m = *mp) != NULL; ) 978 mp = &m->rm_mklist; 979 *mp = t->rn_mklist; 980 } else { 981 /* 982 * If there are any key,mask pairs in a sibling 983 * duped-key chain, some subset will appear sorted 984 * in the same order attached to our mklist 985 */ 986 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 987 if (m == x->rn_mklist) { 988 struct radix_mask *mm = m->rm_mklist; 989 x->rn_mklist = 0; 990 if (--(m->rm_refs) < 0) 991 MKFree(m); 992 m = mm; 993 } 994 if (m) 995 #ifdef _KERNEL 996 cmn_err(CE_WARN, 997 "rn_delete: Orphaned Mask %p at %p\n", 998 (void *)m, (void *)x); 999 #else 1000 syslog(LOG_ERR, 1001 "rn_delete: Orphaned Mask %p at %p\n", 1002 (void *)m, (void *)x); 1003 #endif /* _KERNEL */ 1004 } 1005 } 1006 /* 1007 * We may be holding an active internal node in the tree. 1008 */ 1009 x = tt + 1; 1010 if (t != x) { 1011 *t = *x; 1012 t->rn_left->rn_parent = t; 1013 t->rn_right->rn_parent = t; 1014 p = x->rn_parent; 1015 if (p->rn_left == x) 1016 p->rn_left = t; 1017 else 1018 p->rn_right = t; 1019 } 1020 out: 1021 tt->rn_flags &= ~RNF_ACTIVE; 1022 tt[1].rn_flags &= ~RNF_ACTIVE; 1023 return (tt); 1024 } 1025 1026 /* 1027 * Walk the radix tree; For the kernel routing table, we hold additional 1028 * refs on the ire_bucket to ensure that the walk function f() does not 1029 * run into trashed memory. The kernel routing table is identified by 1030 * a rnh_treetop that has RNF_SUNW_FT set in the rn_flags. 1031 * Note that all refs takein in rn_walktree are released before it returns, 1032 * so that f() will need to take any additional references on memory 1033 * to be passed back to the caller of rn_walktree. 1034 */ 1035 static int 1036 rn_walktree(h, f, w) 1037 struct radix_node_head *h; 1038 walktree_f_t *f; 1039 void *w; 1040 { 1041 int error; 1042 struct radix_node *base, *next; 1043 struct radix_node *rn = h->rnh_treetop; 1044 boolean_t is_ftable = B_FALSE; 1045 1046 if (h->rnh_treetop->rn_flags & RNF_SUNW_FT) { 1047 /* 1048 * this is a kernel ip ftable with rt_t leaf structures. 1049 */ 1050 is_ftable = B_TRUE; 1051 } 1052 /* 1053 * This gets complicated because we may delete the node 1054 * while applying the function f to it, so we need to calculate 1055 * the successor node in advance. 1056 */ 1057 RADIX_NODE_HEAD_RLOCK(h); 1058 /* First time through node, go left */ 1059 while (rn->rn_bit >= 0) { 1060 rn = rn->rn_left; 1061 } 1062 1063 if (is_ftable) 1064 IRB_REFHOLD_RN(rn); 1065 1066 for (;;) { 1067 base = rn; 1068 /* If at right child go back up, otherwise, go right */ 1069 while (rn->rn_parent->rn_right == rn && 1070 (rn->rn_flags & RNF_ROOT) == 0) { 1071 rn = rn->rn_parent; 1072 } 1073 /* Find the next *leaf* since next node might vanish, too */ 1074 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0; ) { 1075 rn = rn->rn_left; 1076 } 1077 next = rn; 1078 1079 if (is_ftable && next != NULL) 1080 IRB_REFHOLD_RN(next); 1081 1082 /* Process leaves */ 1083 while ((rn = base) != NULL) { 1084 base = rn->rn_dupedkey; 1085 1086 if (is_ftable && base != NULL) 1087 IRB_REFHOLD_RN(base); 1088 1089 RADIX_NODE_HEAD_UNLOCK(h); 1090 if (!(rn->rn_flags & RNF_ROOT) && 1091 (error = (*f)(rn, w))) { 1092 if (is_ftable) { 1093 if (rn != NULL) 1094 IRB_REFRELE_RN(rn); 1095 if (base != NULL) 1096 IRB_REFRELE_RN(base); 1097 if (next != NULL) 1098 IRB_REFRELE_RN(next); 1099 } 1100 return (error); 1101 } 1102 if (is_ftable && rn != NULL) 1103 IRB_REFRELE_RN(rn); 1104 RADIX_NODE_HEAD_RLOCK(h); 1105 } 1106 rn = next; 1107 if (rn->rn_flags & RNF_ROOT) { 1108 RADIX_NODE_HEAD_UNLOCK(h); 1109 return (0); 1110 } 1111 } 1112 /* NOTREACHED */ 1113 } 1114 1115 /* 1116 * Allocate and initialize an empty tree. This has 3 nodes, which are 1117 * part of the radix_node_head (in the order <left,root,right>) and are 1118 * marked RNF_ROOT so they cannot be freed. 1119 * The leaves have all-zero and all-one keys, with significant 1120 * bits starting at 'off'. 1121 * Return 1 on success, 0 on error. 1122 */ 1123 int 1124 rn_inithead(head, off) 1125 void **head; 1126 int off; 1127 { 1128 struct radix_node_head *rnh; 1129 struct radix_node *t, *tt, *ttt; 1130 if (*head) 1131 return (1); 1132 R_ZallocSleep(rnh, struct radix_node_head *, sizeof (*rnh)); 1133 if (rnh == 0) 1134 return (0); 1135 #ifdef _KERNEL 1136 RADIX_NODE_HEAD_LOCK_INIT(rnh); 1137 #endif 1138 *head = rnh; 1139 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); 1140 ttt = rnh->rnh_nodes + 2; 1141 t->rn_right = ttt; 1142 t->rn_parent = t; 1143 tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */ 1144 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; 1145 tt->rn_bit = -1 - off; 1146 *ttt = *tt; 1147 ttt->rn_key = rn_ones; 1148 rnh->rnh_addaddr = rn_addroute; 1149 rnh->rnh_deladdr = rn_delete; 1150 rnh->rnh_matchaddr = rn_match; 1151 rnh->rnh_matchaddr_args = rn_match_args; 1152 rnh->rnh_lookup = rn_lookup; 1153 rnh->rnh_walktree = rn_walktree; 1154 rnh->rnh_walktree_from = NULL; /* not implemented */ 1155 rnh->rnh_treetop = t; 1156 return (1); 1157 } 1158 1159 void 1160 rn_init() 1161 { 1162 char *cp, *cplim; 1163 1164 #ifdef _KERNEL 1165 radix_mask_cache = kmem_cache_create("radix_mask", 1166 sizeof (struct radix_mask), 0, NULL, NULL, NULL, NULL, NULL, 0); 1167 radix_node_cache = kmem_cache_create("radix_node", 1168 max_keylen + 2 * sizeof (struct radix_node), 1169 0, NULL, NULL, NULL, NULL, NULL, 0); 1170 #endif /* _KERNEL */ 1171 R_ZallocSleep(rn_zeros, char *, 2 * max_keylen); 1172 1173 ASSERT(rn_zeros != NULL); 1174 bzero(rn_zeros, 2 * max_keylen); 1175 rn_ones = cp = rn_zeros + max_keylen; 1176 cplim = rn_ones + max_keylen; 1177 while (cp < cplim) 1178 *cp++ = -1; 1179 if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0) 1180 panic("rn_init: could not init mask_rnhead "); 1181 } 1182 1183 int 1184 rn_freenode(n, p) 1185 struct radix_node *n; 1186 void *p; 1187 { 1188 struct radix_node_head *rnh = p; 1189 struct radix_node *d; 1190 1191 d = rnh->rnh_deladdr(n->rn_key, NULL, rnh); 1192 if (d != NULL) { 1193 Free(d, radix_node_cache); 1194 } 1195 return (0); 1196 } 1197 1198 1199 void 1200 rn_freehead(rnh) 1201 struct radix_node_head *rnh; 1202 { 1203 (void) rn_walktree(rnh, rn_freenode, rnh); 1204 1205 rnh->rnh_addaddr = NULL; 1206 rnh->rnh_deladdr = NULL; 1207 rnh->rnh_matchaddr = NULL; 1208 rnh->rnh_lookup = NULL; 1209 rnh->rnh_walktree = NULL; 1210 1211 #ifdef _KERNEL 1212 FreeHead(rnh, sizeof (*rnh)); 1213 #else 1214 Free(rnh, NULL); 1215 #endif /* _KERNEL */ 1216 } 1217 1218 void 1219 rn_fini() 1220 { 1221 struct radix_mask *m; 1222 1223 if (rn_zeros != NULL) { 1224 #ifdef _KERNEL 1225 FreeHead(rn_zeros, 2 * max_keylen); 1226 #else 1227 Free(rn_zeros, NULL); 1228 #endif 1229 rn_zeros = NULL; 1230 } 1231 1232 1233 if (mask_rnhead != NULL) { 1234 rn_freehead(mask_rnhead); 1235 mask_rnhead = NULL; 1236 } 1237 1238 while ((m = rn_mkfreelist) != NULL) { 1239 rn_mkfreelist = m->rm_mklist; 1240 Free(m, NULL); 1241 } 1242 } 1243