/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.5 (Berkeley) 5/19/95
 * $FreeBSD: /repoman/r/ncvs/src/sys/net/radix.c,v 1.36.2.1 2005/01/31 23:26:23
 * imp Exp $
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */
#include <sys/types.h>

#ifndef	_RADIX_H_
#include <sys/param.h>
#ifdef	_KERNEL
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#else
#include <assert.h>
#define	ASSERT	assert
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <strings.h>
#endif	/* _KERNEL */
#include <net/radix.h>
#endif

#ifndef	_KERNEL
void
panic(const char *str)
{
        fprintf(stderr, "Panic - %s\n", str);
        abort();
}
#endif	/* _KERNEL */

static int	rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static int	rn_walktree_mt(struct radix_node_head *, walktree_f_t *,
    void *, lockf_t, lockf_t);
static struct radix_node
	*rn_insert(void *, struct radix_node_head *, int *,
	    struct radix_node [2]),
	*rn_newpair(void *, int, struct radix_node[2]),
	*rn_search(void *, struct radix_node *),
	*rn_search_m(void *, struct radix_node *, void *),
	*rn_lookup(void *, void *, struct radix_node_head *),
	*rn_match(void *, struct radix_node_head *),
	*rn_match_args(void *, struct radix_node_head *, match_leaf_t *,
	    void *),
	*rn_addmask(void *, int, int),
	*rn_addroute(void *, void *, struct radix_node_head *,
	    struct radix_node [2]),
	*rn_delete(void *, void *, struct radix_node_head *);
static boolean_t rn_refines(void *, void *);

/*
 * IPF also uses a PATRICIA tree to manage ippools. IPF stores its own
 * structure, addrfamily_t.  sizeof (addrfamily_t) == 24.
 */
#define	MAX_KEYLEN	24
static int	max_keylen = MAX_KEYLEN;

#ifdef	_KERNEL
static struct kmem_cache *radix_mask_cache; /* for rn_mkfreelist */
static struct kmem_cache *radix_node_cache;
#else
static char *radix_mask_cache, *radix_node_cache; /* dummy vars. never inited */
#endif	/* _KERNEL */

static struct radix_mask *rn_mkfreelist;
static struct radix_node_head *mask_rnhead;
/*
 * Work area -- the following point to 2 buffers of size max_keylen,
 * allocated in this order in a block of memory malloc'ed by rn_init.
 * A third buffer of size MAX_KEYLEN is allocated on the stack.
 */
static char *rn_zeros, *rn_ones;

#define	MKGet(m)  R_Malloc(m, radix_mask_cache, sizeof (struct radix_mask))
#define	MKFree(m) Free(m, radix_mask_cache)
#define	rn_masktop (mask_rnhead->rnh_treetop)

static boolean_t	rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
		rn_new_radix_mask(struct radix_node *tt,
		    struct radix_mask *next);
static boolean_t
		rn_satisfies_leaf(char *trial, struct radix_node *leaf,
		    int skip, match_leaf_t *rn_leaf_fn, void *rn_leaf_arg);

#define	RN_MATCHF(rn, f, arg)	(f == NULL || (*f)((rn), arg))

/*
 * The data structure for the keys is a radix tree with one way
 * branching removed. The index rn_bit at an internal node n represents a bit
 * position to be tested. The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask. We require the
 * bit-wise logical and of the key and mask to be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_bit, this implies that the trailing bits of k
 * before bit rn_bit are all 0 (and hence the same is true of every
 * descendant of n), so the route applies to all descendants of the node
 * as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
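
/*
 * A worked example of the definitions above (illustrative only;
 * treating just four address bytes as the key, with bit 0 the
 * high-order bit): the IPv4 mask 255.255.255.0 has its first zero at
 * bit 24, so index(m) == 24, and every bit past the index is zero, so
 * the mask is normal.  A mask like 255.0.255.0 has its first zero at
 * bit 8 but a one at bit 16, so it is non-contiguous and not normal;
 * routes carrying such a mask are only reachable through the mask
 * lists hung off internal nodes.
 */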

/*
 * Most of the functions in this code assume that the key/mask arguments
 * are sockaddr-like structures, where the first byte is a uchar_t
 * indicating the size of the entire structure.
 *
 * To make the assumption more explicit, we use the LEN() macro to access
 * this field. It is safe to pass an expression with side effects
 * to LEN() as the argument is evaluated only once.
 */
#define	LEN(x)	(*(const uchar_t *)(x))
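
/*
 * Illustrative note on the assumed key layout (none of these types are
 * defined in this file): a 4.4BSD-style struct sockaddr begins with a
 * one-byte sa_len field, so LEN(sa) yields the size of the whole key.
 * Any key or mask handed to these routines, sockaddr or not (e.g. IPF's
 * addrfamily_t), must follow the same first-byte length convention.
 */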

/*
 * Search the tree for a node matching the key.
 */
static struct radix_node *
rn_search(v_arg, head)
	void *v_arg;
	struct radix_node *head;
{
        struct radix_node *x;
        caddr_t v;

        for (x = head, v = v_arg; x->rn_bit >= 0; ) {
                if (x->rn_bmask & v[x->rn_offset])
                        x = x->rn_right;
                else
                        x = x->rn_left;
        }
        return (x);
}

/*
 * Same as above, but with an additional mask.
 */
static struct radix_node *
rn_search_m(v_arg, head, m_arg)
	struct radix_node *head;
	void *v_arg, *m_arg;
{
        struct radix_node *x;
        caddr_t v = v_arg, m = m_arg;

        for (x = head; x->rn_bit >= 0; ) {
                if ((x->rn_bmask & m[x->rn_offset]) &&
                    (x->rn_bmask & v[x->rn_offset]))
                        x = x->rn_right;
                else
                        x = x->rn_left;
        }
        return (x);
}

/*
 * Returns true if there are no bits set in n_arg that are zero in
 * m_arg and the masks aren't equal.  In other words, it returns true
 * when m_arg is a finer-granularity netmask -- it represents a subset
 * of the destinations implied by n_arg.
 */
static boolean_t
rn_refines(m_arg, n_arg)
	void *m_arg, *n_arg;
{
        caddr_t m = m_arg, n = n_arg;
        caddr_t lim = n + LEN(n), lim2 = lim;
        int longer = LEN(n++) - (int)LEN(m++);
        boolean_t masks_are_equal = B_TRUE;

        if (longer > 0)
                lim -= longer;
        while (n < lim) {
                if (*n & ~(*m))
                        return (B_FALSE);
                if (*n++ != *m++)
                        masks_are_equal = B_FALSE;
        }
        while (n < lim2)
                if (*n++)
                        return (B_FALSE);
        if (masks_are_equal && (longer < 0))
                for (lim2 = m - longer; m < lim2; )
                        if (*m++)
                                return (B_TRUE);
        return (!masks_are_equal);
}
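
/*
 * For example: rn_refines(255.255.255.0, 255.255.0.0) is B_TRUE, since
 * every bit set in the second mask is also set in the first and the two
 * differ, i.e. the first covers a strict subset of the destinations;
 * rn_refines(m, m) is B_FALSE for any mask m, because equal masks
 * refine nothing.
 */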

static struct radix_node *
rn_lookup(v_arg, m_arg, head)
	void *v_arg, *m_arg;
	struct radix_node_head *head;
{
        struct radix_node *x;
        caddr_t netmask = NULL;

        if (m_arg) {
                x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
                if (x == NULL)
                        return (NULL);
                netmask = x->rn_key;
        }
        x = rn_match(v_arg, head);
        if (x && netmask) {
                while (x && x->rn_mask != netmask)
                        x = x->rn_dupedkey;
        }
        return (x);
}

/*
 * Returns true if address 'trial' has no bits differing from the
 * leaf's key when compared under the leaf's mask.  In other words,
 * returns true when 'trial' matches leaf.
 * In addition, if a rn_leaf_fn is passed in, that is used to find
 * a match on conditions defined by the caller of rn_match.  This is
 * used by the kernel ftable to match on IRE_MATCH_* conditions.
 */
static boolean_t
rn_satisfies_leaf(trial, leaf, skip, rn_leaf_fn, rn_leaf_arg)
	caddr_t trial;
	struct radix_node *leaf;
	int skip;
	match_leaf_t *rn_leaf_fn;
	void *rn_leaf_arg;
{
        char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
        char *cplim;
        int length = min(LEN(cp), LEN(cp2));

        if (cp3 == 0)
                cp3 = rn_ones;
        else
                length = min(length, LEN(cp3));
        cplim = cp + length;
        cp3 += skip;
        cp2 += skip;

        for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
                if ((*cp ^ *cp2) & *cp3)
                        return (B_FALSE);

        return (RN_MATCHF(leaf, rn_leaf_fn, rn_leaf_arg));
}
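
/*
 * For example: with leaf key 10.1.0.0 and leaf mask 255.255.0.0, a
 * trial address of 10.1.2.3 satisfies the leaf, since (trial ^ key) is
 * nonzero only in bytes the mask zeroes out; 10.2.0.1 does not, as its
 * second byte differs under a 0xff mask byte.
 */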

static struct radix_node *
rn_match(v_arg, head)
	void *v_arg;
	struct radix_node_head *head;
{
        return (rn_match_args(v_arg, head, NULL, NULL));
}

static struct radix_node *
rn_match_args(v_arg, head, rn_leaf_fn, rn_leaf_arg)
	void *v_arg;
	struct radix_node_head *head;
	match_leaf_t *rn_leaf_fn;
	void *rn_leaf_arg;
{
        caddr_t v = v_arg;
        struct radix_node *t = head->rnh_treetop, *x;
        caddr_t cp = v, cp2;
        caddr_t cplim;
        struct radix_node *saved_t, *top = t;
        int off = t->rn_offset, vlen = LEN(cp), matched_off;
        int test, b, rn_bit;

        /*
         * Open code rn_search(v, top) to avoid overhead of extra
         * subroutine call.
         */
        for (; t->rn_bit >= 0; ) {
                if (t->rn_bmask & cp[t->rn_offset])
                        t = t->rn_right;
                else
                        t = t->rn_left;
        }
        /*
         * See if we match exactly as a host destination
         * or at least learn how many bits match, for normal mask finesse.
         *
         * It doesn't hurt us to limit how many bytes to check
         * to the length of the mask, since if it matches we had a genuine
         * match and the leaf we have is the most specific one anyway;
         * if it didn't match with a shorter length it would fail
         * with a long one.  This wins big for class B&C netmasks which
         * are probably the most common case...
         */
        if (t->rn_mask)
                vlen = LEN(t->rn_mask);
        cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
        for (; cp < cplim; cp++, cp2++)
                if (*cp != *cp2)
                        goto keydiff;
        /*
         * This extra grot is in case we are explicitly asked
         * to look up the default.  Ugh!
         *
         * Never return the root node itself, it seems to cause a
         * lot of confusion.
         */
        if (t->rn_flags & RNF_ROOT)
                t = t->rn_dupedkey;
        if (t == NULL || RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
                return (t);
        } else {
                /*
                 * Although we found an exact match on the key, rn_leaf_fn
                 * is looking for some other criteria as well.  Continue
                 * looking as if the exact match failed.
                 */
                if (t->rn_parent->rn_flags & RNF_ROOT) {
                        /* hit the top.  have to give up */
                        return (NULL);
                }
                b = 0;
                goto keeplooking;
        }
keydiff:
        test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
        for (b = 7; (test >>= 1) > 0; )
                b--;
keeplooking:
        matched_off = cp - v;
        b += matched_off << 3;
        rn_bit = -1 - b;

        /*
         * If there is a host route in a duped-key chain, it will be first.
         */
        if ((saved_t = t)->rn_mask == 0)
                t = t->rn_dupedkey;
        for (; t != NULL; t = t->rn_dupedkey) {
                /*
                 * Even if we don't match exactly as a host,
                 * we may match if the leaf we wound up at is
                 * a route to a net.
                 */
                if (t->rn_flags & RNF_NORMAL) {
                        if ((rn_bit <= t->rn_bit) &&
                            RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
                                return (t);
                        }
                } else if (rn_satisfies_leaf(v, t, matched_off, rn_leaf_fn,
                    rn_leaf_arg)) {
                        return (t);
                }
        }
        t = saved_t;
        /* start searching up the tree */
        do {
                struct radix_mask *m;

                t = t->rn_parent;
                m = t->rn_mklist;
                /*
                 * If non-contiguous masks ever become important
                 * we can restore the masking and open coding of
                 * the search and satisfaction test and put the
                 * calculation of "off" back before the "do".
                 */
                while (m) {
                        if (m->rm_flags & RNF_NORMAL) {
                                if ((rn_bit <= m->rm_bit) &&
                                    RN_MATCHF(m->rm_leaf, rn_leaf_fn,
                                    rn_leaf_arg)) {
                                        return (m->rm_leaf);
                                }
                        } else {
                                off = min(t->rn_offset, matched_off);
                                x = rn_search_m(v, t, m->rm_mask);
                                while (x != NULL && x->rn_mask != m->rm_mask)
                                        x = x->rn_dupedkey;
                                if (x && rn_satisfies_leaf(v, x, off,
                                    rn_leaf_fn, rn_leaf_arg)) {
                                        return (x);
                                }
                        }
                        m = m->rm_mklist;
                }
        } while (t != top);
        return (0);
}

/*
 * Whenever we add a new leaf to the tree, we also add a parent node,
 * so we allocate them as an array of two elements: the first one must be
 * the leaf (see RNTORT() in route.c), the second one is the parent.
 * This routine initializes the relevant fields of the nodes, so that
 * the leaf is the left child of the parent node, and both nodes have
 * (almost) all fields filled as appropriate.
 * The function returns a pointer to the parent node.
 */
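
/*
 * Shape of the two-element array handed to rn_newpair (illustrative):
 *
 *	nodes[1]  (parent, rn_bit = b, tests bit b)
 *	   |
 *	nodes[0]  (leaf, rn_bit = -1, rn_key = v; left child of nodes[1])
 *
 * nodes[0] carries the key, nodes[1] carries the bit to test; the
 * parent's right child is filled in later by rn_insert.
 */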

static struct radix_node *
rn_newpair(v, b, nodes)
	void *v;
	int b;
	struct radix_node nodes[2];
{
        struct radix_node *tt = nodes, *t = tt + 1;

        t->rn_bit = b;
        t->rn_bmask = 0x80 >> (b & 7);
        t->rn_left = tt;
        t->rn_offset = b >> 3;

        /*
         * t->rn_parent, t->rn_right, tt->rn_mask, tt->rn_dupedkey
         * and tt->rn_bmask must have been zeroed by caller.
         */
        tt->rn_bit = -1;
        tt->rn_key = v;
        tt->rn_parent = t;
        tt->rn_flags = t->rn_flags = RNF_ACTIVE;
        tt->rn_mklist = t->rn_mklist = 0;
        return (t);
}

static struct radix_node *
rn_insert(v_arg, head, dupentry, nodes)
	void *v_arg;
	struct radix_node_head *head;
	int *dupentry;
	struct radix_node nodes[2];
{
        caddr_t v = v_arg;
        struct radix_node *top = head->rnh_treetop;
        int head_off = top->rn_offset, vlen = (int)LEN(v);
        struct radix_node *t = rn_search(v_arg, top);
        caddr_t cp = v + head_off;
        int b;
        struct radix_node *tt;

        /*
         * Find first bit at which v and t->rn_key differ
         */
        {
                caddr_t cp2 = t->rn_key + head_off;
                int cmp_res;
                caddr_t cplim = v + vlen;

                while (cp < cplim)
                        if (*cp2++ != *cp++)
                                goto on1;
                *dupentry = 1;
                return (t);
on1:
                *dupentry = 0;
                cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
                for (b = (cp - v) << 3; cmp_res; b--)
                        cmp_res >>= 1;
        }
        {
                struct radix_node *p, *x = top;
                cp = v;
                do {
                        p = x;
                        if (cp[x->rn_offset] & x->rn_bmask)
                                x = x->rn_right;
                        else
                                x = x->rn_left;
                } while (b > (unsigned)x->rn_bit);
                /* x->rn_bit < b && x->rn_bit >= 0 */
                t = rn_newpair(v_arg, b, nodes);
                tt = t->rn_left;
                if ((cp[p->rn_offset] & p->rn_bmask) == 0)
                        p->rn_left = t;
                else
                        p->rn_right = t;
                x->rn_parent = t;
                t->rn_parent = p;
                if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
                        t->rn_right = x;
                } else {
                        t->rn_right = tt;
                        t->rn_left = x;
                }
        }
        return (tt);
}

static struct radix_node *
rn_addmask(n_arg, search, skip)
	int search, skip;
	void *n_arg;
{
        caddr_t netmask = (caddr_t)n_arg;
        struct radix_node *x;
        caddr_t cp, cplim;
        int b = 0, mlen, j;
        int maskduplicated, m0, isnormal;
        struct radix_node *saved_x;
        int last_zeroed = 0;
        char addmask_key[MAX_KEYLEN];

        if ((mlen = LEN(netmask)) > max_keylen)
                mlen = max_keylen;
        if (skip == 0)
                skip = 1;
        if (mlen <= skip)
                return (mask_rnhead->rnh_nodes);
        if (skip > 1)
                bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
        if ((m0 = mlen) > skip)
                bcopy(netmask + skip, addmask_key + skip, mlen - skip);
        /*
         * Trim trailing zeroes.
         */
        for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0; )
                cp--;
        mlen = cp - addmask_key;
        if (mlen <= skip) {
                if (m0 >= last_zeroed)
                        last_zeroed = mlen;
                return (mask_rnhead->rnh_nodes);
        }
        if (m0 < last_zeroed)
                bzero(addmask_key + m0, last_zeroed - m0);
        *addmask_key = last_zeroed = mlen;
        x = rn_search(addmask_key, rn_masktop);
        if (bcmp(addmask_key, x->rn_key, mlen) != 0)
                x = 0;
        if (x || search)
                return (x);
        R_Zalloc(x, radix_node_cache, max_keylen + 2 * sizeof (*x));

        if ((saved_x = x) == 0)
                return (0);
        netmask = cp = (caddr_t)(x + 2);
        bcopy(addmask_key, cp, mlen);
        x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
        if (maskduplicated) {
#ifdef	_KERNEL
                cmn_err(CE_WARN, "rn_addmask: mask impossibly already in tree");
#else
                syslog(LOG_ERR, "rn_addmask: mask impossibly already in tree");
#endif	/* _KERNEL */
                Free(saved_x, radix_node_cache);
                return (x);
        }
        /*
         * Calculate index of mask, and check for normalcy.
         * First find the first byte with a 0 bit, then if there are
         * more bits left (remember we already trimmed the trailing 0's),
         * the pattern must be one of those in normal_chars[], or we have
         * a non-contiguous mask.
         */
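
        /*
         * For example (illustrative): if the significant mask bytes
         * after trimming are 0xff 0xff 0xf0, the scan below stops at
         * the 0xf0 byte; 0xf0 has four leading one bits, so b becomes
         * 4, it equals normal_chars[4], and it is the last nonzero
         * byte, so the mask is normal with index
         * b = ((cp - netmask) << 3) + 4.  A byte such as 0xa0 matches
         * no normal_chars[] entry, marking the mask non-contiguous.
         */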
        cplim = netmask + mlen;
        isnormal = 1;
        for (cp = netmask + skip; (cp < cplim) && *(uchar_t *)cp == 0xff; )
                cp++;
        if (cp != cplim) {
                static uint8_t normal_chars[] = {
                        0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};

                for (j = 0x80; (j & *cp) != 0; j >>= 1)
                        b++;
                if (*cp != normal_chars[b] || cp != (cplim - 1))
                        isnormal = 0;
        }
        b += (cp - netmask) << 3;
        x->rn_bit = -1 - b;
        if (isnormal)
                x->rn_flags |= RNF_NORMAL;
        return (x);
}

/* arbitrary ordering for non-contiguous masks */
static boolean_t
rn_lexobetter(m_arg, n_arg)
	void *m_arg, *n_arg;
{
        uchar_t *mp = m_arg, *np = n_arg, *lim;

        if (LEN(mp) > LEN(np))
                /* not really, but need to check longer one first */
                return (B_TRUE);
        if (LEN(mp) == LEN(np))
                for (lim = mp + LEN(mp); mp < lim; )
                        if (*mp++ > *np++)
                                return (B_TRUE);
        return (B_FALSE);
}
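
/*
 * For example: a longer mask always sorts ahead of a shorter one
 * regardless of content, and for equal-length masks the scan returns
 * B_TRUE as soon as some byte of m_arg exceeds the corresponding byte
 * of n_arg.  The ordering only needs to be deterministic, not
 * meaningful; it exists so duplicate-key chains and mask lists can be
 * kept in a consistent order.
 */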

static struct radix_mask *
rn_new_radix_mask(tt, next)
	struct radix_node *tt;
	struct radix_mask *next;
{
        struct radix_mask *m;

        MKGet(m);
        if (m == 0) {
#ifndef	_KERNEL
                syslog(LOG_ERR, "Mask for route not entered\n");
#endif	/* _KERNEL */
                return (0);
        }
        bzero(m, sizeof (*m));
        m->rm_bit = tt->rn_bit;
        m->rm_flags = tt->rn_flags;
        if (tt->rn_flags & RNF_NORMAL)
                m->rm_leaf = tt;
        else
                m->rm_mask = tt->rn_mask;
        m->rm_mklist = next;
        tt->rn_mklist = m;
        return (m);
}

static struct radix_node *
rn_addroute(v_arg, n_arg, head, treenodes)
	void *v_arg, *n_arg;
	struct radix_node_head *head;
	struct radix_node treenodes[2];
{
        caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
        struct radix_node *t, *x = 0, *tt;
        struct radix_node *saved_tt, *top = head->rnh_treetop;
        short b = 0, b_leaf = 0;
        int keyduplicated;
        caddr_t mmask;
        struct radix_mask *m, **mp;

        /*
         * In dealing with non-contiguous masks, there may be
         * many different routes which have the same mask.
         * We will find it useful to have a unique pointer to
         * the mask to speed avoiding duplicate references at
         * nodes and possibly save time in calculating indices.
         */
        if (netmask) {
                if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
                        return (0);
                b_leaf = x->rn_bit;
                b = -1 - x->rn_bit;
                netmask = x->rn_key;
        }
        /*
         * Deal with duplicated keys: attach node to previous instance
         */
        saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
        if (keyduplicated) {
                for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
                        if (tt->rn_mask == netmask)
                                return (0);
                        if (netmask == 0 ||
                            (tt->rn_mask &&
                            /* index (netmask) > node */
                            ((b_leaf < tt->rn_bit) ||
                            rn_refines(netmask, tt->rn_mask) ||
                            rn_lexobetter(netmask, tt->rn_mask))))
                                break;
                }
                /*
                 * If the mask is not duplicated, we wouldn't
                 * find it among possible duplicate key entries
                 * anyway, so the above test doesn't hurt.
                 *
                 * We sort the masks for a duplicated key the same way as
                 * in a masklist -- most specific to least specific.
                 * This may require the unfortunate nuisance of relocating
                 * the head of the list.
                 *
                 * We also reverse, or doubly link the list through the
                 * parent pointer.
                 */
                if (tt == saved_tt) {
                        struct radix_node *xx = x;
                        /* link in at head of list */
                        (tt = treenodes)->rn_dupedkey = t;
                        tt->rn_flags = t->rn_flags;
                        tt->rn_parent = x = t->rn_parent;
                        t->rn_parent = tt;			/* parent */
                        if (x->rn_left == t)
                                x->rn_left = tt;
                        else
                                x->rn_right = tt;
                        saved_tt = tt; x = xx;
                } else {
                        (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
                        t->rn_dupedkey = tt;
                        /* Set rn_parent value for tt and tt->rn_dupedkey */
                        tt->rn_parent = t;
                        if (tt->rn_dupedkey)
                                tt->rn_dupedkey->rn_parent = tt;
                }
                tt->rn_key = v;
                tt->rn_bit = -1;
                tt->rn_flags = RNF_ACTIVE;
        }
        /*
         * Put mask in tree.
         */
        if (netmask) {
                tt->rn_mask = netmask;
                tt->rn_bit = x->rn_bit;
                tt->rn_flags |= x->rn_flags & RNF_NORMAL;
        }
        t = saved_tt->rn_parent;
        if (keyduplicated)
                goto key_exists;
        b_leaf = -1 - t->rn_bit;
        if (t->rn_right == saved_tt)
                x = t->rn_left;
        else
                x = t->rn_right;
        /* Promote general routes from below */
        if (x->rn_bit < 0) {
                for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
                        if (x->rn_mask && (x->rn_bit >= b_leaf) &&
                            x->rn_mklist == 0) {
                                *mp = m = rn_new_radix_mask(x, 0);
                                if (m)
                                        mp = &m->rm_mklist;
                        }
        } else if (x->rn_mklist) {
                /*
                 * Skip over masks whose index is > that of new node
                 */
                for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
                        if (m->rm_bit >= b_leaf)
                                break;
                t->rn_mklist = m; *mp = 0;
        }
key_exists:
        /* Add new route to highest possible ancestor's list */
        if ((netmask == 0) || (b > t->rn_bit))
                return (tt); /* can't lift at all */
        b_leaf = tt->rn_bit;
        do {
                x = t;
                t = t->rn_parent;
        } while (b <= t->rn_bit && x != top);
        /*
         * Search through routes associated with node to
         * insert new route according to index.
         * Need same criteria as when sorting dupedkeys to avoid
         * double loop on deletion.
         */
        for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
                if (m->rm_bit < b_leaf)
                        continue;
                if (m->rm_bit > b_leaf)
                        break;
                if (m->rm_flags & RNF_NORMAL) {
                        mmask = m->rm_leaf->rn_mask;
                        if (tt->rn_flags & RNF_NORMAL) {
#ifdef	_KERNEL
                                cmn_err(CE_WARN, "Non-unique normal route, "
                                    "mask not entered\n");
#else
                                syslog(LOG_ERR, "Non-unique normal route, "
                                    "mask not entered\n");
#endif	/* _KERNEL */
                                return (tt);
                        }
                } else
                        mmask = m->rm_mask;
                if (mmask == netmask) {
                        m->rm_refs++;
                        tt->rn_mklist = m;
                        return (tt);
                }
                if (rn_refines(netmask, mmask) ||
                    rn_lexobetter(netmask, mmask))
                        break;
        }
        *mp = rn_new_radix_mask(tt, *mp);
        return (tt);
}
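
/*
 * Typical use, as a sketch (the entry type and names here are
 * illustrative, not defined in this file): the caller embeds the two
 * radix_nodes at the start of its own route entry, zeroes them, and
 * hands them in along with sockaddr-style key and mask:
 *
 *	struct my_rtentry {
 *		struct radix_node rt_nodes[2];	// leaf first; cf. RNTORT()
 *		// ... caller's own data ...
 *	} *rt;
 *
 *	bzero(rt->rt_nodes, sizeof (rt->rt_nodes));
 *	rn = rnh->rnh_addaddr(dst, mask, rnh, rt->rt_nodes);
 *
 * A NULL return means the key/mask pair was already present or the
 * mask could not be entered.
 */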

static struct radix_node *
rn_delete(v_arg, netmask_arg, head)
	void *v_arg, *netmask_arg;
	struct radix_node_head *head;
{
        struct radix_node *t, *p, *x, *tt;
        struct radix_mask *m, *saved_m, **mp;
        struct radix_node *dupedkey, *saved_tt, *top;
        caddr_t v, netmask;
        int b, head_off, vlen;

        v = v_arg;
        netmask = netmask_arg;
        x = head->rnh_treetop;
        tt = rn_search(v, x);
        head_off = x->rn_offset;
        vlen = LEN(v);
        saved_tt = tt;
        top = x;
        if (tt == 0 ||
            bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
                return (0);
        /*
         * Delete our route from mask lists.
         */
        if (netmask) {
                if ((x = rn_addmask(netmask, 1, head_off)) == 0)
                        return (0);
                netmask = x->rn_key;
                while (tt->rn_mask != netmask)
                        if ((tt = tt->rn_dupedkey) == 0)
                                return (0);
        }
        if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
                goto on1;
        if (tt->rn_flags & RNF_NORMAL) {
                if (m->rm_leaf != tt || m->rm_refs > 0) {
#ifdef	_KERNEL
                        cmn_err(CE_WARN,
                            "rn_delete: inconsistent annotation\n");
#else
                        syslog(LOG_ERR, "rn_delete: inconsistent annotation\n");
#endif	/* _KERNEL */
                        return (0);  /* dangling ref could cause disaster */
                }
        } else {
                if (m->rm_mask != tt->rn_mask) {
#ifdef	_KERNEL
                        cmn_err(CE_WARN,
                            "rn_delete: inconsistent annotation 2\n");
#else
                        syslog(LOG_ERR,
                            "rn_delete: inconsistent annotation 2\n");
#endif	/* _KERNEL */
                        goto on1;
                }
                if (--m->rm_refs >= 0)
                        goto on1;
        }
        b = -1 - tt->rn_bit;
        t = saved_tt->rn_parent;
        if (b > t->rn_bit)
                goto on1; /* Wasn't lifted at all */
        do {
                x = t;
                t = t->rn_parent;
        } while (b <= t->rn_bit && x != top);
        for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
                if (m == saved_m) {
                        *mp = m->rm_mklist;
                        MKFree(m);
                        break;
                }
        if (m == 0) {
#ifdef	_KERNEL
                cmn_err(CE_WARN, "rn_delete: couldn't find our annotation\n");
#else
                syslog(LOG_ERR, "rn_delete: couldn't find our annotation\n");
#endif	/* _KERNEL */
                if (tt->rn_flags & RNF_NORMAL)
                        return (0); /* Dangling ref to us */
        }
on1:
        /*
         * Eliminate us from tree
         */
        if (tt->rn_flags & RNF_ROOT)
                return (0);
        t = tt->rn_parent;
        dupedkey = saved_tt->rn_dupedkey;
        if (dupedkey) {
                /*
                 * Here, tt is the deletion target and
                 * saved_tt is the head of the dupedkey chain.
                 */
                if (tt == saved_tt) {
                        /* remove from head of chain */
                        x = dupedkey; x->rn_parent = t;
                        if (t->rn_left == tt)
                                t->rn_left = x;
                        else
                                t->rn_right = x;
                } else {
                        /* find node in front of tt on the chain */
                        for (x = p = saved_tt; p && p->rn_dupedkey != tt; )
                                p = p->rn_dupedkey;
                        if (p) {
                                p->rn_dupedkey = tt->rn_dupedkey;
                                if (tt->rn_dupedkey)		/* parent */
                                        tt->rn_dupedkey->rn_parent = p;
                        } else
#ifdef	_KERNEL
                                cmn_err(CE_WARN,
                                    "rn_delete: couldn't find us\n");
#else
                                syslog(LOG_ERR,
                                    "rn_delete: couldn't find us\n");
#endif	/* _KERNEL */
                }
                t = tt + 1;
                if (t->rn_flags & RNF_ACTIVE) {
                        *++x = *t;
                        p = t->rn_parent;
                        if (p->rn_left == t)
                                p->rn_left = x;
                        else
                                p->rn_right = x;
                        x->rn_left->rn_parent = x;
                        x->rn_right->rn_parent = x;
                }
                goto out;
        }
        if (t->rn_left == tt)
                x = t->rn_right;
        else
                x = t->rn_left;
        p = t->rn_parent;
        if (p->rn_right == t)
                p->rn_right = x;
        else
                p->rn_left = x;
        x->rn_parent = p;
        /*
         * Demote routes attached to us.
         */
        if (t->rn_mklist) {
                if (x->rn_bit >= 0) {
                        for (mp = &x->rn_mklist; (m = *mp) != NULL; )
                                mp = &m->rm_mklist;
                        *mp = t->rn_mklist;
                } else {
                        /*
                         * If there are any key,mask pairs in a sibling
                         * duped-key chain, some subset will appear sorted
                         * in the same order attached to our mklist
                         */
                        for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
                                if (m == x->rn_mklist) {
                                        struct radix_mask *mm = m->rm_mklist;
                                        x->rn_mklist = 0;
                                        if (--(m->rm_refs) < 0)
                                                MKFree(m);
                                        m = mm;
                                }
                        if (m)
#ifdef	_KERNEL
                                cmn_err(CE_WARN,
                                    "rn_delete: Orphaned Mask %p at %p\n",
                                    (void *)m, (void *)x);
#else
                                syslog(LOG_ERR,
                                    "rn_delete: Orphaned Mask %p at %p\n",
                                    (void *)m, (void *)x);
#endif	/* _KERNEL */
                }
        }
        /*
         * We may be holding an active internal node in the tree.
         */
        x = tt + 1;
        if (t != x) {
                *t = *x;
                t->rn_left->rn_parent = t;
                t->rn_right->rn_parent = t;
                p = x->rn_parent;
                if (p->rn_left == x)
                        p->rn_left = t;
                else
                        p->rn_right = t;
        }
out:
        tt->rn_flags &= ~RNF_ACTIVE;
        tt[1].rn_flags &= ~RNF_ACTIVE;
        return (tt);
}

/*
 * Walk the radix tree; for the kernel routing table, we hold additional
 * refs on the ire_bucket to ensure that the walk function f() does not
 * run into trashed memory. The kernel routing table is identified by
 * a rnh_treetop that has RNF_SUNW_FT set in the rn_flags.
 * Note that all refs taken in rn_walktree are released before it returns,
 * so f() will need to take any additional references on memory
 * to be passed back to the caller of rn_walktree.
 */
static int
rn_walktree(h, f, w)
	struct radix_node_head *h;
	walktree_f_t *f;
	void *w;
{
        return (rn_walktree_mt(h, f, w, NULL, NULL));
}
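
/*
 * A minimal walker callback, for illustration (not part of this file):
 *
 *	static int
 *	count_leaves(struct radix_node *rn, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return (0);	// nonzero aborts the walk
 *	}
 *
 *	int n = 0;
 *	(void) rnh->rnh_walktree(rnh, count_leaves, &n);
 *
 * The walk visits every leaf, including dupedkey chains, skips the
 * RNF_ROOT leaves, and stops early if the callback returns nonzero.
 */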

static int
rn_walktree_mt(h, f, w, lockf, unlockf)
	struct radix_node_head *h;
	walktree_f_t *f;
	void *w;
	lockf_t lockf, unlockf;
{
        int error;
        struct radix_node *base, *next;
        struct radix_node *rn = h->rnh_treetop;
        boolean_t is_mt = B_FALSE;

        if (lockf != NULL) {
                ASSERT(unlockf != NULL);
                is_mt = B_TRUE;
        }
        /*
         * This gets complicated because we may delete the node
         * while applying the function f to it, so we need to calculate
         * the successor node in advance.
         */
        RADIX_NODE_HEAD_RLOCK(h);
        /* First time through node, go left */
        while (rn->rn_bit >= 0) {
                rn = rn->rn_left;
        }

        if (is_mt)
                (*lockf)(rn);

        for (;;) {
                base = rn;
                /* If at right child go back up, otherwise, go right */
                while (rn->rn_parent->rn_right == rn &&
                    (rn->rn_flags & RNF_ROOT) == 0) {
                        rn = rn->rn_parent;
                }
                /* Find the next *leaf* since next node might vanish, too */
                for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0; ) {
                        rn = rn->rn_left;
                }
                next = rn;

                if (is_mt && next != NULL)
                        (*lockf)(next);

                /* Process leaves */
                while ((rn = base) != NULL) {
                        base = rn->rn_dupedkey;

                        if (is_mt && base != NULL)
                                (*lockf)(base);

                        RADIX_NODE_HEAD_UNLOCK(h);
                        if (!(rn->rn_flags & RNF_ROOT) &&
                            (error = (*f)(rn, w))) {
                                if (is_mt) {
                                        (*unlockf)(rn);
                                        if (base != NULL)
                                                (*unlockf)(base);
                                        if (next != NULL)
                                                (*unlockf)(next);
                                }
                                return (error);
                        }
                        if (is_mt)
                                (*unlockf)(rn);
                        RADIX_NODE_HEAD_RLOCK(h);
                }
                rn = next;
                if (rn->rn_flags & RNF_ROOT) {
                        RADIX_NODE_HEAD_UNLOCK(h);
                        /*
                         * no ref to release, since we never take a ref
                         * on the root node; it can't be deleted.
                         */
                        return (0);
                }
        }
        /* NOTREACHED */
}

/*
 * Allocate and initialize an empty tree. This has 3 nodes, which are
 * part of the radix_node_head (in the order <left,root,right>) and are
 * marked RNF_ROOT so they cannot be freed.
 * The leaves have all-zero and all-one keys, with significant
 * bits starting at 'off'.
 * Return 1 on success, 0 on error.
 */
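
/*
 * Shape of the freshly initialized tree (illustrative):
 *
 *	            rnh_nodes[1]  (root, tests bit 'off')
 *	           /            \
 *	rnh_nodes[0]             rnh_nodes[2]
 *	(leaf, key = rn_zeros)   (leaf, key = rn_ones)
 *
 * All three nodes carry RNF_ROOT, so ordinary deletions never remove
 * them, and every real key inserted later falls between the two leaves.
 */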
int
rn_inithead(head, off)
	void **head;
	int off;
{
        struct radix_node_head *rnh;
        struct radix_node *t, *tt, *ttt;

        if (*head)
                return (1);
        R_ZallocSleep(rnh, struct radix_node_head *, sizeof (*rnh));
        if (rnh == 0)
                return (0);
#ifdef	_KERNEL
        RADIX_NODE_HEAD_LOCK_INIT(rnh);
#endif
        *head = rnh;
        t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
        ttt = rnh->rnh_nodes + 2;
        t->rn_right = ttt;
        t->rn_parent = t;
        tt = t->rn_left;	/* ... which in turn is rnh->rnh_nodes */
        tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
        tt->rn_bit = -1 - off;
        *ttt = *tt;
        ttt->rn_key = rn_ones;
        rnh->rnh_addaddr = rn_addroute;
        rnh->rnh_deladdr = rn_delete;
        rnh->rnh_matchaddr = rn_match;
        rnh->rnh_matchaddr_args = rn_match_args;
        rnh->rnh_lookup = rn_lookup;
        rnh->rnh_walktree = rn_walktree;
        rnh->rnh_walktree_mt = rn_walktree_mt;
        rnh->rnh_walktree_from = NULL;  /* not implemented */
        rnh->rnh_treetop = t;
        return (1);
}

void
rn_init()
{
        char *cp, *cplim;

#ifdef	_KERNEL
        radix_mask_cache = kmem_cache_create("radix_mask",
            sizeof (struct radix_mask), 0, NULL, NULL, NULL, NULL, NULL, 0);
        radix_node_cache = kmem_cache_create("radix_node",
            max_keylen + 2 * sizeof (struct radix_node),
            0, NULL, NULL, NULL, NULL, NULL, 0);
#endif	/* _KERNEL */
        R_ZallocSleep(rn_zeros, char *, 2 * max_keylen);

        ASSERT(rn_zeros != NULL);
        bzero(rn_zeros, 2 * max_keylen);
        rn_ones = cp = rn_zeros + max_keylen;
        cplim = rn_ones + max_keylen;
        while (cp < cplim)
                *cp++ = -1;
        if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
                panic("rn_init: could not init mask_rnhead");
}

int
rn_freenode(n, p)
	struct radix_node *n;
	void *p;
{
        struct radix_node_head *rnh = p;
        struct radix_node *d;

        d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
        if (d != NULL) {
                Free(d, radix_node_cache);
        }
        return (0);
}

void
rn_freehead(rnh)
	struct radix_node_head *rnh;
{
        (void) rn_walktree(rnh, rn_freenode, rnh);

        rnh->rnh_addaddr = NULL;
        rnh->rnh_deladdr = NULL;
        rnh->rnh_matchaddr = NULL;
        rnh->rnh_lookup = NULL;
        rnh->rnh_walktree = NULL;

#ifdef	_KERNEL
        RADIX_NODE_HEAD_DESTROY(rnh);
        FreeHead(rnh, sizeof (*rnh));
#else
        Free(rnh, NULL);
#endif	/* _KERNEL */
}

void
rn_fini()
{
        struct radix_mask *m;

        if (rn_zeros != NULL) {
#ifdef	_KERNEL
                FreeHead(rn_zeros, 2 * max_keylen);
#else
                Free(rn_zeros, NULL);
#endif
                rn_zeros = NULL;
        }

        if (mask_rnhead != NULL) {
                rn_freehead(mask_rnhead);
                mask_rnhead = NULL;
        }

        while ((m = rn_mkfreelist) != NULL) {
                rn_mkfreelist = m->rm_mklist;
                Free(m, NULL);
        }
}
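
/*
 * Typical lifecycle, as a sketch (the head pointer is the caller's;
 * 'off' is the first bit position of the key worth testing, 0 here):
 *
 *	struct radix_node_head *rnh = NULL;
 *
 *	rn_init();			// once, sets up rn_zeros/masks
 *	if (rn_inithead((void **)&rnh, 0) == 0)
 *		return (0);		// allocation failed
 *	// ... use rnh->rnh_addaddr / rnh_matchaddr / rnh_deladdr ...
 *	rn_freehead(rnh);		// empties and frees this head
 *	rn_fini();			// global teardown, frees mask tree
 */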