1 /* 2 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 * 5 * Copyright (c) 1988, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)radix.c 8.5 (Berkeley) 5/19/95 33 * $FreeBSD: /repoman/r/ncvs/src/sys/net/radix.c,v 1.36.2.1 2005/01/31 23:26:23 34 * imp Exp $ 35 */ 36 37 38 /* 39 * Routines to build and maintain radix trees for routing lookups. 40 */ 41 #include <sys/types.h> 42 43 #ifndef _RADIX_H_ 44 #include <sys/param.h> 45 #ifdef _KERNEL 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/systm.h> 49 #include <sys/cmn_err.h> 50 #else 51 #include <assert.h> 52 #define ASSERT assert 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <syslog.h> 56 #include <strings.h> 57 #endif /* _KERNEL */ 58 #include <net/radix.h> 59 #endif 60 61 #ifndef _KERNEL 62 void 63 panic(const char *str) 64 { 65 fprintf(stderr, "Panic - %s\n", str); 66 abort(); 67 } 68 #endif /* _KERNEL */ 69 70 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); 71 static int rn_walktree_mt(struct radix_node_head *, walktree_f_t *, 72 void *, lockf_t, lockf_t); 73 static struct radix_node 74 *rn_insert(void *, struct radix_node_head *, int *, 75 struct radix_node [2]), 76 *rn_newpair(void *, int, struct radix_node[2]), 77 *rn_search(void *, struct radix_node *), 78 *rn_search_m(void *, struct radix_node *, void *), 79 *rn_lookup(void *, void *, struct radix_node_head *), 80 *rn_match(void *, struct radix_node_head *), 81 *rn_match_args(void *, struct radix_node_head *, match_leaf_t *, 82 void *), 83 *rn_addmask(void *, int, int), 84 *rn_addroute(void *, void *, struct radix_node_head *, 85 struct radix_node [2]), 86 *rn_delete(void *, void *, struct radix_node_head *); 87 static boolean_t rn_refines(void *, void *); 88 89 /* 90 * IPF also uses PATRICIA tree to manage ippools. IPF stores its own structure 91 * addrfamily_t. sizeof (addrfamily_t) == 24. 92 */ 93 #define MAX_KEYLEN 24 94 static int max_keylen = MAX_KEYLEN; 95 96 #ifdef _KERNEL 97 static struct kmem_cache *radix_mask_cache; /* for rn_mkfreelist */ 98 static struct kmem_cache *radix_node_cache; 99 #else 100 static char *radix_mask_cache, *radix_node_cache; /* dummy vars. never inited */ 101 #endif /* _KERNEL */ 102 103 static struct radix_mask *rn_mkfreelist; 104 static struct radix_node_head *mask_rnhead; 105 /* 106 * Work area -- the following point to 2 buffers of size max_keylen, 107 * allocated in this order in a block of memory malloc'ed by rn_init. 108 * A third buffer of size MAX_KEYLEN is allocated from the stack. 109 */ 110 static char *rn_zeros, *rn_ones; 111 112 #define MKGet(m) R_Malloc(m, radix_mask_cache, sizeof (struct radix_mask)) 113 #define MKFree(m) Free(m, radix_mask_cache) 114 #define rn_masktop (mask_rnhead->rnh_treetop) 115 116 static boolean_t rn_lexobetter(void *m_arg, void *n_arg); 117 static struct radix_mask * 118 rn_new_radix_mask(struct radix_node *tt, 119 struct radix_mask *next); 120 static boolean_t 121 rn_satisfies_leaf(char *trial, struct radix_node *leaf, 122 int skip, match_leaf_t *rn_leaf_fn, void *rn_leaf_arg); 123 124 #define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg)) 125 126 /* 127 * The data structure for the keys is a radix tree with one way 128 * branching removed. The index rn_bit at an internal node n represents a bit 129 * position to be tested. The tree is arranged so that all descendants 130 * of a node n have keys whose bits all agree up to position rn_bit - 1. 131 * (We say the index of n is rn_bit.) 132 * 133 * There is at least one descendant which has a one bit at position rn_bit, 134 * and at least one with a zero there. 135 * 136 * A route is determined by a pair of key and mask. We require that the 137 * bit-wise logical and of the key and mask to be the key. 138 * We define the index of a route associated with the mask to be 139 * the first bit number in the mask where 0 occurs (with bit number 0 140 * representing the highest order bit). 141 * 142 * We say a mask is normal if every bit is 0, past the index of the mask. 143 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 144 * and m is a normal mask, then the route applies to every descendant of n. 145 * If the index(m) < rn_bit, this implies the trailing last few bits of k 146 * before bit b are all 0, (and hence consequently true of every descendant 147 * of n), so the route applies to all descendants of the node as well. 148 * 149 * Similar logic shows that a non-normal mask m such that 150 * index(m) <= index(n) could potentially apply to many children of n. 151 * Thus, for each non-host route, we attach its mask to a list at an internal 152 * node as high in the tree as we can go. 153 * 154 * The present version of the code makes use of normal routes in short- 155 * circuiting an explict mask and compare operation when testing whether 156 * a key satisfies a normal route, and also in remembering the unique leaf 157 * that governs a subtree. 158 */ 159 160 /* 161 * Most of the functions in this code assume that the key/mask arguments 162 * are sockaddr-like structures, where the first byte is an uchar_t 163 * indicating the size of the entire structure. 164 * 165 * To make the assumption more explicit, we use the LEN() macro to access 166 * this field. It is safe to pass an expression with side effects 167 * to LEN() as the argument is evaluated only once. 168 */ 169 #define LEN(x) (*(const uchar_t *)(x)) 170 171 172 /* 173 * Search a node in the tree matching the key. 174 */ 175 static struct radix_node * 176 rn_search(v_arg, head) 177 void *v_arg; 178 struct radix_node *head; 179 { 180 struct radix_node *x; 181 caddr_t v; 182 183 for (x = head, v = v_arg; x->rn_bit >= 0; ) { 184 if (x->rn_bmask & v[x->rn_offset]) 185 x = x->rn_right; 186 else 187 x = x->rn_left; 188 } 189 return (x); 190 } 191 192 /* 193 * Same as above, but with an additional mask. 194 */ 195 static struct radix_node * 196 rn_search_m(v_arg, head, m_arg) 197 struct radix_node *head; 198 void *v_arg, *m_arg; 199 { 200 struct radix_node *x; 201 caddr_t v = v_arg, m = m_arg; 202 203 for (x = head; x->rn_bit >= 0; ) { 204 if ((x->rn_bmask & m[x->rn_offset]) && 205 (x->rn_bmask & v[x->rn_offset])) 206 x = x->rn_right; 207 else 208 x = x->rn_left; 209 } 210 return (x); 211 } 212 213 /* 214 * Returns true if there are no bits set in n_arg that are zero in 215 * m_arg and the masks aren't equal. In other words, it returns true 216 * when m_arg is a finer-granularity netmask -- it represents a subset 217 * of the destinations implied by n_arg. 218 */ 219 static boolean_t 220 rn_refines(m_arg, n_arg) 221 void *m_arg, *n_arg; 222 { 223 caddr_t m = m_arg, n = n_arg; 224 caddr_t lim = n + LEN(n), lim2 = lim; 225 int longer = LEN(n++) - (int)LEN(m++); 226 boolean_t masks_are_equal = B_TRUE; 227 228 if (longer > 0) 229 lim -= longer; 230 while (n < lim) { 231 if (*n & ~(*m)) 232 return (0); 233 if (*n++ != *m++) 234 masks_are_equal = B_FALSE; 235 } 236 while (n < lim2) 237 if (*n++) 238 return (B_FALSE); 239 if (masks_are_equal && (longer < 0)) 240 for (lim2 = m - longer; m < lim2; ) 241 if (*m++) 242 return (B_TRUE); 243 return (!masks_are_equal); 244 } 245 246 static struct radix_node * 247 rn_lookup(v_arg, m_arg, head) 248 void *v_arg, *m_arg; 249 struct radix_node_head *head; 250 { 251 struct radix_node *x; 252 caddr_t netmask = NULL; 253 254 if (m_arg) { 255 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); 256 if (x == NULL) 257 return (NULL); 258 netmask = x->rn_key; 259 } 260 x = rn_match(v_arg, head); 261 if (x && netmask) { 262 while (x && x->rn_mask != netmask) 263 x = x->rn_dupedkey; 264 } 265 return (x); 266 } 267 268 /* 269 * Returns true if address 'trial' has no bits differing from the 270 * leaf's key when compared under the leaf's mask. In other words, 271 * returns true when 'trial' matches leaf. 272 * In addition, if a rn_leaf_fn is passed in, that is used to find 273 * a match on conditions defined by the caller of rn_match. This is 274 * used by the kernel ftable to match on IRE_MATCH_* conditions. 275 */ 276 static boolean_t 277 rn_satisfies_leaf(trial, leaf, skip, rn_leaf_fn, rn_leaf_arg) 278 caddr_t trial; 279 struct radix_node *leaf; 280 int skip; 281 match_leaf_t *rn_leaf_fn; 282 void *rn_leaf_arg; 283 { 284 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; 285 char *cplim; 286 int length = min(LEN(cp), LEN(cp2)); 287 288 if (cp3 == 0) 289 cp3 = rn_ones; 290 else 291 length = min(length, LEN(cp3)); 292 cplim = cp + length; 293 cp3 += skip; 294 cp2 += skip; 295 296 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) 297 if ((*cp ^ *cp2) & *cp3) 298 return (B_FALSE); 299 300 return (RN_MATCHF(leaf, rn_leaf_fn, rn_leaf_arg)); 301 } 302 303 static struct radix_node * 304 rn_match(v_arg, head) 305 void *v_arg; 306 struct radix_node_head *head; 307 { 308 return (rn_match_args(v_arg, head, NULL, NULL)); 309 } 310 311 static struct radix_node * 312 rn_match_args(v_arg, head, rn_leaf_fn, rn_leaf_arg) 313 void *v_arg; 314 struct radix_node_head *head; 315 match_leaf_t *rn_leaf_fn; 316 void *rn_leaf_arg; 317 { 318 caddr_t v = v_arg; 319 struct radix_node *t = head->rnh_treetop, *x; 320 caddr_t cp = v, cp2; 321 caddr_t cplim; 322 struct radix_node *saved_t, *top = t; 323 int off = t->rn_offset, vlen = LEN(cp), matched_off; 324 int test, b, rn_bit; 325 326 /* 327 * Open code rn_search(v, top) to avoid overhead of extra 328 * subroutine call. 329 */ 330 for (; t->rn_bit >= 0; ) { 331 if (t->rn_bmask & cp[t->rn_offset]) 332 t = t->rn_right; 333 else 334 t = t->rn_left; 335 } 336 /* 337 * See if we match exactly as a host destination 338 * or at least learn how many bits match, for normal mask finesse. 339 * 340 * It doesn't hurt us to limit how many bytes to check 341 * to the length of the mask, since if it matches we had a genuine 342 * match and the leaf we have is the most specific one anyway; 343 * if it didn't match with a shorter length it would fail 344 * with a long one. This wins big for class B&C netmasks which 345 * are probably the most common case... 346 */ 347 if (t->rn_mask) 348 vlen = LEN(t->rn_mask); 349 cp += off; cp2 = t->rn_key + off; cplim = v + vlen; 350 for (; cp < cplim; cp++, cp2++) 351 if (*cp != *cp2) 352 goto keydiff; 353 /* 354 * This extra grot is in case we are explicitly asked 355 * to look up the default. Ugh! 356 * 357 * Never return the root node itself, it seems to cause a 358 * lot of confusion. 359 */ 360 if (t->rn_flags & RNF_ROOT) 361 t = t->rn_dupedkey; 362 if (t == NULL || RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) { 363 return (t); 364 } else { 365 /* 366 * Although we found an exact match on the key, rn_leaf_fn 367 * is looking for some other criteria as well. Continue 368 * looking as if the exact match failed. 369 */ 370 if (t->rn_dupedkey == NULL && 371 (t->rn_parent->rn_flags & RNF_ROOT)) { 372 /* no more dupedkeys and hit the top. have to give up */ 373 return (NULL); 374 } 375 b = 0; 376 goto keeplooking; 377 378 } 379 keydiff: 380 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 381 for (b = 7; (test >>= 1) > 0; ) 382 b--; 383 keeplooking: 384 matched_off = cp - v; 385 b += matched_off << 3; 386 rn_bit = -1 - b; 387 388 /* 389 * If there is a host route in a duped-key chain, it will be first. 390 */ 391 if ((saved_t = t)->rn_mask == 0) 392 t = t->rn_dupedkey; 393 for (; t != NULL; t = t->rn_dupedkey) { 394 /* 395 * Even if we don't match exactly as a host, 396 * we may match if the leaf we wound up at is 397 * a route to a net. 398 */ 399 400 if (t->rn_flags & RNF_NORMAL) { 401 if ((rn_bit <= t->rn_bit) && 402 RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) { 403 return (t); 404 } 405 } else if (rn_satisfies_leaf(v, t, matched_off, rn_leaf_fn, 406 rn_leaf_arg)) { 407 return (t); 408 } 409 } 410 t = saved_t; 411 /* start searching up the tree */ 412 do { 413 struct radix_mask *m; 414 415 t = t->rn_parent; 416 m = t->rn_mklist; 417 /* 418 * If non-contiguous masks ever become important 419 * we can restore the masking and open coding of 420 * the search and satisfaction test and put the 421 * calculation of "off" back before the "do". 422 */ 423 while (m) { 424 if (m->rm_flags & RNF_NORMAL) { 425 if ((rn_bit <= m->rm_bit) && 426 RN_MATCHF(m->rm_leaf, rn_leaf_fn, 427 rn_leaf_arg)) { 428 return (m->rm_leaf); 429 } 430 } else { 431 off = min(t->rn_offset, matched_off); 432 x = rn_search_m(v, t, m->rm_mask); 433 while (x != NULL && x->rn_mask != m->rm_mask) 434 x = x->rn_dupedkey; 435 if (x && rn_satisfies_leaf(v, x, off, 436 rn_leaf_fn, rn_leaf_arg)) { 437 return (x); 438 } 439 } 440 m = m->rm_mklist; 441 } 442 } while (t != top); 443 return (0); 444 } 445 446 /* 447 * Whenever we add a new leaf to the tree, we also add a parent node, 448 * so we allocate them as an array of two elements: the first one must be 449 * the leaf (see RNTORT() in route.c), the second one is the parent. 450 * This routine initializes the relevant fields of the nodes, so that 451 * the leaf is the left child of the parent node, and both nodes have 452 * (almost) all all fields filled as appropriate. 453 * The function returns a pointer to the parent node. 454 */ 455 456 static struct radix_node * 457 rn_newpair(v, b, nodes) 458 void *v; 459 int b; 460 struct radix_node nodes[2]; 461 { 462 struct radix_node *tt = nodes, *t = tt + 1; 463 464 t->rn_bit = b; 465 t->rn_bmask = 0x80 >> (b & 7); 466 t->rn_left = tt; 467 t->rn_offset = b >> 3; 468 469 /* 470 * t->rn_parent, r->rn_right, tt->rn_mask, tt->rn_dupedkey 471 * and tt->rn_bmask must have been zeroed by caller. 472 */ 473 tt->rn_bit = -1; 474 tt->rn_key = v; 475 tt->rn_parent = t; 476 tt->rn_flags = t->rn_flags = RNF_ACTIVE; 477 tt->rn_mklist = t->rn_mklist = 0; 478 return (t); 479 } 480 481 static struct radix_node * 482 rn_insert(v_arg, head, dupentry, nodes) 483 void *v_arg; 484 struct radix_node_head *head; 485 int *dupentry; 486 struct radix_node nodes[2]; 487 { 488 caddr_t v = v_arg; 489 struct radix_node *top = head->rnh_treetop; 490 struct radix_node *p, *x; 491 int head_off = top->rn_offset, vlen = (int)LEN(v); 492 struct radix_node *t = rn_search(v_arg, top); 493 caddr_t cp = v + head_off; 494 int b; 495 struct radix_node *tt; 496 caddr_t cp2 = t->rn_key + head_off; 497 int cmp_res; 498 caddr_t cplim = v + vlen; 499 500 /* 501 * Find first bit at which v and t->rn_key differ 502 */ 503 while (cp < cplim) 504 if (*cp2++ != *cp++) 505 goto on1; 506 *dupentry = 1; 507 return (t); 508 on1: 509 *dupentry = 0; 510 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 511 /* 512 * (cp - v) is the number of bytes where the match is relevant. 513 * Multiply by 8 to get number of bits. Then reduce this number 514 * by the trailing bits in the last byte where we have a match 515 * by looking at (cmp_res >> 1) in each iteration below. 516 * Note that v starts at the beginning of the key, so, when key 517 * is a sockaddr structure, the preliminary len/family/port bytes 518 * are accounted for. 519 */ 520 for (b = (cp - v) << 3; cmp_res; b--) 521 cmp_res >>= 1; 522 cp = v; 523 x = top; 524 do { 525 p = x; 526 if (cp[x->rn_offset] & x->rn_bmask) 527 x = x->rn_right; 528 else 529 x = x->rn_left; 530 } while (b > (unsigned)x->rn_bit); 531 /* x->rn_bit < b && x->rn_bit >= 0 */ 532 /* 533 * now the rightmost bit where v and rn_key differ (b) is < 534 * x->rn_bit. 535 * 536 * We will add a new branch at p. b cannot equal x->rn_bit 537 * because we know we didn't find a duplicated key. 538 * The tree will be re-adjusted so that t is inserted between p 539 * and x. 540 */ 541 t = rn_newpair(v_arg, b, nodes); 542 tt = t->rn_left; 543 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 544 p->rn_left = t; 545 else 546 p->rn_right = t; 547 x->rn_parent = t; 548 t->rn_parent = p; 549 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 550 t->rn_right = x; 551 } else { 552 t->rn_right = tt; 553 t->rn_left = x; 554 } 555 return (tt); 556 } 557 558 static struct radix_node * 559 rn_addmask(n_arg, search, skip) 560 int search, skip; 561 void *n_arg; 562 { 563 caddr_t netmask = (caddr_t)n_arg; 564 struct radix_node *x; 565 caddr_t cp, cplim; 566 int b = 0, mlen, j; 567 int maskduplicated, m0, isnormal; 568 struct radix_node *saved_x; 569 int last_zeroed = 0; 570 char addmask_key[MAX_KEYLEN]; 571 572 if ((mlen = LEN(netmask)) > max_keylen) 573 mlen = max_keylen; 574 if (skip == 0) 575 skip = 1; 576 if (mlen <= skip) 577 return (mask_rnhead->rnh_nodes); 578 if (skip > 1) 579 bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 580 if ((m0 = mlen) > skip) 581 bcopy(netmask + skip, addmask_key + skip, mlen - skip); 582 /* 583 * Trim trailing zeroes. 584 */ 585 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0; ) 586 cp--; 587 mlen = cp - addmask_key; 588 if (mlen <= skip) { 589 if (m0 >= last_zeroed) 590 last_zeroed = mlen; 591 return (mask_rnhead->rnh_nodes); 592 } 593 if (m0 < last_zeroed) 594 bzero(addmask_key + m0, last_zeroed - m0); 595 *addmask_key = last_zeroed = mlen; 596 x = rn_search(addmask_key, rn_masktop); 597 if (bcmp(addmask_key, x->rn_key, mlen) != 0) 598 x = 0; 599 if (x || search) 600 return (x); 601 R_Zalloc(x, radix_node_cache, max_keylen + 2 * sizeof (*x)); 602 603 if ((saved_x = x) == 0) 604 return (0); 605 netmask = cp = (caddr_t)(x + 2); 606 bcopy(addmask_key, cp, mlen); 607 x = rn_insert(cp, mask_rnhead, &maskduplicated, x); 608 if (maskduplicated) { 609 #ifdef _KERNEL 610 cmn_err(CE_WARN, "rn_addmask: mask impossibly already in tree"); 611 #else 612 syslog(LOG_ERR, "rn_addmask: mask impossibly already in tree"); 613 #endif /* _KERNEL */ 614 Free(saved_x, radix_node_cache); 615 return (x); 616 } 617 /* 618 * Calculate index of mask, and check for normalcy. 619 * First find the first byte with a 0 bit, then if there are 620 * more bits left (remember we already trimmed the trailing 0's), 621 * the pattern must be one of those in normal_chars[], or we have 622 * a non-contiguous mask. 623 */ 624 cplim = netmask + mlen; 625 isnormal = 1; 626 for (cp = netmask + skip; (cp < cplim) && *(uchar_t *)cp == 0xff; ) 627 cp++; 628 if (cp != cplim) { 629 static uint8_t normal_chars[] = { 630 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; 631 632 for (j = 0x80; (j & *cp) != 0; j >>= 1) 633 b++; 634 if (*cp != normal_chars[b] || cp != (cplim - 1)) 635 isnormal = 0; 636 } 637 b += (cp - netmask) << 3; 638 x->rn_bit = -1 - b; 639 if (isnormal) 640 x->rn_flags |= RNF_NORMAL; 641 return (x); 642 } 643 644 /* arbitrary ordering for non-contiguous masks */ 645 static boolean_t 646 rn_lexobetter(m_arg, n_arg) 647 void *m_arg, *n_arg; 648 { 649 uchar_t *mp = m_arg, *np = n_arg, *lim; 650 651 if (LEN(mp) > LEN(np)) 652 /* not really, but need to check longer one first */ 653 return (B_TRUE); 654 if (LEN(mp) == LEN(np)) 655 for (lim = mp + LEN(mp); mp < lim; ) 656 if (*mp++ > *np++) 657 return (B_TRUE); 658 return (B_FALSE); 659 } 660 661 static struct radix_mask * 662 rn_new_radix_mask(tt, next) 663 struct radix_node *tt; 664 struct radix_mask *next; 665 { 666 struct radix_mask *m; 667 668 MKGet(m); 669 if (m == 0) { 670 #ifndef _KERNEL 671 syslog(LOG_ERR, "Mask for route not entered\n"); 672 #endif /* _KERNEL */ 673 return (0); 674 } 675 bzero(m, sizeof (*m)); 676 m->rm_bit = tt->rn_bit; 677 m->rm_flags = tt->rn_flags; 678 if (tt->rn_flags & RNF_NORMAL) 679 m->rm_leaf = tt; 680 else 681 m->rm_mask = tt->rn_mask; 682 m->rm_mklist = next; 683 tt->rn_mklist = m; 684 return (m); 685 } 686 687 static struct radix_node * 688 rn_addroute(v_arg, n_arg, head, treenodes) 689 void *v_arg, *n_arg; 690 struct radix_node_head *head; 691 struct radix_node treenodes[2]; 692 { 693 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; 694 struct radix_node *t, *x = 0, *tt; 695 struct radix_node *saved_tt, *top = head->rnh_treetop; 696 short b = 0, b_leaf = 0; 697 int keyduplicated; 698 caddr_t mmask; 699 struct radix_mask *m, **mp; 700 701 /* 702 * In dealing with non-contiguous masks, there may be 703 * many different routes which have the same mask. 704 * We will find it useful to have a unique pointer to 705 * the mask to speed avoiding duplicate references at 706 * nodes and possibly save time in calculating indices. 707 */ 708 if (netmask) { 709 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) 710 return (0); 711 b_leaf = x->rn_bit; 712 b = -1 - x->rn_bit; 713 netmask = x->rn_key; 714 } 715 /* 716 * Deal with duplicated keys: attach node to previous instance 717 */ 718 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); 719 if (keyduplicated) { 720 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 721 if (tt->rn_mask == netmask) 722 return (0); 723 if (netmask == 0 || 724 (tt->rn_mask && 725 /* index (netmask) > node */ 726 ((b_leaf < tt->rn_bit) || 727 rn_refines(netmask, tt->rn_mask) || 728 rn_lexobetter(netmask, tt->rn_mask)))) 729 break; 730 } 731 /* 732 * If the mask is not duplicated, we wouldn't 733 * find it among possible duplicate key entries 734 * anyway, so the above test doesn't hurt. 735 * 736 * Insert treenodes before tt. 737 * 738 * We sort the masks for a duplicated key the same way as 739 * in a masklist -- most specific to least specific. 740 * This may require the unfortunate nuisance of relocating 741 * the head of the list. 742 * 743 * We also reverse, or doubly link the list through the 744 * parent pointer. 745 */ 746 if (tt == saved_tt) { 747 struct radix_node *xx = x; 748 /* link in at head of list */ 749 (tt = treenodes)->rn_dupedkey = t; 750 tt->rn_flags = t->rn_flags; 751 tt->rn_parent = x = t->rn_parent; 752 t->rn_parent = tt; /* parent */ 753 if (x->rn_left == t) 754 x->rn_left = tt; 755 else 756 x->rn_right = tt; 757 saved_tt = tt; x = xx; 758 } else { 759 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; 760 t->rn_dupedkey = tt; 761 /* Set rn_parent value for tt and tt->rn_dupedkey */ 762 tt->rn_parent = t; 763 if (tt->rn_dupedkey) 764 tt->rn_dupedkey->rn_parent = tt; 765 } 766 tt->rn_key = v; 767 tt->rn_bit = -1; 768 tt->rn_flags = RNF_ACTIVE; 769 } 770 /* 771 * Put mask in tree. 772 */ 773 if (netmask) { 774 tt->rn_mask = netmask; 775 tt->rn_bit = x->rn_bit; 776 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 777 } 778 /* BEGIN CSTYLED */ 779 /* 780 * at this point the parent-child relationship for p, t, x, tt is 781 * one of the following: 782 * p p 783 * : (left/right child) : 784 * : : 785 * t t 786 * / \ / \ 787 * x tt tt x 788 * 789 * tt == saved_tt returned by rn_insert(). 790 */ 791 /* END CSTYLED */ 792 t = saved_tt->rn_parent; 793 if (keyduplicated) 794 goto key_exists; 795 b_leaf = -1 - t->rn_bit; 796 /* 797 * b_leaf is now normalized to be in the leaf rn_bit format 798 * (it is the rn_bit value of a leaf corresponding to netmask 799 * of t->rn_bit). 800 */ 801 if (t->rn_right == saved_tt) 802 x = t->rn_left; 803 else 804 x = t->rn_right; 805 /* 806 * Promote general routes from below. 807 * Identify the less specific netmasks and add them to t->rm_mklist 808 */ 809 if (x->rn_bit < 0) { 810 /* x is the sibling node. it is a leaf node. */ 811 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) 812 if (x->rn_mask && (x->rn_bit >= b_leaf) && 813 x->rn_mklist == 0) { 814 /* 815 * x is the first node in the dupedkey chain 816 * without a mklist, and with a shorter mask 817 * than b_leaf. Create a radix_mask 818 * corresponding to x's mask and add it to 819 * t's rn_mklist. The mask list gets created 820 * in decreasing order of mask length. 821 */ 822 *mp = m = rn_new_radix_mask(x, 0); 823 if (m) 824 mp = &m->rm_mklist; 825 } 826 } else if (x->rn_mklist) { 827 /* 828 * Skip over masks whose index is > that of new node 829 */ 830 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) 831 if (m->rm_bit >= b_leaf) 832 break; 833 t->rn_mklist = m; *mp = 0; 834 } 835 key_exists: 836 /* Add new route to highest possible ancestor's list */ 837 if ((netmask == 0) || (b > t->rn_bit)) 838 return (tt); /* can't lift at all */ 839 b_leaf = tt->rn_bit; 840 /* b is the index of the netmask */ 841 do { 842 x = t; 843 t = t->rn_parent; 844 } while (b <= t->rn_bit && x != top); 845 /* 846 * Search through routes associated with node to 847 * insert new route according to index. 848 * Need same criteria as when sorting dupedkeys to avoid 849 * double loop on deletion. 850 */ 851 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { 852 if (m->rm_bit < b_leaf) 853 continue; 854 if (m->rm_bit > b_leaf) 855 break; 856 if (m->rm_flags & RNF_NORMAL) { 857 mmask = m->rm_leaf->rn_mask; 858 if (tt->rn_flags & RNF_NORMAL) { 859 #ifdef _KERNEL 860 cmn_err(CE_WARN, "Non-unique normal route, " 861 "mask not entered\n"); 862 #else 863 syslog(LOG_ERR, "Non-unique normal route, " 864 "mask not entered\n"); 865 #endif /* _KERNEL */ 866 return (tt); 867 } 868 } else 869 mmask = m->rm_mask; 870 if (mmask == netmask) { 871 m->rm_refs++; 872 tt->rn_mklist = m; 873 return (tt); 874 } 875 if (rn_refines(netmask, mmask) || 876 rn_lexobetter(netmask, mmask)) 877 break; 878 } 879 *mp = rn_new_radix_mask(tt, *mp); 880 return (tt); 881 } 882 883 static struct radix_node * 884 rn_delete(v_arg, netmask_arg, head) 885 void *v_arg, *netmask_arg; 886 struct radix_node_head *head; 887 { 888 struct radix_node *t, *p, *x, *tt; 889 struct radix_mask *m, *saved_m, **mp; 890 struct radix_node *dupedkey, *saved_tt, *top; 891 caddr_t v, netmask; 892 int b, head_off, vlen; 893 894 v = v_arg; 895 netmask = netmask_arg; 896 x = head->rnh_treetop; 897 tt = rn_search(v, x); 898 head_off = x->rn_offset; 899 vlen = LEN(v); 900 saved_tt = tt; 901 top = x; 902 if (tt == 0 || 903 bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) 904 return (0); 905 /* 906 * Delete our route from mask lists. 907 */ 908 if (netmask) { 909 if ((x = rn_addmask(netmask, 1, head_off)) == 0) 910 return (0); 911 netmask = x->rn_key; 912 while (tt->rn_mask != netmask) 913 if ((tt = tt->rn_dupedkey) == 0) 914 return (0); 915 } 916 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) 917 goto on1; 918 if (tt->rn_flags & RNF_NORMAL) { 919 if (m->rm_leaf != tt || m->rm_refs > 0) { 920 #ifdef _KERNEL 921 cmn_err(CE_WARN, 922 "rn_delete: inconsistent annotation\n"); 923 #else 924 syslog(LOG_ERR, "rn_delete: inconsistent annotation\n"); 925 #endif /* _KERNEL */ 926 return (0); /* dangling ref could cause disaster */ 927 } 928 } else { 929 if (m->rm_mask != tt->rn_mask) { 930 #ifdef _KERNEL 931 cmn_err(CE_WARN, 932 "rn_delete: inconsistent annotation 2\n"); 933 #else 934 syslog(LOG_ERR, 935 "rn_delete: inconsistent annotation 2\n"); 936 #endif /* _KERNEL */ 937 goto on1; 938 } 939 if (--m->rm_refs >= 0) 940 goto on1; 941 } 942 b = -1 - tt->rn_bit; 943 t = saved_tt->rn_parent; 944 if (b > t->rn_bit) 945 goto on1; /* Wasn't lifted at all */ 946 do { 947 x = t; 948 t = t->rn_parent; 949 } while (b <= t->rn_bit && x != top); 950 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) 951 if (m == saved_m) { 952 *mp = m->rm_mklist; 953 MKFree(m); 954 break; 955 } 956 if (m == 0) { 957 #ifdef _KERNEL 958 cmn_err(CE_WARN, "rn_delete: couldn't find our annotation\n"); 959 #else 960 syslog(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 961 #endif /* _KERNEL */ 962 if (tt->rn_flags & RNF_NORMAL) 963 return (0); /* Dangling ref to us */ 964 } 965 on1: 966 /* 967 * Eliminate us from tree 968 */ 969 if (tt->rn_flags & RNF_ROOT) 970 return (0); 971 t = tt->rn_parent; 972 dupedkey = saved_tt->rn_dupedkey; 973 if (dupedkey) { 974 /* 975 * Here, tt is the deletion target and 976 * saved_tt is the head of the dupekey chain. 977 */ 978 if (tt == saved_tt) { 979 /* remove from head of chain */ 980 x = dupedkey; x->rn_parent = t; 981 if (t->rn_left == tt) 982 t->rn_left = x; 983 else 984 t->rn_right = x; 985 } else { 986 /* find node in front of tt on the chain */ 987 for (x = p = saved_tt; p && p->rn_dupedkey != tt; ) 988 p = p->rn_dupedkey; 989 if (p) { 990 p->rn_dupedkey = tt->rn_dupedkey; 991 if (tt->rn_dupedkey) /* parent */ 992 tt->rn_dupedkey->rn_parent = p; 993 /* parent */ 994 } else 995 #ifdef _KERNEL 996 cmn_err(CE_WARN, 997 "rn_delete: couldn't find us\n"); 998 #else 999 syslog(LOG_ERR, 1000 "rn_delete: couldn't find us\n"); 1001 #endif /* _KERNEL */ 1002 } 1003 t = tt + 1; 1004 if (t->rn_flags & RNF_ACTIVE) { 1005 *++x = *t; 1006 p = t->rn_parent; 1007 if (p->rn_left == t) 1008 p->rn_left = x; 1009 else 1010 p->rn_right = x; 1011 x->rn_left->rn_parent = x; 1012 x->rn_right->rn_parent = x; 1013 } 1014 goto out; 1015 } 1016 if (t->rn_left == tt) 1017 x = t->rn_right; 1018 else 1019 x = t->rn_left; 1020 p = t->rn_parent; 1021 if (p->rn_right == t) 1022 p->rn_right = x; 1023 else 1024 p->rn_left = x; 1025 x->rn_parent = p; 1026 /* 1027 * Demote routes attached to us. 1028 */ 1029 if (t->rn_mklist) { 1030 if (x->rn_bit >= 0) { 1031 for (mp = &x->rn_mklist; (m = *mp) != NULL; ) 1032 mp = &m->rm_mklist; 1033 *mp = t->rn_mklist; 1034 } else { 1035 /* 1036 * If there are any key,mask pairs in a sibling 1037 * duped-key chain, some subset will appear sorted 1038 * in the same order attached to our mklist 1039 */ 1040 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 1041 if (m == x->rn_mklist) { 1042 struct radix_mask *mm = m->rm_mklist; 1043 x->rn_mklist = 0; 1044 if (--(m->rm_refs) < 0) 1045 MKFree(m); 1046 m = mm; 1047 } 1048 if (m) 1049 #ifdef _KERNEL 1050 cmn_err(CE_WARN, 1051 "rn_delete: Orphaned Mask %p at %p\n", 1052 (void *)m, (void *)x); 1053 #else 1054 syslog(LOG_ERR, 1055 "rn_delete: Orphaned Mask %p at %p\n", 1056 (void *)m, (void *)x); 1057 #endif /* _KERNEL */ 1058 } 1059 } 1060 /* 1061 * We may be holding an active internal node in the tree. 1062 */ 1063 x = tt + 1; 1064 if (t != x) { 1065 *t = *x; 1066 t->rn_left->rn_parent = t; 1067 t->rn_right->rn_parent = t; 1068 p = x->rn_parent; 1069 if (p->rn_left == x) 1070 p->rn_left = t; 1071 else 1072 p->rn_right = t; 1073 } 1074 out: 1075 tt->rn_flags &= ~RNF_ACTIVE; 1076 tt[1].rn_flags &= ~RNF_ACTIVE; 1077 return (tt); 1078 } 1079 1080 /* 1081 * Walk the radix tree; For the kernel routing table, we hold additional 1082 * refs on the ire_bucket to ensure that the walk function f() does not 1083 * run into trashed memory. The kernel routing table is identified by 1084 * a rnh_treetop that has RNF_SUNW_FT set in the rn_flags. 1085 * Note that all refs takein in rn_walktree are released before it returns, 1086 * so that f() will need to take any additional references on memory 1087 * to be passed back to the caller of rn_walktree. 1088 */ 1089 static int 1090 rn_walktree(h, f, w) 1091 struct radix_node_head *h; 1092 walktree_f_t *f; 1093 void *w; 1094 { 1095 return (rn_walktree_mt(h, f, w, NULL, NULL)); 1096 } 1097 static int 1098 rn_walktree_mt(h, f, w, lockf, unlockf) 1099 struct radix_node_head *h; 1100 walktree_f_t *f; 1101 void *w; 1102 lockf_t lockf, unlockf; 1103 { 1104 int error; 1105 struct radix_node *base, *next; 1106 struct radix_node *rn = h->rnh_treetop; 1107 boolean_t is_mt = B_FALSE; 1108 1109 if (lockf != NULL) { 1110 ASSERT(unlockf != NULL); 1111 is_mt = B_TRUE; 1112 } 1113 /* 1114 * This gets complicated because we may delete the node 1115 * while applying the function f to it, so we need to calculate 1116 * the successor node in advance. 1117 */ 1118 RADIX_NODE_HEAD_RLOCK(h); 1119 /* First time through node, go left */ 1120 while (rn->rn_bit >= 0) { 1121 rn = rn->rn_left; 1122 } 1123 1124 if (is_mt) 1125 (*lockf)(rn); 1126 1127 for (;;) { 1128 base = rn; 1129 /* If at right child go back up, otherwise, go right */ 1130 while (rn->rn_parent->rn_right == rn && 1131 (rn->rn_flags & RNF_ROOT) == 0) { 1132 rn = rn->rn_parent; 1133 } 1134 /* Find the next *leaf* since next node might vanish, too */ 1135 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0; ) { 1136 rn = rn->rn_left; 1137 } 1138 next = rn; 1139 1140 if (is_mt && next != NULL) 1141 (*lockf)(next); 1142 1143 /* Process leaves */ 1144 while ((rn = base) != NULL) { 1145 base = rn->rn_dupedkey; 1146 1147 if (is_mt && base != NULL) 1148 (*lockf)(base); 1149 1150 RADIX_NODE_HEAD_UNLOCK(h); 1151 if (!(rn->rn_flags & RNF_ROOT) && 1152 (error = (*f)(rn, w))) { 1153 if (is_mt) { 1154 (*unlockf)(rn); 1155 if (base != NULL) 1156 (*unlockf)(base); 1157 if (next != NULL) 1158 (*unlockf)(next); 1159 } 1160 return (error); 1161 } 1162 if (is_mt) 1163 (*unlockf)(rn); 1164 RADIX_NODE_HEAD_RLOCK(h); 1165 } 1166 rn = next; 1167 if (rn->rn_flags & RNF_ROOT) { 1168 RADIX_NODE_HEAD_UNLOCK(h); 1169 /* 1170 * no ref to release, since we never take a ref 1171 * on the root node- it can't be deleted. 1172 */ 1173 return (0); 1174 } 1175 } 1176 /* NOTREACHED */ 1177 } 1178 1179 /* 1180 * Allocate and initialize an empty tree. This has 3 nodes, which are 1181 * part of the radix_node_head (in the order <left,root,right>) and are 1182 * marked RNF_ROOT so they cannot be freed. 1183 * The leaves have all-zero and all-one keys, with significant 1184 * bits starting at 'off'. 1185 * Return 1 on success, 0 on error. 1186 */ 1187 int 1188 rn_inithead(head, off) 1189 void **head; 1190 int off; 1191 { 1192 struct radix_node_head *rnh; 1193 struct radix_node *t, *tt, *ttt; 1194 if (*head) 1195 return (1); 1196 R_ZallocSleep(rnh, struct radix_node_head *, sizeof (*rnh)); 1197 if (rnh == 0) 1198 return (0); 1199 #ifdef _KERNEL 1200 RADIX_NODE_HEAD_LOCK_INIT(rnh); 1201 #endif 1202 *head = rnh; 1203 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); 1204 ttt = rnh->rnh_nodes + 2; 1205 t->rn_right = ttt; 1206 t->rn_parent = t; 1207 tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */ 1208 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; 1209 tt->rn_bit = -1 - off; 1210 *ttt = *tt; 1211 ttt->rn_key = rn_ones; 1212 rnh->rnh_addaddr = rn_addroute; 1213 rnh->rnh_deladdr = rn_delete; 1214 rnh->rnh_matchaddr = rn_match; 1215 rnh->rnh_matchaddr_args = rn_match_args; 1216 rnh->rnh_lookup = rn_lookup; 1217 rnh->rnh_walktree = rn_walktree; 1218 rnh->rnh_walktree_mt = rn_walktree_mt; 1219 rnh->rnh_walktree_from = NULL; /* not implemented */ 1220 rnh->rnh_treetop = t; 1221 return (1); 1222 } 1223 1224 void 1225 rn_init() 1226 { 1227 char *cp, *cplim; 1228 1229 #ifdef _KERNEL 1230 radix_mask_cache = kmem_cache_create("radix_mask", 1231 sizeof (struct radix_mask), 0, NULL, NULL, NULL, NULL, NULL, 0); 1232 radix_node_cache = kmem_cache_create("radix_node", 1233 max_keylen + 2 * sizeof (struct radix_node), 1234 0, NULL, NULL, NULL, NULL, NULL, 0); 1235 #endif /* _KERNEL */ 1236 R_ZallocSleep(rn_zeros, char *, 2 * max_keylen); 1237 1238 ASSERT(rn_zeros != NULL); 1239 bzero(rn_zeros, 2 * max_keylen); 1240 rn_ones = cp = rn_zeros + max_keylen; 1241 cplim = rn_ones + max_keylen; 1242 while (cp < cplim) 1243 *cp++ = -1; 1244 if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0) 1245 panic("rn_init: could not init mask_rnhead "); 1246 } 1247 1248 int 1249 rn_freenode(n, p) 1250 struct radix_node *n; 1251 void *p; 1252 { 1253 struct radix_node_head *rnh = p; 1254 struct radix_node *d; 1255 1256 d = rnh->rnh_deladdr(n->rn_key, NULL, rnh); 1257 if (d != NULL) { 1258 Free(d, radix_node_cache); 1259 } 1260 return (0); 1261 } 1262 1263 1264 void 1265 rn_freehead(rnh) 1266 struct radix_node_head *rnh; 1267 { 1268 (void) rn_walktree(rnh, rn_freenode, rnh); 1269 1270 rnh->rnh_addaddr = NULL; 1271 rnh->rnh_deladdr = NULL; 1272 rnh->rnh_matchaddr = NULL; 1273 rnh->rnh_lookup = NULL; 1274 rnh->rnh_walktree = NULL; 1275 1276 #ifdef _KERNEL 1277 RADIX_NODE_HEAD_DESTROY(rnh); 1278 FreeHead(rnh, sizeof (*rnh)); 1279 #else 1280 Free(rnh, NULL); 1281 #endif /* _KERNEL */ 1282 } 1283 1284 void 1285 rn_fini() 1286 { 1287 struct radix_mask *m; 1288 1289 if (rn_zeros != NULL) { 1290 #ifdef _KERNEL 1291 FreeHead(rn_zeros, 2 * max_keylen); 1292 #else 1293 Free(rn_zeros, NULL); 1294 #endif 1295 rn_zeros = NULL; 1296 } 1297 1298 1299 if (mask_rnhead != NULL) { 1300 rn_freehead(mask_rnhead); 1301 mask_rnhead = NULL; 1302 } 1303 1304 while ((m = rn_mkfreelist) != NULL) { 1305 rn_mkfreelist = m->rm_mklist; 1306 Free(m, NULL); 1307 } 1308 } 1309