1 /*- 2 * Copyright (c) 1988, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)radix.c 8.5 (Berkeley) 5/19/95 30 * $FreeBSD$ 31 */ 32 33 /* 34 * Routines to build and maintain radix trees for routing lookups. 35 */ 36 #include <sys/param.h> 37 #ifdef _KERNEL 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/rwlock.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/syslog.h> 44 #include <net/radix.h> 45 #include "opt_mpath.h" 46 #ifdef RADIX_MPATH 47 #include <net/radix_mpath.h> 48 #endif 49 #else /* !_KERNEL */ 50 #include <stdio.h> 51 #include <strings.h> 52 #include <stdlib.h> 53 #define log(x, arg...) fprintf(stderr, ## arg) 54 #define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) 55 #define min(a, b) ((a) < (b) ? (a) : (b) ) 56 #include <net/radix.h> 57 #endif /* !_KERNEL */ 58 59 static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, 60 walktree_f_t *f, void *w); 61 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); 62 static struct radix_node 63 *rn_insert(void *, struct radix_node_head *, int *, 64 struct radix_node [2]), 65 *rn_newpair(void *, int, struct radix_node[2]), 66 *rn_search(void *, struct radix_node *), 67 *rn_search_m(void *, struct radix_node *, void *); 68 69 static void rn_detachhead_internal(void **head); 70 static int rn_inithead_internal(void **head, int off); 71 72 #define RADIX_MAX_KEY_LEN 32 73 74 static char rn_zeros[RADIX_MAX_KEY_LEN]; 75 static char rn_ones[RADIX_MAX_KEY_LEN] = { 76 -1, -1, -1, -1, -1, -1, -1, -1, 77 -1, -1, -1, -1, -1, -1, -1, -1, 78 -1, -1, -1, -1, -1, -1, -1, -1, 79 -1, -1, -1, -1, -1, -1, -1, -1, 80 }; 81 82 83 static int rn_lexobetter(void *m_arg, void *n_arg); 84 static struct radix_mask * 85 rn_new_radix_mask(struct radix_node *tt, 86 struct radix_mask *next); 87 static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, 88 int skip); 89 90 /* 91 * The data structure for the keys is a radix tree with one way 92 * branching removed. The index rn_bit at an internal node n represents a bit 93 * position to be tested. The tree is arranged so that all descendants 94 * of a node n have keys whose bits all agree up to position rn_bit - 1. 95 * (We say the index of n is rn_bit.) 96 * 97 * There is at least one descendant which has a one bit at position rn_bit, 98 * and at least one with a zero there. 99 * 100 * A route is determined by a pair of key and mask. We require that the 101 * bit-wise logical and of the key and mask to be the key. 102 * We define the index of a route to associated with the mask to be 103 * the first bit number in the mask where 0 occurs (with bit number 0 104 * representing the highest order bit). 105 * 106 * We say a mask is normal if every bit is 0, past the index of the mask. 107 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 108 * and m is a normal mask, then the route applies to every descendant of n. 109 * If the index(m) < rn_bit, this implies the trailing last few bits of k 110 * before bit b are all 0, (and hence consequently true of every descendant 111 * of n), so the route applies to all descendants of the node as well. 112 * 113 * Similar logic shows that a non-normal mask m such that 114 * index(m) <= index(n) could potentially apply to many children of n. 115 * Thus, for each non-host route, we attach its mask to a list at an internal 116 * node as high in the tree as we can go. 117 * 118 * The present version of the code makes use of normal routes in short- 119 * circuiting an explict mask and compare operation when testing whether 120 * a key satisfies a normal route, and also in remembering the unique leaf 121 * that governs a subtree. 122 */ 123 124 /* 125 * Most of the functions in this code assume that the key/mask arguments 126 * are sockaddr-like structures, where the first byte is an u_char 127 * indicating the size of the entire structure. 128 * 129 * To make the assumption more explicit, we use the LEN() macro to access 130 * this field. It is safe to pass an expression with side effects 131 * to LEN() as the argument is evaluated only once. 132 * We cast the result to int as this is the dominant usage. 133 */ 134 #define LEN(x) ( (int) (*(const u_char *)(x)) ) 135 136 /* 137 * XXX THIS NEEDS TO BE FIXED 138 * In the code, pointers to keys and masks are passed as either 139 * 'void *' (because callers use to pass pointers of various kinds), or 140 * 'caddr_t' (which is fine for pointer arithmetics, but not very 141 * clean when you dereference it to access data). Furthermore, caddr_t 142 * is really 'char *', while the natural type to operate on keys and 143 * masks would be 'u_char'. This mismatch require a lot of casts and 144 * intermediate variables to adapt types that clutter the code. 145 */ 146 147 /* 148 * Search a node in the tree matching the key. 149 */ 150 static struct radix_node * 151 rn_search(void *v_arg, struct radix_node *head) 152 { 153 struct radix_node *x; 154 caddr_t v; 155 156 for (x = head, v = v_arg; x->rn_bit >= 0;) { 157 if (x->rn_bmask & v[x->rn_offset]) 158 x = x->rn_right; 159 else 160 x = x->rn_left; 161 } 162 return (x); 163 } 164 165 /* 166 * Same as above, but with an additional mask. 167 * XXX note this function is used only once. 168 */ 169 static struct radix_node * 170 rn_search_m(void *v_arg, struct radix_node *head, void *m_arg) 171 { 172 struct radix_node *x; 173 caddr_t v = v_arg, m = m_arg; 174 175 for (x = head; x->rn_bit >= 0;) { 176 if ((x->rn_bmask & m[x->rn_offset]) && 177 (x->rn_bmask & v[x->rn_offset])) 178 x = x->rn_right; 179 else 180 x = x->rn_left; 181 } 182 return (x); 183 } 184 185 int 186 rn_refines(void *m_arg, void *n_arg) 187 { 188 caddr_t m = m_arg, n = n_arg; 189 caddr_t lim, lim2 = lim = n + LEN(n); 190 int longer = LEN(n++) - LEN(m++); 191 int masks_are_equal = 1; 192 193 if (longer > 0) 194 lim -= longer; 195 while (n < lim) { 196 if (*n & ~(*m)) 197 return (0); 198 if (*n++ != *m++) 199 masks_are_equal = 0; 200 } 201 while (n < lim2) 202 if (*n++) 203 return (0); 204 if (masks_are_equal && (longer < 0)) 205 for (lim2 = m - longer; m < lim2; ) 206 if (*m++) 207 return (1); 208 return (!masks_are_equal); 209 } 210 211 /* 212 * Search for exact match in given @head. 213 * Assume host bits are cleared in @v_arg if @m_arg is not NULL 214 * Note that prefixes with /32 or /128 masks are treated differently 215 * from host routes. 216 */ 217 struct radix_node * 218 rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head) 219 { 220 struct radix_node *x; 221 caddr_t netmask; 222 223 if (m_arg != NULL) { 224 /* 225 * Most common case: search exact prefix/mask 226 */ 227 x = rn_addmask(m_arg, head->rnh_masks, 1, 228 head->rnh_treetop->rn_offset); 229 if (x == NULL) 230 return (NULL); 231 netmask = x->rn_key; 232 233 x = rn_match(v_arg, head); 234 235 while (x != NULL && x->rn_mask != netmask) 236 x = x->rn_dupedkey; 237 238 return (x); 239 } 240 241 /* 242 * Search for host address. 243 */ 244 if ((x = rn_match(v_arg, head)) == NULL) 245 return (NULL); 246 247 /* Check if found key is the same */ 248 if (LEN(x->rn_key) != LEN(v_arg) || bcmp(x->rn_key, v_arg, LEN(v_arg))) 249 return (NULL); 250 251 /* Check if this is not host route */ 252 if (x->rn_mask != NULL) 253 return (NULL); 254 255 return (x); 256 } 257 258 static int 259 rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip) 260 { 261 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; 262 char *cplim; 263 int length = min(LEN(cp), LEN(cp2)); 264 265 if (cp3 == NULL) 266 cp3 = rn_ones; 267 else 268 length = min(length, LEN(cp3)); 269 cplim = cp + length; cp3 += skip; cp2 += skip; 270 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) 271 if ((*cp ^ *cp2) & *cp3) 272 return (0); 273 return (1); 274 } 275 276 /* 277 * Search for longest-prefix match in given @head 278 */ 279 struct radix_node * 280 rn_match(void *v_arg, struct radix_node_head *head) 281 { 282 caddr_t v = v_arg; 283 struct radix_node *t = head->rnh_treetop, *x; 284 caddr_t cp = v, cp2; 285 caddr_t cplim; 286 struct radix_node *saved_t, *top = t; 287 int off = t->rn_offset, vlen = LEN(cp), matched_off; 288 int test, b, rn_bit; 289 290 /* 291 * Open code rn_search(v, top) to avoid overhead of extra 292 * subroutine call. 293 */ 294 for (; t->rn_bit >= 0; ) { 295 if (t->rn_bmask & cp[t->rn_offset]) 296 t = t->rn_right; 297 else 298 t = t->rn_left; 299 } 300 /* 301 * See if we match exactly as a host destination 302 * or at least learn how many bits match, for normal mask finesse. 303 * 304 * It doesn't hurt us to limit how many bytes to check 305 * to the length of the mask, since if it matches we had a genuine 306 * match and the leaf we have is the most specific one anyway; 307 * if it didn't match with a shorter length it would fail 308 * with a long one. This wins big for class B&C netmasks which 309 * are probably the most common case... 310 */ 311 if (t->rn_mask) 312 vlen = *(u_char *)t->rn_mask; 313 cp += off; cp2 = t->rn_key + off; cplim = v + vlen; 314 for (; cp < cplim; cp++, cp2++) 315 if (*cp != *cp2) 316 goto on1; 317 /* 318 * This extra grot is in case we are explicitly asked 319 * to look up the default. Ugh! 320 * 321 * Never return the root node itself, it seems to cause a 322 * lot of confusion. 323 */ 324 if (t->rn_flags & RNF_ROOT) 325 t = t->rn_dupedkey; 326 return (t); 327 on1: 328 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 329 for (b = 7; (test >>= 1) > 0;) 330 b--; 331 matched_off = cp - v; 332 b += matched_off << 3; 333 rn_bit = -1 - b; 334 /* 335 * If there is a host route in a duped-key chain, it will be first. 336 */ 337 if ((saved_t = t)->rn_mask == 0) 338 t = t->rn_dupedkey; 339 for (; t; t = t->rn_dupedkey) 340 /* 341 * Even if we don't match exactly as a host, 342 * we may match if the leaf we wound up at is 343 * a route to a net. 344 */ 345 if (t->rn_flags & RNF_NORMAL) { 346 if (rn_bit <= t->rn_bit) 347 return (t); 348 } else if (rn_satisfies_leaf(v, t, matched_off)) 349 return (t); 350 t = saved_t; 351 /* start searching up the tree */ 352 do { 353 struct radix_mask *m; 354 t = t->rn_parent; 355 m = t->rn_mklist; 356 /* 357 * If non-contiguous masks ever become important 358 * we can restore the masking and open coding of 359 * the search and satisfaction test and put the 360 * calculation of "off" back before the "do". 361 */ 362 while (m) { 363 if (m->rm_flags & RNF_NORMAL) { 364 if (rn_bit <= m->rm_bit) 365 return (m->rm_leaf); 366 } else { 367 off = min(t->rn_offset, matched_off); 368 x = rn_search_m(v, t, m->rm_mask); 369 while (x && x->rn_mask != m->rm_mask) 370 x = x->rn_dupedkey; 371 if (x && rn_satisfies_leaf(v, x, off)) 372 return (x); 373 } 374 m = m->rm_mklist; 375 } 376 } while (t != top); 377 return (0); 378 } 379 380 #ifdef RN_DEBUG 381 int rn_nodenum; 382 struct radix_node *rn_clist; 383 int rn_saveinfo; 384 int rn_debug = 1; 385 #endif 386 387 /* 388 * Whenever we add a new leaf to the tree, we also add a parent node, 389 * so we allocate them as an array of two elements: the first one must be 390 * the leaf (see RNTORT() in route.c), the second one is the parent. 391 * This routine initializes the relevant fields of the nodes, so that 392 * the leaf is the left child of the parent node, and both nodes have 393 * (almost) all all fields filled as appropriate. 394 * (XXX some fields are left unset, see the '#if 0' section). 395 * The function returns a pointer to the parent node. 396 */ 397 398 static struct radix_node * 399 rn_newpair(void *v, int b, struct radix_node nodes[2]) 400 { 401 struct radix_node *tt = nodes, *t = tt + 1; 402 t->rn_bit = b; 403 t->rn_bmask = 0x80 >> (b & 7); 404 t->rn_left = tt; 405 t->rn_offset = b >> 3; 406 407 #if 0 /* XXX perhaps we should fill these fields as well. */ 408 t->rn_parent = t->rn_right = NULL; 409 410 tt->rn_mask = NULL; 411 tt->rn_dupedkey = NULL; 412 tt->rn_bmask = 0; 413 #endif 414 tt->rn_bit = -1; 415 tt->rn_key = (caddr_t)v; 416 tt->rn_parent = t; 417 tt->rn_flags = t->rn_flags = RNF_ACTIVE; 418 tt->rn_mklist = t->rn_mklist = 0; 419 #ifdef RN_DEBUG 420 tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 421 tt->rn_twin = t; 422 tt->rn_ybro = rn_clist; 423 rn_clist = tt; 424 #endif 425 return (t); 426 } 427 428 static struct radix_node * 429 rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry, 430 struct radix_node nodes[2]) 431 { 432 caddr_t v = v_arg; 433 struct radix_node *top = head->rnh_treetop; 434 int head_off = top->rn_offset, vlen = LEN(v); 435 struct radix_node *t = rn_search(v_arg, top); 436 caddr_t cp = v + head_off; 437 int b; 438 struct radix_node *p, *tt, *x; 439 /* 440 * Find first bit at which v and t->rn_key differ 441 */ 442 caddr_t cp2 = t->rn_key + head_off; 443 int cmp_res; 444 caddr_t cplim = v + vlen; 445 446 while (cp < cplim) 447 if (*cp2++ != *cp++) 448 goto on1; 449 *dupentry = 1; 450 return (t); 451 on1: 452 *dupentry = 0; 453 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 454 for (b = (cp - v) << 3; cmp_res; b--) 455 cmp_res >>= 1; 456 457 x = top; 458 cp = v; 459 do { 460 p = x; 461 if (cp[x->rn_offset] & x->rn_bmask) 462 x = x->rn_right; 463 else 464 x = x->rn_left; 465 } while (b > (unsigned) x->rn_bit); 466 /* x->rn_bit < b && x->rn_bit >= 0 */ 467 #ifdef RN_DEBUG 468 if (rn_debug) 469 log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); 470 #endif 471 t = rn_newpair(v_arg, b, nodes); 472 tt = t->rn_left; 473 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 474 p->rn_left = t; 475 else 476 p->rn_right = t; 477 x->rn_parent = t; 478 t->rn_parent = p; /* frees x, p as temp vars below */ 479 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 480 t->rn_right = x; 481 } else { 482 t->rn_right = tt; 483 t->rn_left = x; 484 } 485 #ifdef RN_DEBUG 486 if (rn_debug) 487 log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); 488 #endif 489 return (tt); 490 } 491 492 struct radix_node * 493 rn_addmask(void *n_arg, struct radix_node_head *maskhead, int search, int skip) 494 { 495 unsigned char *netmask = n_arg; 496 unsigned char *cp, *cplim; 497 struct radix_node *x; 498 int b = 0, mlen, j; 499 int maskduplicated, isnormal; 500 struct radix_node *saved_x; 501 unsigned char addmask_key[RADIX_MAX_KEY_LEN]; 502 503 if ((mlen = LEN(netmask)) > RADIX_MAX_KEY_LEN) 504 mlen = RADIX_MAX_KEY_LEN; 505 if (skip == 0) 506 skip = 1; 507 if (mlen <= skip) 508 return (maskhead->rnh_nodes); 509 510 bzero(addmask_key, RADIX_MAX_KEY_LEN); 511 if (skip > 1) 512 bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 513 bcopy(netmask + skip, addmask_key + skip, mlen - skip); 514 /* 515 * Trim trailing zeroes. 516 */ 517 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) 518 cp--; 519 mlen = cp - addmask_key; 520 if (mlen <= skip) 521 return (maskhead->rnh_nodes); 522 *addmask_key = mlen; 523 x = rn_search(addmask_key, maskhead->rnh_treetop); 524 if (bcmp(addmask_key, x->rn_key, mlen) != 0) 525 x = 0; 526 if (x || search) 527 return (x); 528 R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x)); 529 if ((saved_x = x) == 0) 530 return (0); 531 netmask = cp = (caddr_t)(x + 2); 532 bcopy(addmask_key, cp, mlen); 533 x = rn_insert(cp, maskhead, &maskduplicated, x); 534 if (maskduplicated) { 535 log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); 536 Free(saved_x); 537 return (x); 538 } 539 /* 540 * Calculate index of mask, and check for normalcy. 541 * First find the first byte with a 0 bit, then if there are 542 * more bits left (remember we already trimmed the trailing 0's), 543 * the bits should be contiguous, otherwise we have got 544 * a non-contiguous mask. 545 */ 546 #define CONTIG(_c) (((~(_c) + 1) & (_c)) == (unsigned char)(~(_c) + 1)) 547 cplim = netmask + mlen; 548 isnormal = 1; 549 for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) 550 cp++; 551 if (cp != cplim) { 552 for (j = 0x80; (j & *cp) != 0; j >>= 1) 553 b++; 554 if (!CONTIG(*cp) || cp != (cplim - 1)) 555 isnormal = 0; 556 } 557 b += (cp - netmask) << 3; 558 x->rn_bit = -1 - b; 559 if (isnormal) 560 x->rn_flags |= RNF_NORMAL; 561 return (x); 562 } 563 564 static int /* XXX: arbitrary ordering for non-contiguous masks */ 565 rn_lexobetter(void *m_arg, void *n_arg) 566 { 567 u_char *mp = m_arg, *np = n_arg, *lim; 568 569 if (LEN(mp) > LEN(np)) 570 return (1); /* not really, but need to check longer one first */ 571 if (LEN(mp) == LEN(np)) 572 for (lim = mp + LEN(mp); mp < lim;) 573 if (*mp++ > *np++) 574 return (1); 575 return (0); 576 } 577 578 static struct radix_mask * 579 rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next) 580 { 581 struct radix_mask *m; 582 583 R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); 584 if (m == NULL) { 585 log(LOG_ERR, "Failed to allocate route mask\n"); 586 return (0); 587 } 588 bzero(m, sizeof(*m)); 589 m->rm_bit = tt->rn_bit; 590 m->rm_flags = tt->rn_flags; 591 if (tt->rn_flags & RNF_NORMAL) 592 m->rm_leaf = tt; 593 else 594 m->rm_mask = tt->rn_mask; 595 m->rm_mklist = next; 596 tt->rn_mklist = m; 597 return (m); 598 } 599 600 struct radix_node * 601 rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 602 struct radix_node treenodes[2]) 603 { 604 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; 605 struct radix_node *t, *x = 0, *tt; 606 struct radix_node *saved_tt, *top = head->rnh_treetop; 607 short b = 0, b_leaf = 0; 608 int keyduplicated; 609 caddr_t mmask; 610 struct radix_mask *m, **mp; 611 612 /* 613 * In dealing with non-contiguous masks, there may be 614 * many different routes which have the same mask. 615 * We will find it useful to have a unique pointer to 616 * the mask to speed avoiding duplicate references at 617 * nodes and possibly save time in calculating indices. 618 */ 619 if (netmask) { 620 x = rn_addmask(netmask, head->rnh_masks, 0, top->rn_offset); 621 if (x == NULL) 622 return (0); 623 b_leaf = x->rn_bit; 624 b = -1 - x->rn_bit; 625 netmask = x->rn_key; 626 } 627 /* 628 * Deal with duplicated keys: attach node to previous instance 629 */ 630 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); 631 if (keyduplicated) { 632 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 633 #ifdef RADIX_MPATH 634 /* permit multipath, if enabled for the family */ 635 if (rn_mpath_capable(head) && netmask == tt->rn_mask) { 636 /* 637 * go down to the end of multipaths, so that 638 * new entry goes into the end of rn_dupedkey 639 * chain. 640 */ 641 do { 642 t = tt; 643 tt = tt->rn_dupedkey; 644 } while (tt && t->rn_mask == tt->rn_mask); 645 break; 646 } 647 #endif 648 if (tt->rn_mask == netmask) 649 return (0); 650 if (netmask == 0 || 651 (tt->rn_mask && 652 ((b_leaf < tt->rn_bit) /* index(netmask) > node */ 653 || rn_refines(netmask, tt->rn_mask) 654 || rn_lexobetter(netmask, tt->rn_mask)))) 655 break; 656 } 657 /* 658 * If the mask is not duplicated, we wouldn't 659 * find it among possible duplicate key entries 660 * anyway, so the above test doesn't hurt. 661 * 662 * We sort the masks for a duplicated key the same way as 663 * in a masklist -- most specific to least specific. 664 * This may require the unfortunate nuisance of relocating 665 * the head of the list. 666 * 667 * We also reverse, or doubly link the list through the 668 * parent pointer. 669 */ 670 if (tt == saved_tt) { 671 struct radix_node *xx = x; 672 /* link in at head of list */ 673 (tt = treenodes)->rn_dupedkey = t; 674 tt->rn_flags = t->rn_flags; 675 tt->rn_parent = x = t->rn_parent; 676 t->rn_parent = tt; /* parent */ 677 if (x->rn_left == t) 678 x->rn_left = tt; 679 else 680 x->rn_right = tt; 681 saved_tt = tt; x = xx; 682 } else { 683 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; 684 t->rn_dupedkey = tt; 685 tt->rn_parent = t; /* parent */ 686 if (tt->rn_dupedkey) /* parent */ 687 tt->rn_dupedkey->rn_parent = tt; /* parent */ 688 } 689 #ifdef RN_DEBUG 690 t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 691 tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; 692 #endif 693 tt->rn_key = (caddr_t) v; 694 tt->rn_bit = -1; 695 tt->rn_flags = RNF_ACTIVE; 696 } 697 /* 698 * Put mask in tree. 699 */ 700 if (netmask) { 701 tt->rn_mask = netmask; 702 tt->rn_bit = x->rn_bit; 703 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 704 } 705 t = saved_tt->rn_parent; 706 if (keyduplicated) 707 goto on2; 708 b_leaf = -1 - t->rn_bit; 709 if (t->rn_right == saved_tt) 710 x = t->rn_left; 711 else 712 x = t->rn_right; 713 /* Promote general routes from below */ 714 if (x->rn_bit < 0) { 715 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) 716 if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { 717 *mp = m = rn_new_radix_mask(x, 0); 718 if (m) 719 mp = &m->rm_mklist; 720 } 721 } else if (x->rn_mklist) { 722 /* 723 * Skip over masks whose index is > that of new node 724 */ 725 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 726 if (m->rm_bit >= b_leaf) 727 break; 728 t->rn_mklist = m; *mp = 0; 729 } 730 on2: 731 /* Add new route to highest possible ancestor's list */ 732 if ((netmask == 0) || (b > t->rn_bit )) 733 return (tt); /* can't lift at all */ 734 b_leaf = tt->rn_bit; 735 do { 736 x = t; 737 t = t->rn_parent; 738 } while (b <= t->rn_bit && x != top); 739 /* 740 * Search through routes associated with node to 741 * insert new route according to index. 742 * Need same criteria as when sorting dupedkeys to avoid 743 * double loop on deletion. 744 */ 745 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) { 746 if (m->rm_bit < b_leaf) 747 continue; 748 if (m->rm_bit > b_leaf) 749 break; 750 if (m->rm_flags & RNF_NORMAL) { 751 mmask = m->rm_leaf->rn_mask; 752 if (tt->rn_flags & RNF_NORMAL) { 753 #if !defined(RADIX_MPATH) 754 log(LOG_ERR, 755 "Non-unique normal route, mask not entered\n"); 756 #endif 757 return (tt); 758 } 759 } else 760 mmask = m->rm_mask; 761 if (mmask == netmask) { 762 m->rm_refs++; 763 tt->rn_mklist = m; 764 return (tt); 765 } 766 if (rn_refines(netmask, mmask) 767 || rn_lexobetter(netmask, mmask)) 768 break; 769 } 770 *mp = rn_new_radix_mask(tt, *mp); 771 return (tt); 772 } 773 774 struct radix_node * 775 rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) 776 { 777 struct radix_node *t, *p, *x, *tt; 778 struct radix_mask *m, *saved_m, **mp; 779 struct radix_node *dupedkey, *saved_tt, *top; 780 caddr_t v, netmask; 781 int b, head_off, vlen; 782 783 v = v_arg; 784 netmask = netmask_arg; 785 x = head->rnh_treetop; 786 tt = rn_search(v, x); 787 head_off = x->rn_offset; 788 vlen = LEN(v); 789 saved_tt = tt; 790 top = x; 791 if (tt == 0 || 792 bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) 793 return (0); 794 /* 795 * Delete our route from mask lists. 796 */ 797 if (netmask) { 798 x = rn_addmask(netmask, head->rnh_masks, 1, head_off); 799 if (x == NULL) 800 return (0); 801 netmask = x->rn_key; 802 while (tt->rn_mask != netmask) 803 if ((tt = tt->rn_dupedkey) == 0) 804 return (0); 805 } 806 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) 807 goto on1; 808 if (tt->rn_flags & RNF_NORMAL) { 809 if (m->rm_leaf != tt || m->rm_refs > 0) { 810 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 811 return (0); /* dangling ref could cause disaster */ 812 } 813 } else { 814 if (m->rm_mask != tt->rn_mask) { 815 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 816 goto on1; 817 } 818 if (--m->rm_refs >= 0) 819 goto on1; 820 } 821 b = -1 - tt->rn_bit; 822 t = saved_tt->rn_parent; 823 if (b > t->rn_bit) 824 goto on1; /* Wasn't lifted at all */ 825 do { 826 x = t; 827 t = t->rn_parent; 828 } while (b <= t->rn_bit && x != top); 829 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 830 if (m == saved_m) { 831 *mp = m->rm_mklist; 832 Free(m); 833 break; 834 } 835 if (m == 0) { 836 log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 837 if (tt->rn_flags & RNF_NORMAL) 838 return (0); /* Dangling ref to us */ 839 } 840 on1: 841 /* 842 * Eliminate us from tree 843 */ 844 if (tt->rn_flags & RNF_ROOT) 845 return (0); 846 #ifdef RN_DEBUG 847 /* Get us out of the creation list */ 848 for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} 849 if (t) t->rn_ybro = tt->rn_ybro; 850 #endif 851 t = tt->rn_parent; 852 dupedkey = saved_tt->rn_dupedkey; 853 if (dupedkey) { 854 /* 855 * Here, tt is the deletion target and 856 * saved_tt is the head of the dupekey chain. 857 */ 858 if (tt == saved_tt) { 859 /* remove from head of chain */ 860 x = dupedkey; x->rn_parent = t; 861 if (t->rn_left == tt) 862 t->rn_left = x; 863 else 864 t->rn_right = x; 865 } else { 866 /* find node in front of tt on the chain */ 867 for (x = p = saved_tt; p && p->rn_dupedkey != tt;) 868 p = p->rn_dupedkey; 869 if (p) { 870 p->rn_dupedkey = tt->rn_dupedkey; 871 if (tt->rn_dupedkey) /* parent */ 872 tt->rn_dupedkey->rn_parent = p; 873 /* parent */ 874 } else log(LOG_ERR, "rn_delete: couldn't find us\n"); 875 } 876 t = tt + 1; 877 if (t->rn_flags & RNF_ACTIVE) { 878 #ifndef RN_DEBUG 879 *++x = *t; 880 p = t->rn_parent; 881 #else 882 b = t->rn_info; 883 *++x = *t; 884 t->rn_info = b; 885 p = t->rn_parent; 886 #endif 887 if (p->rn_left == t) 888 p->rn_left = x; 889 else 890 p->rn_right = x; 891 x->rn_left->rn_parent = x; 892 x->rn_right->rn_parent = x; 893 } 894 goto out; 895 } 896 if (t->rn_left == tt) 897 x = t->rn_right; 898 else 899 x = t->rn_left; 900 p = t->rn_parent; 901 if (p->rn_right == t) 902 p->rn_right = x; 903 else 904 p->rn_left = x; 905 x->rn_parent = p; 906 /* 907 * Demote routes attached to us. 908 */ 909 if (t->rn_mklist) { 910 if (x->rn_bit >= 0) { 911 for (mp = &x->rn_mklist; (m = *mp);) 912 mp = &m->rm_mklist; 913 *mp = t->rn_mklist; 914 } else { 915 /* If there are any key,mask pairs in a sibling 916 duped-key chain, some subset will appear sorted 917 in the same order attached to our mklist */ 918 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 919 if (m == x->rn_mklist) { 920 struct radix_mask *mm = m->rm_mklist; 921 x->rn_mklist = 0; 922 if (--(m->rm_refs) < 0) 923 Free(m); 924 m = mm; 925 } 926 if (m) 927 log(LOG_ERR, 928 "rn_delete: Orphaned Mask %p at %p\n", 929 m, x); 930 } 931 } 932 /* 933 * We may be holding an active internal node in the tree. 934 */ 935 x = tt + 1; 936 if (t != x) { 937 #ifndef RN_DEBUG 938 *t = *x; 939 #else 940 b = t->rn_info; 941 *t = *x; 942 t->rn_info = b; 943 #endif 944 t->rn_left->rn_parent = t; 945 t->rn_right->rn_parent = t; 946 p = x->rn_parent; 947 if (p->rn_left == x) 948 p->rn_left = t; 949 else 950 p->rn_right = t; 951 } 952 out: 953 tt->rn_flags &= ~RNF_ACTIVE; 954 tt[1].rn_flags &= ~RNF_ACTIVE; 955 return (tt); 956 } 957 958 /* 959 * This is the same as rn_walktree() except for the parameters and the 960 * exit. 961 */ 962 static int 963 rn_walktree_from(struct radix_node_head *h, void *a, void *m, 964 walktree_f_t *f, void *w) 965 { 966 int error; 967 struct radix_node *base, *next; 968 u_char *xa = (u_char *)a; 969 u_char *xm = (u_char *)m; 970 struct radix_node *rn, *last = NULL; /* shut up gcc */ 971 int stopping = 0; 972 int lastb; 973 974 /* 975 * rn_search_m is sort-of-open-coded here. We cannot use the 976 * function because we need to keep track of the last node seen. 977 */ 978 /* printf("about to search\n"); */ 979 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) { 980 last = rn; 981 /* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n", 982 rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */ 983 if (!(rn->rn_bmask & xm[rn->rn_offset])) { 984 break; 985 } 986 if (rn->rn_bmask & xa[rn->rn_offset]) { 987 rn = rn->rn_right; 988 } else { 989 rn = rn->rn_left; 990 } 991 } 992 /* printf("done searching\n"); */ 993 994 /* 995 * Two cases: either we stepped off the end of our mask, 996 * in which case last == rn, or we reached a leaf, in which 997 * case we want to start from the last node we looked at. 998 * Either way, last is the node we want to start from. 999 */ 1000 rn = last; 1001 lastb = rn->rn_bit; 1002 1003 /* printf("rn %p, lastb %d\n", rn, lastb);*/ 1004 1005 /* 1006 * This gets complicated because we may delete the node 1007 * while applying the function f to it, so we need to calculate 1008 * the successor node in advance. 1009 */ 1010 while (rn->rn_bit >= 0) 1011 rn = rn->rn_left; 1012 1013 while (!stopping) { 1014 /* printf("node %p (%d)\n", rn, rn->rn_bit); */ 1015 base = rn; 1016 /* If at right child go back up, otherwise, go right */ 1017 while (rn->rn_parent->rn_right == rn 1018 && !(rn->rn_flags & RNF_ROOT)) { 1019 rn = rn->rn_parent; 1020 1021 /* if went up beyond last, stop */ 1022 if (rn->rn_bit <= lastb) { 1023 stopping = 1; 1024 /* printf("up too far\n"); */ 1025 /* 1026 * XXX we should jump to the 'Process leaves' 1027 * part, because the values of 'rn' and 'next' 1028 * we compute will not be used. Not a big deal 1029 * because this loop will terminate, but it is 1030 * inefficient and hard to understand! 1031 */ 1032 } 1033 } 1034 1035 /* 1036 * At the top of the tree, no need to traverse the right 1037 * half, prevent the traversal of the entire tree in the 1038 * case of default route. 1039 */ 1040 if (rn->rn_parent->rn_flags & RNF_ROOT) 1041 stopping = 1; 1042 1043 /* Find the next *leaf* since next node might vanish, too */ 1044 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1045 rn = rn->rn_left; 1046 next = rn; 1047 /* Process leaves */ 1048 while ((rn = base) != 0) { 1049 base = rn->rn_dupedkey; 1050 /* printf("leaf %p\n", rn); */ 1051 if (!(rn->rn_flags & RNF_ROOT) 1052 && (error = (*f)(rn, w))) 1053 return (error); 1054 } 1055 rn = next; 1056 1057 if (rn->rn_flags & RNF_ROOT) { 1058 /* printf("root, stopping"); */ 1059 stopping = 1; 1060 } 1061 1062 } 1063 return (0); 1064 } 1065 1066 static int 1067 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) 1068 { 1069 int error; 1070 struct radix_node *base, *next; 1071 struct radix_node *rn = h->rnh_treetop; 1072 /* 1073 * This gets complicated because we may delete the node 1074 * while applying the function f to it, so we need to calculate 1075 * the successor node in advance. 1076 */ 1077 1078 /* First time through node, go left */ 1079 while (rn->rn_bit >= 0) 1080 rn = rn->rn_left; 1081 for (;;) { 1082 base = rn; 1083 /* If at right child go back up, otherwise, go right */ 1084 while (rn->rn_parent->rn_right == rn 1085 && (rn->rn_flags & RNF_ROOT) == 0) 1086 rn = rn->rn_parent; 1087 /* Find the next *leaf* since next node might vanish, too */ 1088 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1089 rn = rn->rn_left; 1090 next = rn; 1091 /* Process leaves */ 1092 while ((rn = base)) { 1093 base = rn->rn_dupedkey; 1094 if (!(rn->rn_flags & RNF_ROOT) 1095 && (error = (*f)(rn, w))) 1096 return (error); 1097 } 1098 rn = next; 1099 if (rn->rn_flags & RNF_ROOT) 1100 return (0); 1101 } 1102 /* NOTREACHED */ 1103 } 1104 1105 /* 1106 * Allocate and initialize an empty tree. This has 3 nodes, which are 1107 * part of the radix_node_head (in the order <left,root,right>) and are 1108 * marked RNF_ROOT so they cannot be freed. 1109 * The leaves have all-zero and all-one keys, with significant 1110 * bits starting at 'off'. 1111 * Return 1 on success, 0 on error. 1112 */ 1113 static int 1114 rn_inithead_internal(void **head, int off) 1115 { 1116 struct radix_node_head *rnh; 1117 struct radix_node *t, *tt, *ttt; 1118 if (*head) 1119 return (1); 1120 R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh)); 1121 if (rnh == 0) 1122 return (0); 1123 #ifdef _KERNEL 1124 RADIX_NODE_HEAD_LOCK_INIT(rnh); 1125 #endif 1126 *head = rnh; 1127 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); 1128 ttt = rnh->rnh_nodes + 2; 1129 t->rn_right = ttt; 1130 t->rn_parent = t; 1131 tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */ 1132 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; 1133 tt->rn_bit = -1 - off; 1134 *ttt = *tt; 1135 ttt->rn_key = rn_ones; 1136 rnh->rnh_addaddr = rn_addroute; 1137 rnh->rnh_deladdr = rn_delete; 1138 rnh->rnh_matchaddr = rn_match; 1139 rnh->rnh_lookup = rn_lookup; 1140 rnh->rnh_walktree = rn_walktree; 1141 rnh->rnh_walktree_from = rn_walktree_from; 1142 rnh->rnh_treetop = t; 1143 return (1); 1144 } 1145 1146 static void 1147 rn_detachhead_internal(void **head) 1148 { 1149 struct radix_node_head *rnh; 1150 1151 KASSERT((head != NULL && *head != NULL), 1152 ("%s: head already freed", __func__)); 1153 rnh = *head; 1154 1155 /* Free <left,root,right> nodes. */ 1156 Free(rnh); 1157 1158 *head = NULL; 1159 } 1160 1161 int 1162 rn_inithead(void **head, int off) 1163 { 1164 struct radix_node_head *rnh; 1165 1166 if (*head != NULL) 1167 return (1); 1168 1169 if (rn_inithead_internal(head, off) == 0) 1170 return (0); 1171 1172 rnh = (struct radix_node_head *)(*head); 1173 1174 if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) { 1175 rn_detachhead_internal(head); 1176 return (0); 1177 } 1178 1179 return (1); 1180 } 1181 1182 int 1183 rn_detachhead(void **head) 1184 { 1185 struct radix_node_head *rnh; 1186 1187 KASSERT((head != NULL && *head != NULL), 1188 ("%s: head already freed", __func__)); 1189 1190 rnh = *head; 1191 1192 rn_detachhead_internal((void **)&rnh->rnh_masks); 1193 rn_detachhead_internal(head); 1194 return (1); 1195 } 1196 1197