xref: /illumos-gate/usr/src/uts/common/ipp/ipgpc/trie.c (revision 90221f9148b67fdc90178b67f9600b7bd4e3bc7c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2002-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <ipp/ipgpc/trie.h>
30 #include <ipp/ipgpc/filters.h>
31 #include <ipp/ipgpc/classifier.h>
32 #include <inet/ip6.h>
33 
34 /* trie data structure used for classifying IP addresses and TCP/UDP ports */
35 
36 #define	ZERO	0
37 #define	ONE	1
38 
39 
40 /* Statics */
41 static void t_split(node_t **, uint8_t, uint8_t);
42 static boolean_t t_traverse_delete(node_t **, uint8_t, key_t, uint32_t,
43     uint32_t, trie_id_t **);
44 
45 /*
46  * create_node(flag)
47  *
48  * generates a pointer to a new trie node
49  * flag is passed to kmem_alloc
50  * returns NULL to signify memory error
51  */
52 node_t *
53 create_node(int flag)
54 {
55 	node_t *buf = kmem_cache_alloc(trie_node_cache, flag);
56 
57 	if (buf == NULL) {
58 		return (NULL);
59 	}
60 	buf->elements = NULL;
61 	buf->zero = NULL;
62 	buf->one = NULL;
63 	buf->pos = 0;
64 	buf->bits = 0;
65 	buf->val = 0;
66 	buf->mask = 0;
67 	buf->isroot = 0;
68 	return (buf);
69 }
70 
71 
72 /*
73  * t_split(c_node, pos, key_len)
74  *
75  * performs a split on c_node for the following three cases:
76  * 1 a mismatch occured between the insert key and the value at the node
77  * 2 the insert key specifies a shorter key than the one at the node
78  * 3 the insert key specifies a longer key than the one at the node
79  * cases 1 and 2 are handled in the same way
80  * when t_split returns, c_node->one and c_node->zero must != NULL
81  *
82  * (note: we assume a key_len = n (where in the real world n = 16 | 32),
83  *  and a "key" in this example is actaully some value of key_len n that
84  *  has its high order bits masked.
85  *  For example: key = 1011 with key_len = 8, would actaully be the key:mask
86  *  combo 1011xxxx:11110000.  I am using short keys for ease of example)
87  * Case 1 and 2:
88  *
89  * assume 8 bit keys for all examples
90  *
91  * trie A contains keys 111011, 0, 10
92  *       *
93  *      / \
94  *         *
95  *        / \
96  *        *  * bits = 4 pos = 5 val = 1011 mask = 00111100
97  * inserting 111100 would result in the following split
98  *                       *
99  *                      / \
100  *                         *
101  *                        / \
102  *                           *  bits = 1 pos = 5 val = 1 mask = 00100000
103  *                          / \
104  *  bits = 2 pos = 3 val=11*   * (to be inserted: (bits = 2 pos = 3 val = 00
105  *  mask = 00001100                               mask = 00001100))
106  *
107  * Case 3:
108  *
109  * trie A same as above, before insert
110  * inserting key 11101111 would results in the following split
111  *       *
112  *      / \
113  *         *
114  *        / \
115  *        *  * bits = 4 pos = 5 val = 1011 mask = 00111100
116  *          / \
117  *         *   *  (to be inserted: bits = 1 pos = 0 val = 1 mask = 00000001)
118  */
119 /* ARGSUSED */
120 static void
121 t_split(node_t **c_node, uint8_t pos, uint8_t key_len)
122 {
123 	uint8_t old_bits = 0;
124 	uint8_t i;
125 	int bit;
126 	node_t *nodep = *c_node;
127 	node_t *tnodep = NULL;
128 
129 	/* check if case is that the mask is longer */
130 	if (pos == (nodep->pos - nodep->bits)) {
131 		/* pos is past the last bit covered at this node */
132 		ASSERT(nodep->one == NULL);
133 		ASSERT(nodep->zero == NULL);
134 		nodep->one = create_node(KM_SLEEP);
135 		nodep->zero = create_node(KM_SLEEP);
136 	} else {		/* pos > (nodep->pos - nodep->bits) */
137 		old_bits = nodep->bits; /* save old bits entry */
138 		/* nodep->pos will remain the same */
139 		nodep->bits = nodep->pos - pos;
140 		/* find the mismatch bit */
141 		bit = EXTRACTBIT(nodep->val, pos, key_len);
142 		if (bit == ZERO) {
143 			if ((nodep->one == NULL) && (nodep->zero == NULL)) {
144 				nodep->one = create_node(KM_SLEEP);
145 				nodep->zero = create_node(KM_SLEEP);
146 			} else {
147 				tnodep = create_node(KM_SLEEP);
148 				tnodep->one = nodep->one;
149 				tnodep->zero = nodep->zero;
150 				nodep->zero = tnodep;
151 				nodep->one = create_node(KM_SLEEP);
152 			}
153 			/* pos is before the last bit covered at this node */
154 			nodep->zero->pos = pos - 1; /* link is one bit */
155 			/* bits gets remaining bits minus the link */
156 			nodep->zero->bits = (old_bits - nodep->bits) - 1;
157 			/* set bits that are covered by this node */
158 			for (i = 0; i < nodep->zero->bits; ++i) {
159 				SETBIT(nodep->zero->val,
160 				    (nodep->zero->pos - i),
161 				    EXTRACTBIT(nodep->val,
162 					(nodep->zero->pos - i), key_len),
163 				    key_len);
164 				SETBIT(nodep->zero->mask,
165 				    (nodep->zero->pos - i), 1, key_len);
166 			}
167 			nodep->zero->elements = nodep->elements;
168 			nodep->elements = NULL;
169 		} else {	/* bit == ONE */
170 			if ((nodep->one == NULL) && (nodep->zero == NULL)) {
171 				nodep->one = create_node(KM_SLEEP);
172 				nodep->zero = create_node(KM_SLEEP);
173 			} else {
174 				tnodep = create_node(KM_SLEEP);
175 				tnodep->one = nodep->one;
176 				tnodep->zero = nodep->zero;
177 				nodep->one = tnodep;
178 				nodep->zero = create_node(KM_SLEEP);
179 			}
180 			/* pos is before the last bit covered at this node */
181 			nodep->one->pos = pos - 1; /* link is one bit */
182 			/* bits gets remaining bits minus the link */
183 			nodep->one->bits = (old_bits - nodep->bits) - 1;
184 			/* set bits that are covered by this node */
185 			for (i = 0; i < nodep->one->bits; ++i) {
186 				SETBIT(nodep->one->val, (nodep->one->pos - i),
187 				    EXTRACTBIT(nodep->val,
188 					(nodep->one->pos - i), key_len),
189 				    key_len);
190 				SETBIT(nodep->one->mask,
191 				    (nodep->one->pos - i), 1, key_len);
192 			}
193 			nodep->one->elements = nodep->elements;
194 			nodep->elements = NULL;
195 		}
196 
197 		/* clear bits no longer covered by this node, from pos=>0 */
198 		for (i = 0; i <= pos; ++i) {
199 			UNSETBIT(nodep->val, i, key_len);
200 			UNSETBIT(nodep->mask, i, key_len);
201 		}
202 	}
203 }
204 
205 /*
206  * t_insert(tid, id, key, mask)
207  *
208  * inserts a new value, id, into the trie, tid->trie with the input key
209  * - if node exists, id is appended to element list at the node, if id does
210  *   not already exist.
211  * - if node does not exist, a new node is created and id is the head of a new
212  *   element list
213  * return DONTCARE_VALUE if mask == 0, otherwise NORMAL_VALUE
214  */
215 int
216 t_insert(trie_id_t *tid, key_t id, uint32_t key, uint32_t mask)
217 {
218 	node_t *c_node;
219 	int bit;
220 	uint8_t pos;
221 	uint8_t key_len = (uint8_t)tid->key_len;
222 
223 	/* don't insert if don't care */
224 	if (mask == 0) {
225 		++tid->stats.num_dontcare;
226 		return (DONTCARE_VALUE);
227 	}
228 
229 	rw_enter(&tid->rw_lock, RW_WRITER);
230 	c_node = tid->trie;	/* point at trie root */
231 	key &= mask;		/* apply mask */
232 	/* traverse trie to the correct position */
233 	for (pos = key_len; pos > 0; --pos) {
234 		/* check if bit is significant */
235 		/* bit in key is significant if it is covered by the mask */
236 		if (EXTRACTBIT(mask, (pos - 1), key_len) != 1) {
237 			/* check if this is a path compressed internal node */
238 			if (c_node->bits > 0) {
239 				/* check if split is needed */
240 				if ((pos - 1) > (c_node->pos - c_node->bits)) {
241 					t_split(&c_node, (pos - 1), key_len);
242 					ASSERT(c_node->one != NULL);
243 					ASSERT(c_node->zero != NULL);
244 				}
245 			}
246 			break;
247 		}
248 		/* extra bit at current position */
249 		bit = EXTRACTBIT(key, (pos - 1), key_len);
250 		/* check if this is a path compressed internal node */
251 		if (c_node->bits > 0) { /* path compressed node */
252 			/* check if split is needed */
253 			if ((pos - 1) > (c_node->pos - c_node->bits)) {
254 				/* testing for mismatch */
255 				if (bit != EXTRACTBIT(c_node->val, (pos - 1),
256 				    key_len)) {
257 					t_split(&c_node, (pos - 1), key_len);
258 					ASSERT(c_node->one != NULL);
259 					ASSERT(c_node->zero != NULL);
260 				} else {
261 					continue; /* bits match, so go on */
262 				}
263 			} else if ((pos - 1) == (c_node->pos - c_node->bits)) {
264 				/* check if at a leaf node with elements */
265 				if ((c_node->one == NULL) &&
266 				    (c_node->zero == NULL) &&
267 				    (c_node->elements != NULL)) {
268 					/*
269 					 * this case occurs when mask for key
270 					 * is longer than mask for key at
271 					 * current node
272 					 */
273 					t_split(&c_node, (pos - 1), key_len);
274 					ASSERT(c_node->one != NULL);
275 					ASSERT(c_node->zero != NULL);
276 				}
277 			} /* else continue onto child */
278 		}
279 		if (bit == ZERO) {
280 			if (c_node->zero == NULL) { /* leaf node */
281 				if (c_node->bits == 0) {
282 					c_node->pos = (pos - 1);
283 				}
284 				c_node->bits++;
285 				/* bit at pos for node value should be 0 */
286 				UNSETBIT(c_node->val, (pos - 1), key_len);
287 				SETBIT(c_node->mask, (pos - 1), 1, key_len);
288 			} else {
289 				/* assert that trie is path compressed */
290 				ASSERT(c_node->one != NULL);
291 				c_node = c_node->zero; /* internal node */
292 			}
293 		} else {	/* ONE bit */
294 			if (c_node->one == NULL) { /* leaf node */
295 				if (c_node->bits == 0) {
296 					c_node->pos = (pos - 1);
297 				}
298 				c_node->bits++;
299 				/* bit at pos for node value should be 1 */
300 				SETBIT(c_node->val, (pos - 1), 1, key_len);
301 				SETBIT(c_node->mask, (pos - 1), 1, key_len);
302 			} else {
303 				/* assert that trie is path compressed */
304 				ASSERT(c_node->zero != NULL);
305 				c_node = c_node->one; /* internal node */
306 			}
307 		}
308 	}
309 	/* insert at node */
310 	(void) ipgpc_list_insert(&c_node->elements, id);
311 	/* update stats */
312 	++tid->stats.num_inserted;
313 	/*
314 	 * check if this is the first key to be inserted that is not a
315 	 * don't care (*)
316 	 */
317 	if (tid->info.dontcareonly == B_TRUE) {
318 		tid->info.dontcareonly = B_FALSE;
319 	}
320 	rw_exit(&tid->rw_lock);
321 	return (NORMAL_VALUE);
322 }
323 
324 /*
325  * t_insert6(tid, id, key, mask)
326  *
327  * specific to inserting keys of 128 bits in length
328  */
329 int
330 t_insert6(trie_id_t *tid, key_t id, in6_addr_t key, in6_addr_t mask)
331 {
332 	node_t *c_node;
333 	int bit, i;
334 	uint8_t pos;
335 	uint8_t type_len = IP_ABITS;
336 	in6_addr_t zero_addr = IN6ADDR_ANY_INIT;
337 
338 	/* don't insert if don't care */
339 	if (IN6_ARE_ADDR_EQUAL(&mask, &zero_addr)) {
340 		++tid->stats.num_dontcare;
341 		return (DONTCARE_VALUE);
342 	}
343 
344 	rw_enter(&tid->rw_lock, RW_WRITER);
345 	c_node = tid->trie;	/* point at root of trie */
346 	V6_MASK_COPY(key, mask, key); /* apply mask to key */
347 	/*
348 	 * A IPv6 address is structured as an array of four uint32_t
349 	 * values.  The highest order of the bits are located in array[0]
350 	 */
351 	for (i = 0; i < 4; ++i) {
352 		/* traverse trie to the correct position */
353 		for (pos = type_len; pos > 0; --pos) {
354 			/* check if bit is significant */
355 			if (EXTRACTBIT(mask.s6_addr32[i], (pos - 1), type_len)
356 			    != ONE) {
357 				break;
358 			}
359 			bit = EXTRACTBIT(key.s6_addr32[i], (pos - 1), type_len);
360 			if (bit == ZERO) {
361 				if (c_node->zero == NULL) {
362 					c_node->zero = create_node(KM_SLEEP);
363 				}
364 				c_node = c_node->zero;
365 			} else {	/* ONE bit */
366 				if (c_node->one == NULL) {
367 					c_node->one = create_node(KM_SLEEP);
368 				}
369 				c_node = c_node->one;
370 			}
371 
372 		}
373 	}
374 	/* insert at node */
375 	(void) ipgpc_list_insert(&c_node->elements, id);
376 	/* update stats */
377 	++tid->stats.num_inserted;
378 	/*
379 	 * check if this is the first key to be inserted that is not a
380 	 * don't care (*)
381 	 */
382 	if (tid->info.dontcareonly == B_TRUE) {
383 		tid->info.dontcareonly = B_FALSE;
384 	}
385 	rw_exit(&tid->rw_lock);
386 	return (NORMAL_VALUE);
387 }
388 
389 /*
390  * t_traverse_delete(in_node, pos, id, key, mask, tid)
391  *
392  * used to traverse to the node containing id, as found under key
393  * once id is found, it is removed from the trie.
394  * Upon removing the id from a given node in the trie, path compression
395  * will be applied to nodes that are no longer compressed.
396  * If the id is successfully removed, tid->stats are updated
397  */
398 static boolean_t
399 t_traverse_delete(node_t **in_node, uint8_t pos, key_t id, uint32_t key,
400     uint32_t mask, trie_id_t **tid)
401 {
402 	node_t *c_node = *in_node;
403 	node_t *t_node;
404 	int bit;
405 
406 	if (c_node == NULL) {
407 		return (B_FALSE); /* base failure case */
408 	}
409 
410 	/* we've found the node the id is probably at */
411 	if ((pos == 0) ||
412 	    (EXTRACTBIT(mask, (pos - 1), (uint8_t)(*tid)->key_len) != 1)) {
413 		if (ipgpc_list_remove(&c_node->elements, id) == B_FALSE) {
414 			ipgpc0dbg(("t_traverse_delete: id %d does not " \
415 			    "exist in trie\n", id));
416 			return (B_FALSE); /* key does not exist at node */
417 		} else {
418 			/* update stats */
419 			--(*tid)->stats.num_inserted;
420 			/* check if 0 values are inserted in this trie */
421 			if ((*tid)->stats.num_inserted == 0) {
422 				/* update dontcareonly boolean */
423 				(*tid)->info.dontcareonly = B_TRUE;
424 			}
425 		}
426 		/* check if node has zero elements, is a LEAF node */
427 		if ((c_node->elements == NULL) &&
428 		    ((c_node->one == NULL) && (c_node->zero == NULL))) {
429 			/* make sure we don't delete the root */
430 			if (c_node->isroot != 1) {
431 				kmem_cache_free(trie_node_cache, c_node);
432 				return (B_TRUE);
433 			} else {
434 				/* this is the root, just zero out the info */
435 				c_node->pos = 0;
436 				c_node->bits = 0;
437 				c_node->val = 0;
438 				c_node->mask = 0;
439 			}
440 		}
441 		return (B_FALSE);
442 	}
443 
444 	/* check to see if node describes bits to skip */
445 	if (c_node->bits > 0) {
446 		if ((key & c_node->mask) != c_node->val) {
447 			ipgpc0dbg(("t_traverse_delete: id %d does not " \
448 			    "exist in trie\n", id));
449 			return (B_FALSE); /* key does not exist at node */
450 		}
451 		pos = (c_node->pos - c_node->bits) + 1;
452 		/* search should continue if mask and pos are valid */
453 		if ((pos == 0) ||
454 		    (EXTRACTBIT(mask, (pos - 1), (uint8_t)(*tid)->key_len)
455 			!= 1)) {
456 			/* this node probably contains the id */
457 			if (ipgpc_list_remove(&c_node->elements,
458 			    id) == B_FALSE) {
459 				ipgpc0dbg(("t_traverse_delete: id %d does" \
460 				    "not exist in trie\n", id));
461 				return (B_FALSE);
462 			} else {
463 				/* update stats */
464 				--(*tid)->stats.num_inserted;
465 				/* check if 0 values are inserted */
466 				if ((*tid)->stats.num_inserted == 0) {
467 					/* update dontcare boolean */
468 					(*tid)->info.dontcareonly = B_TRUE;
469 				}
470 			}
471 			/* check if node has zero elements & is a LEAF node */
472 			if ((c_node->elements == NULL) &&
473 			    ((c_node->one == NULL) &&
474 				(c_node->zero == NULL))) {
475 				/* make sure we don't delete the root */
476 				if (c_node->isroot != 1) {
477 					kmem_cache_free(trie_node_cache,
478 					    c_node);
479 					return (B_TRUE);
480 				} else {
481 					/* this is the root, zero out info */
482 					c_node->pos = 0;
483 					c_node->bits = 0;
484 					c_node->val = 0;
485 					c_node->mask = 0;
486 				}
487 			}
488 			return (B_FALSE);
489 		}
490 	}
491 	/* extract next bit and test */
492 	bit = EXTRACTBIT(key, (pos - 1), (uint8_t)(*tid)->key_len);
493 	if (bit == ZERO) {
494 		if (t_traverse_delete(&c_node->zero, (pos - 1), id, key, mask,
495 		    tid) == B_TRUE) {
496 			c_node->zero = NULL;
497 		}
498 	} else {	/* ONE bit */
499 		if (t_traverse_delete(&c_node->one, (pos - 1), id, key, mask,
500 		    tid) == B_TRUE) {
501 			c_node->one = NULL;
502 		}
503 	}
504 	/*
505 	 * non path-compressed nodes will contain one child and no elements
506 	 * what occurs here:
507 	 *	  *
508 	 *	 / \
509 	 *	*   *  <-- p_node->elements == NULL
510 	 *	   /
511 	 *	  *  <-- c_node->elements = foo
512 	 *	 / \
513 	 *	*   *  <-- children of c_node
514 	 * after:
515 	 *	  *
516 	 *	 / \
517 	 *	*   *   <-- p_node->elements = foo
518 	 *	   / \
519 	 *	  *   *  <-- p_node adopts children of c_node
520 	 */
521 	if ((c_node->one == NULL) && (c_node->zero != NULL)) {
522 		if (c_node->elements == NULL) {
523 			/* move child elements to parent */
524 			c_node->elements = c_node->zero->elements;
525 			/* be sure to include the link in the bits */
526 			c_node->bits += c_node->zero->bits + 1;
527 			/* c_node->pos will remain the same */
528 			c_node->mask |= c_node->zero->mask;
529 			/* don't forget to mark the link */
530 			SETBIT(c_node->mask, (pos - 1), 1,
531 			    (uint8_t)(*tid)->key_len);
532 			c_node->val |= c_node->zero->val;
533 			/* don't forget to mark the link  */
534 			UNSETBIT(c_node->val, (pos - 1),
535 			    (uint8_t)(*tid)->key_len);
536 			/* adopt children */
537 			t_node = c_node->zero;
538 			c_node->one = c_node->zero->one;
539 			c_node->zero = c_node->zero->zero;
540 			kmem_cache_free(trie_node_cache, t_node);
541 		} else {
542 			ASSERT(c_node->zero->one == NULL);
543 			ASSERT(c_node->zero->zero == NULL);
544 			kmem_cache_free(trie_node_cache, c_node->zero);
545 			c_node->zero = NULL;
546 		}
547 	} else if ((c_node->one != NULL) && (c_node->zero == NULL)) {
548 		if (c_node->elements == NULL) {
549 			/* move child elements to parent */
550 			c_node->elements = c_node->one->elements;
551 			/* be sure to include the link in the bits */
552 			c_node->bits += c_node->one->bits + 1;
553 			/* c_node->pos will remain the same */
554 			c_node->mask |= c_node->one->mask;
555 			/* don't forget to mark the link */
556 			SETBIT(c_node->mask, (pos - 1), 1,
557 			    (uint8_t)(*tid)->key_len);
558 			c_node->val |= c_node->one->val;
559 			/* don't forget to mark the link  */
560 			SETBIT(c_node->val, (pos - 1), 1,
561 			    (uint8_t)(*tid)->key_len);
562 			/* adopt children */
563 			t_node = c_node->one;
564 			c_node->zero = c_node->one->zero;
565 			c_node->one = c_node->one->one;
566 			kmem_cache_free(trie_node_cache, t_node);
567 		} else {
568 			ASSERT(c_node->one->one == NULL);
569 			ASSERT(c_node->one->zero == NULL);
570 			kmem_cache_free(trie_node_cache, c_node->one);
571 			c_node->one = NULL;
572 		}
573 	}
574 	/* check if node has zero elements, is a LEAF node */
575 	if ((c_node->elements == NULL) &&
576 	    ((c_node->one == NULL) && (c_node->zero == NULL))) {
577 		/* make sure we don't delete the root */
578 		if (c_node->isroot != 1) {
579 			kmem_cache_free(trie_node_cache, c_node);
580 			return (B_TRUE);
581 		} else {
582 			/* this is the root, just zero out the info */
583 			c_node->pos = 0;
584 			c_node->bits = 0;
585 			c_node->val = 0;
586 			c_node->mask = 0;
587 		}
588 	}
589 	return (B_FALSE);
590 }
591 
592 
593 
594 /*
595  * t_remove(tid, id, key, mask)
596  *
597  * removes a value associated with an id from the trie
598  * - if the item does not exist, nothing is removed
599  * - if more than one id share the same key, only the id specified is removed
600  */
601 void
602 t_remove(trie_id_t *tid, key_t id, uint32_t key, uint32_t mask)
603 {
604 	node_t *c_node;
605 
606 	/* don't cares are not inserted */
607 	if (mask == 0) {
608 		--tid->stats.num_dontcare;
609 		return;
610 	}
611 
612 	key &= mask;		/* apply mask */
613 	/* traverse to node containing id and remove the id from the trie */
614 	rw_enter(&tid->rw_lock, RW_WRITER);
615 	c_node = tid->trie;
616 	(void) t_traverse_delete(&c_node, (uint8_t)tid->key_len, id, key, mask,
617 	    &tid);
618 	rw_exit(&tid->rw_lock);
619 }
620 
621 /*
622  * t_remove6(tid, id, key, mask)
623  *
624  * specific to removing key of 128 bits in length
625  */
626 void
627 t_remove6(trie_id_t *tid, key_t id, in6_addr_t key, in6_addr_t mask)
628 {
629 	node_t *c_node;
630 	int bit, i;
631 	uint8_t pos;
632 	uint8_t type_len = IP_ABITS;
633 	in6_addr_t zero_addr = IN6ADDR_ANY_INIT;
634 
635 	/* don't cares are not inserted */
636 	if (IN6_ARE_ADDR_EQUAL(&mask, &zero_addr)) {
637 		--tid->stats.num_dontcare;
638 		return;
639 	}
640 
641 	rw_enter(&tid->rw_lock, RW_WRITER);
642 	c_node = tid->trie;	/* point at root of trie */
643 	V6_MASK_COPY(key, mask, key);
644 	/*
645 	 * A IPv6 address is structured as an array of four uint32_t
646 	 * values.  The higest order of the bits are located in array[0]
647 	 */
648 	for (i = 0; i < 4; ++i) {
649 		/* traverse trie to the correct position */
650 		for (pos = type_len; pos > 0; --pos) {
651 			/* check if bit is significant */
652 			if (EXTRACTBIT(mask.s6_addr32[i], (pos - 1), type_len)
653 			    != ONE) {
654 				break;
655 			}
656 			bit = EXTRACTBIT(key.s6_addr32[i], (pos - 1), type_len);
657 			if (bit == ZERO) {
658 				if (c_node->zero == NULL) {
659 					break;
660 				}
661 				c_node = c_node->zero;
662 			} else {	/* ONE bit */
663 				if (c_node->one == NULL) {
664 					break;
665 				}
666 				c_node = c_node->one;
667 			}
668 
669 		}
670 	}
671 	if (c_node != NULL) {
672 		if (ipgpc_list_remove(&c_node->elements, id)) {
673 			/* update stats */
674 			--tid->stats.num_inserted;
675 			/*
676 			 * check to see if only dontcare's are inserted
677 			 */
678 			if (tid->stats.num_inserted <= 0) {
679 				tid->info.dontcareonly = B_TRUE;
680 			}
681 		}
682 	}
683 	rw_exit(&tid->rw_lock);
684 }
685 
686 
687 /*
688  * t_retrieve(tid, key, fid_table)
689  *
690  * returns the number of found filters that match the input key
691  * - each value that matches either a partial or exact match on the key
692  *   is inserted into the fid_table
693  * - some nodes may contain multiple id's, all items will be inserted
694  *   into the fid_table
695  * - the find stops when an edge node is reached, the left and right nodes
696  *   for the current node are null
697  * - 0 is returned if no matches are found, otherwise the number of matches
698  *   is returned
699  * - (-1) is returned if a memory error occurred
700  */
701 int
702 t_retrieve(trie_id_t *tid, uint32_t key, ht_match_t *fid_table)
703 {
704 	int bit;
705 	uint8_t pos;
706 	int num_found = 0;
707 	int ret;
708 	node_t *c_node;
709 
710 	rw_enter(&tid->rw_lock, RW_READER);
711 	c_node = tid->trie;	/* point at root of trie */
712 
713 	/* ensure trie structure is allocated */
714 	if (c_node == NULL) {
715 		rw_exit(&tid->rw_lock);
716 		return (num_found);
717 	}
718 	/*
719 	 * foreach node encountered in the search, collect elements and append
720 	 * to a list to be returned
721 	 */
722 	for (pos = (uint8_t)tid->key_len; pos > 0; --pos) {
723 		/* check node for bits to check */
724 		if (c_node->bits > 0) {
725 			if ((key & c_node->mask) != c_node->val) {
726 				rw_exit(&tid->rw_lock);
727 				return (num_found); /* search is done */
728 			}
729 			/* pos is set to next bit not covered by node */
730 			if ((pos = (c_node->pos - c_node->bits) + 1) == 0) {
731 				/* if node covers rest of bits in key */
732 				break;
733 			}
734 		}
735 		/* check node for elements */
736 		if (c_node->elements != NULL) {
737 			if ((ret = ipgpc_mark_found(tid->info.mask,
738 			    c_node->elements, fid_table)) == -1) {
739 				/* signifies a memory error */
740 				rw_exit(&tid->rw_lock);
741 				return (-1);
742 			}
743 			num_found += ret; /* increment num_found */
744 		}
745 
746 		bit = EXTRACTBIT(key, (pos - 1), (uint8_t)tid->key_len);
747 		if (bit == ZERO) { /* choose leaf */
748 			c_node = c_node->zero;
749 
750 		} else {	/* bit == ONE */
751 			c_node = c_node->one;
752 
753 		}
754 		if (c_node == NULL) {
755 			/* search is finished, edge node reached */
756 			rw_exit(&tid->rw_lock);
757 			return (num_found);
758 		}
759 	}
760 	/* see if current node contains elements */
761 	if (c_node->elements != NULL) {
762 		if ((ret = ipgpc_mark_found(tid->info.mask, c_node->elements,
763 		    fid_table)) == -1) {
764 			rw_exit(&tid->rw_lock);
765 			return (-1); /* signifies a memory error */
766 		}
767 		num_found += ret; /* increment num_found */
768 	}
769 	rw_exit(&tid->rw_lock);
770 	return (num_found);
771 }
772 
773 /*
774  * t_retrieve6(tid, key, fid_table)
775  *
776  * specific to retrieving keys of 128 bits in length
777  */
778 int
779 t_retrieve6(trie_id_t *tid, in6_addr_t key, ht_match_t *fid_table)
780 {
781 	int bit, i;
782 	uint8_t pos;
783 	int num_found = 0;
784 	int ret;
785 	node_t *c_node;
786 	uint8_t type_len = IP_ABITS;
787 
788 	rw_enter(&tid->rw_lock, RW_READER);
789 	c_node = tid->trie;
790 
791 	/* ensure trie structure is allocated */
792 	if (c_node == NULL) {
793 		rw_exit(&tid->rw_lock);
794 		return (num_found);
795 	}
796 	/*
797 	 * A IPv6 address is structured as an array of four uint32_t
798 	 * values.  The higest order of the bits are located in array[0]
799 	 */
800 	for (i = 0; i < 4; ++i) {
801 		/*
802 		 * foreach node encountered in the search, collect elements
803 		 * and append to a list to be returned
804 		 */
805 		for (pos = type_len; pos > 0; --pos) {
806 			/* extract bit at pos */
807 			bit =
808 			    EXTRACTBIT(key.s6_addr32[i], (pos - 1), type_len);
809 			if (bit == ZERO) { /* choose leaf */
810 				c_node = c_node->zero;
811 
812 			} else {
813 				c_node = c_node->one;
814 
815 			}
816 			if (c_node == NULL) {
817 				/* search is finished, edge node reached */
818 				rw_exit(&tid->rw_lock);
819 				return (num_found);
820 			}
821 			/* see if current node contains elements */
822 			if (c_node->elements != NULL) {
823 				if ((ret = ipgpc_mark_found(tid->info.mask,
824 				    c_node->elements, fid_table)) == -1) {
825 					/* signifies a memory error */
826 					rw_exit(&tid->rw_lock);
827 					return (-1);
828 				}
829 				num_found += ret; /* increment num_found */
830 			}
831 		}
832 	}
833 	rw_exit(&tid->rw_lock);
834 	return (num_found);
835 }
836