xref: /titanic_50/usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c (revision 936b7af69172dce89b577831f79c0e18d15e854b)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  *
5  * Copyright (c) 1983, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgment:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * $FreeBSD: src/sbin/routed/table.c,v 1.15 2000/08/11 08:24:38 sheldonh Exp $
37  */
38 
39 #pragma ident	"%Z%%M%	%I%	%E% SMI"
40 
41 #include "defs.h"
42 #include <fcntl.h>
43 #include <stropts.h>
44 #include <sys/tihdr.h>
45 #include <inet/mib2.h>
46 #include <inet/ip.h>
47 
/*
 * This structure is used to store a disassembled routing socket message.
 * It is filled in by rt_xaddrs(), which is defined outside this part of
 * the file.
 */
struct rt_addrinfo {
	/* presumably the RTA_* bitmask of addresses present -- TODO confirm */
	int	rti_addrs;
	/* one pointer slot per address index, RTAX_MAX slots in all */
	struct sockaddr_storage *rti_info[RTAX_MAX];
};
53 
/*
 * Forward declarations of file-local helpers (their definitions are not
 * in the part of the file covered here), plus file-scope state.
 */
static struct rt_spare *rts_better(struct rt_entry *);
/* A spare initialized from the EMPTY_RT_SPARE template. */
static struct rt_spare rts_empty = EMPTY_RT_SPARE;
static void set_need_flash(void);
static void rtbad(struct rt_entry *, struct interface *);
static int rt_xaddrs(struct rt_addrinfo *, struct sockaddr_storage *,
    char *, int);
static struct interface *gwkludge_iflookup(in_addr_t, in_addr_t, in_addr_t);

struct radix_node_head *rhead;		/* root of the radix tree */

/* Flash update needed.  _B_TRUE to suppress the 1st. */
boolean_t need_flash = _B_TRUE;

struct timeval age_timer;		/* next check of old routes */
struct timeval need_kern = {		/* need to update kernel table */
	EPOCH+MIN_WAITTIME-1, 0
};

/* NOTE(review): presumably a count of routes in the daemon table; confirm */
static uint32_t	total_routes;
73 
/*
 * Round a size up to the next multiple of sizeof (long).  A size that is
 * zero or negative yields sizeof (long).  The bit trick relies on
 * sizeof (long) being a power of two.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
76 
77 /*
78  * It is desirable to "aggregate" routes, to combine differing routes of
79  * the same metric and next hop into a common route with a smaller netmask
80  * or to suppress redundant routes, routes that add no information to
81  * routes with smaller netmasks.
82  *
83  * A route is redundant if and only if any and all routes with smaller
84  * but matching netmasks and nets are the same.  Since routes are
85  * kept sorted in the radix tree, redundant routes always come second.
86  *
87  * There are two kinds of aggregations.  First, two routes of the same bit
88  * mask and differing only in the least significant bit of the network
89  * number can be combined into a single route with a coarser mask.
90  *
91  * Second, a route can be suppressed in favor of another route with a more
92  * coarse mask provided no incompatible routes with intermediate masks
93  * are present.  The second kind of aggregation involves suppressing routes.
94  * A route must not be suppressed if an incompatible route exists with
95  * an intermediate mask, since the suppressed route would be covered
96  * by the intermediate.
97  *
98  * This code relies on the radix tree walk encountering routes
99  * sorted first by address, with the smallest address first.
100  */
101 
/*
 * The aggregation table.  Slots are linked through ag_fine/ag_cors.
 * ag_avail heads the list of unused slots (linked through ag_fine only),
 * ag_corsest and ag_finest are the two ends of the list of slots in use.
 */
static struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest,
	*ag_finest;

/*
 * Consistency check, compiled in only with DEBUG_AG: every slot must be
 * on exactly one of the two lists, so walking the free list and the
 * in-use list (both via ag_fine) must count NUM_AG_SLOTS entries;
 * otherwise abort().  Without DEBUG_AG this expands to a no-op.
 */
#ifdef DEBUG_AG
#define	CHECK_AG() do { int acnt = 0; struct ag_info *cag;	\
	for (cag = ag_avail; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	if (acnt != NUM_AG_SLOTS)				\
		abort();					\
} while (_B_FALSE)
#else
#define	CHECK_AG()	(void)0
#endif
117 
118 
119 /*
120  * Output the contents of an aggregation table slot.
121  *	This function must always be immediately followed with the deletion
122  *	of the target slot.
123  */
124 static void
125 ag_out(struct ag_info *ag, void (*out)(struct ag_info *))
126 {
127 	struct ag_info *ag_cors;
128 	uint32_t bit;
129 
130 
131 	/* Forget it if this route should not be output for split-horizon. */
132 	if (ag->ag_state & AGS_SPLIT_HZ)
133 		return;
134 
135 	/*
136 	 * If we output both the even and odd twins, then the immediate parent,
137 	 * if it is present, is redundant, unless the parent manages to
138 	 * aggregate into something coarser.
139 	 * On successive calls, this code detects the even and odd twins,
140 	 * and marks the parent.
141 	 *
142 	 * Note that the order in which the radix tree code emits routes
143 	 * ensures that the twins are seen before the parent is emitted.
144 	 */
145 	ag_cors = ag->ag_cors;
146 	if (ag_cors != NULL &&
147 	    ag_cors->ag_mask == (ag->ag_mask << 1) &&
148 	    ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
149 		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h) ?
150 		    AGS_REDUN0 : AGS_REDUN1);
151 	}
152 
153 	/*
154 	 * Skip it if this route is itself redundant.
155 	 *
156 	 * It is ok to change the contents of the slot here, since it is
157 	 * always deleted next.
158 	 */
159 	if (ag->ag_state & AGS_REDUN0) {
160 		if (ag->ag_state & AGS_REDUN1)
161 			return;		/* quit if fully redundant */
162 		/* make it finer if it is half-redundant */
163 		bit = (-ag->ag_mask) >> 1;
164 		ag->ag_dst_h |= bit;
165 		ag->ag_mask |= bit;
166 
167 	} else if (ag->ag_state & AGS_REDUN1) {
168 		/* make it finer if it is half-redundant */
169 		bit = (-ag->ag_mask) >> 1;
170 		ag->ag_mask |= bit;
171 	}
172 	out(ag);
173 }
174 
175 
176 static void
177 ag_del(struct ag_info *ag)
178 {
179 	CHECK_AG();
180 
181 	if (ag->ag_cors == NULL)
182 		ag_corsest = ag->ag_fine;
183 	else
184 		ag->ag_cors->ag_fine = ag->ag_fine;
185 
186 	if (ag->ag_fine == NULL)
187 		ag_finest = ag->ag_cors;
188 	else
189 		ag->ag_fine->ag_cors = ag->ag_cors;
190 
191 	ag->ag_fine = ag_avail;
192 	ag_avail = ag;
193 
194 	CHECK_AG();
195 }
196 
197 
198 /* Look for a route that can suppress the given route. */
199 static struct ag_info *
200 ag_find_suppressor(struct ag_info *ag)
201 {
202 	struct ag_info *ag_cors;
203 	in_addr_t dst_h = ag->ag_dst_h;
204 
205 	for (ag_cors = ag->ag_cors; ag_cors != NULL;
206 	    ag_cors = ag_cors->ag_cors) {
207 
208 		if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
209 			/*
210 			 * We found a route with a coarser mask that covers
211 			 * the given target.  It can suppress the target
212 			 * only if it has a good enough metric and it
213 			 * either has the same (gateway, ifp), or if its state
214 			 * includes AGS_CORS_GATE or the target's state
215 			 * includes AGS_FINE_GATE.
216 			 */
217 			if (ag_cors->ag_pref <= ag->ag_pref &&
218 			    (((ag->ag_nhop == ag_cors->ag_nhop) &&
219 			    (ag->ag_ifp == ag_cors->ag_ifp)) ||
220 			    ag_cors->ag_state & AGS_CORS_GATE ||
221 			    ag->ag_state & AGS_FINE_GATE)) {
222 				return (ag_cors);
223 			}
224 		}
225 	}
226 
227 	return (NULL);
228 }
229 
230 
231 /*
232  * Flush routes waiting for aggregation.
233  * This must not suppress a route unless it is known that among all routes
234  * with coarser masks that match it, the one with the longest mask is
235  * appropriate.  This is ensured by scanning the routes in lexical order,
236  * and with the most restrictive mask first among routes to the same
237  * destination.
238  */
239 void
240 ag_flush(in_addr_t lim_dst_h,	/* flush routes to here */
241     in_addr_t lim_mask,		/* matching this mask */
242     void (*out)(struct ag_info *))
243 {
244 	struct ag_info *ag, *ag_cors, *ag_supr;
245 	in_addr_t dst_h;
246 
247 
248 	for (ag = ag_finest; ag != NULL && ag->ag_mask >= lim_mask;
249 	    ag = ag_cors) {
250 		/* Get the next route now, before we delete ag. */
251 		ag_cors = ag->ag_cors;
252 
253 		/* Work on only the specified routes. */
254 		dst_h = ag->ag_dst_h;
255 		if ((dst_h & lim_mask) != lim_dst_h)
256 			continue;
257 
258 		/*
259 		 * Don't try to suppress the route if its state doesn't
260 		 * include AGS_SUPPRESS.
261 		 */
262 		if (!(ag->ag_state & AGS_SUPPRESS)) {
263 			ag_out(ag, out);
264 			ag_del(ag);
265 			continue;
266 		}
267 
268 		ag_supr = ag_find_suppressor(ag);
269 		if (ag_supr == NULL) {
270 			/*
271 			 * We didn't find a route which suppresses the
272 			 * target, so the target can go out.
273 			 */
274 			ag_out(ag, out);
275 		} else {
276 			/*
277 			 * We found a route which suppresses the target, so
278 			 * don't output the target.
279 			 */
280 			if (TRACEACTIONS) {
281 				trace_misc("aggregated away %s",
282 				    rtname(htonl(ag->ag_dst_h), ag->ag_mask,
283 				    ag->ag_nhop));
284 				trace_misc("on coarser route %s",
285 				    rtname(htonl(ag_supr->ag_dst_h),
286 				    ag_supr->ag_mask, ag_supr->ag_nhop));
287 			}
288 			/*
289 			 * If the suppressed target was redundant, then
290 			 * mark the suppressor as redundant.
291 			 */
292 			if (AG_IS_REDUN(ag->ag_state) &&
293 			    ag_supr->ag_mask == (ag->ag_mask<<1)) {
294 				if (ag_supr->ag_dst_h == dst_h)
295 					ag_supr->ag_state |= AGS_REDUN0;
296 				else
297 					ag_supr->ag_state |= AGS_REDUN1;
298 			}
299 			if (ag->ag_tag != ag_supr->ag_tag)
300 				ag_supr->ag_tag = 0;
301 			if (ag->ag_nhop != ag_supr->ag_nhop)
302 				ag_supr->ag_nhop = 0;
303 		}
304 
305 		/* The route has either been output or suppressed */
306 		ag_del(ag);
307 	}
308 
309 	CHECK_AG();
310 }
311 
312 
313 /* Try to aggregate a route with previous routes. */
314 void
315 ag_check(in_addr_t dst,
316     in_addr_t	mask,
317     in_addr_t	gate,
318     struct interface *ifp,
319     in_addr_t	nhop,
320     uint8_t	metric,
321     uint8_t	pref,
322     uint32_t	seqno,
323     uint16_t	tag,
324     uint16_t	state,
325     void (*out)(struct ag_info *))	/* output using this */
326 {
327 	struct ag_info *ag, *nag, *ag_cors;
328 	in_addr_t xaddr;
329 	int tmp;
330 	struct interface *xifp;
331 
332 	dst = ntohl(dst);
333 
334 	/*
335 	 * Don't bother trying to aggregate routes with non-contiguous
336 	 * subnet masks.
337 	 *
338 	 * (X & -X) contains a single bit if and only if X is a power of 2.
339 	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
340 	 */
341 	if ((mask & -mask) + mask != 0) {
342 		struct ag_info nc_ag;
343 
344 		nc_ag.ag_dst_h = dst;
345 		nc_ag.ag_mask = mask;
346 		nc_ag.ag_gate = gate;
347 		nc_ag.ag_ifp = ifp;
348 		nc_ag.ag_nhop = nhop;
349 		nc_ag.ag_metric = metric;
350 		nc_ag.ag_pref = pref;
351 		nc_ag.ag_tag = tag;
352 		nc_ag.ag_state = state;
353 		nc_ag.ag_seqno = seqno;
354 		out(&nc_ag);
355 		return;
356 	}
357 
358 	/* Search for the right slot in the aggregation table. */
359 	ag_cors = NULL;
360 	ag = ag_corsest;
361 	while (ag != NULL) {
362 		if (ag->ag_mask >= mask)
363 			break;
364 
365 		/*
366 		 * Suppress old routes (i.e. combine with compatible routes
367 		 * with coarser masks) as we look for the right slot in the
368 		 * aggregation table for the new route.
369 		 * A route to an address less than the current destination
370 		 * will not be affected by the current route or any route
371 		 * seen hereafter.  That means it is safe to suppress it.
372 		 * This check keeps poor routes (e.g. with large hop counts)
373 		 * from preventing suppression of finer routes.
374 		 */
375 		if (ag_cors != NULL && ag->ag_dst_h < dst &&
376 		    (ag->ag_state & AGS_SUPPRESS) &&
377 		    ag_cors->ag_pref <= ag->ag_pref &&
378 		    (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h &&
379 		    ((ag_cors->ag_nhop == ag->ag_nhop &&
380 		    (ag_cors->ag_ifp == ag->ag_ifp))||
381 		    (ag->ag_state & AGS_FINE_GATE) ||
382 		    (ag_cors->ag_state & AGS_CORS_GATE))) {
383 			/*
384 			 * If the suppressed target was redundant,
385 			 * then mark the suppressor redundant.
386 			 */
387 			if (AG_IS_REDUN(ag->ag_state) &&
388 			    ag_cors->ag_mask == (ag->ag_mask << 1)) {
389 				if (ag_cors->ag_dst_h == dst)
390 					ag_cors->ag_state |= AGS_REDUN0;
391 				else
392 					ag_cors->ag_state |= AGS_REDUN1;
393 			}
394 			if (ag->ag_tag != ag_cors->ag_tag)
395 				ag_cors->ag_tag = 0;
396 			if (ag->ag_nhop != ag_cors->ag_nhop)
397 				ag_cors->ag_nhop = 0;
398 			ag_del(ag);
399 			CHECK_AG();
400 		} else {
401 			ag_cors = ag;
402 		}
403 		ag = ag_cors->ag_fine;
404 	}
405 
406 	/*
407 	 * If we find the even/odd twin of the new route, and if the
408 	 * masks and so forth are equal, we can aggregate them.
409 	 * We can probably promote one of the pair.
410 	 *
411 	 * Since the routes are encountered in lexical order,
412 	 * the new route must be odd.  However, the second or later
413 	 * times around this loop, it could be the even twin promoted
414 	 * from the even/odd pair of twins of the finer route.
415 	 */
416 	while (ag != NULL && ag->ag_mask == mask &&
417 	    ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
418 
419 		/*
420 		 * Here we know the target route and the route in the current
421 		 * slot have the same netmasks and differ by at most the
422 		 * last bit.  They are either for the same destination, or
423 		 * for an even/odd pair of destinations.
424 		 */
425 		if (ag->ag_dst_h == dst) {
426 			if (ag->ag_nhop == nhop && ag->ag_ifp == ifp) {
427 				/*
428 				 * We have two routes to the same destination,
429 				 * with the same nexthop and interface.
430 				 * Routes are encountered in lexical order,
431 				 * so a route is never promoted until the
432 				 * parent route is already present.  So we
433 				 * know that the new route is a promoted (or
434 				 * aggregated) pair and the route already in
435 				 * the slot is the explicit route.
436 				 *
437 				 * Prefer the best route if their metrics
438 				 * differ, or the aggregated one if not,
439 				 * following a sort of longest-match rule.
440 				 */
441 				if (pref <= ag->ag_pref) {
442 					ag->ag_gate = gate;
443 					ag->ag_ifp = ifp;
444 					ag->ag_nhop = nhop;
445 					ag->ag_tag = tag;
446 					ag->ag_metric = metric;
447 					ag->ag_pref = pref;
448 					if (seqno > ag->ag_seqno)
449 						ag->ag_seqno = seqno;
450 					tmp = ag->ag_state;
451 					ag->ag_state = state;
452 					state = tmp;
453 				}
454 
455 				/*
456 				 * Some bits are set if they are set on
457 				 * either route, except when the route is
458 				 * for an interface.
459 				 */
460 				if (!(ag->ag_state & AGS_IF))
461 					ag->ag_state |=
462 					    (state & (AGS_AGGREGATE_EITHER |
463 					    AGS_REDUN0 | AGS_REDUN1));
464 
465 				return;
466 			} else {
467 				/*
468 				 * multiple routes to same dest/mask with
469 				 * differing gate nexthop/or ifp. Flush
470 				 * both out.
471 				 */
472 				break;
473 			}
474 		}
475 
476 		/*
477 		 * If one of the routes can be promoted and the other can
478 		 * be suppressed, it may be possible to combine them or
479 		 * worthwhile to promote one.
480 		 *
481 		 * Any route that can be promoted is always
482 		 * marked to be eligible to be suppressed.
483 		 */
484 		if (!((state & AGS_AGGREGATE) &&
485 		    (ag->ag_state & AGS_SUPPRESS)) &&
486 		    !((ag->ag_state & AGS_AGGREGATE) && (state & AGS_SUPPRESS)))
487 			break;
488 
489 		/*
490 		 * A pair of even/odd twin routes can be combined
491 		 * if either is redundant, or if they are via the
492 		 * same gateway and have the same metric.
493 		 */
494 		if (AG_IS_REDUN(ag->ag_state) || AG_IS_REDUN(state) ||
495 		    (ag->ag_nhop == nhop && ag->ag_ifp == ifp &&
496 		    ag->ag_pref == pref &&
497 		    (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
498 
499 			/*
500 			 * We have both the even and odd pairs.
501 			 * Since the routes are encountered in order,
502 			 * the route in the slot must be the even twin.
503 			 *
504 			 * Combine and promote (aggregate) the pair of routes.
505 			 */
506 			if (seqno < ag->ag_seqno)
507 				seqno = ag->ag_seqno;
508 			if (!AG_IS_REDUN(state))
509 				state &= ~AGS_REDUN1;
510 			if (AG_IS_REDUN(ag->ag_state))
511 				state |= AGS_REDUN0;
512 			else
513 				state &= ~AGS_REDUN0;
514 			state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
515 			if (ag->ag_tag != tag)
516 				tag = 0;
517 			if (ag->ag_nhop != nhop)
518 				nhop = 0;
519 
520 			/*
521 			 * Get rid of the even twin that was already
522 			 * in the slot.
523 			 */
524 			ag_del(ag);
525 
526 		} else if (ag->ag_pref >= pref &&
527 		    (ag->ag_state & AGS_AGGREGATE)) {
528 			/*
529 			 * If we cannot combine the pair, maybe the route
530 			 * with the worse metric can be promoted.
531 			 *
532 			 * Promote the old, even twin, by giving its slot
533 			 * in the table to the new, odd twin.
534 			 */
535 			ag->ag_dst_h = dst;
536 
537 			xaddr = ag->ag_gate;
538 			ag->ag_gate = gate;
539 			gate = xaddr;
540 
541 			xifp = ag->ag_ifp;
542 			ag->ag_ifp = ifp;
543 			ifp = xifp;
544 
545 			xaddr = ag->ag_nhop;
546 			ag->ag_nhop = nhop;
547 			nhop = xaddr;
548 
549 			tmp = ag->ag_tag;
550 			ag->ag_tag = tag;
551 			tag = tmp;
552 
553 			/*
554 			 * The promoted route is even-redundant only if the
555 			 * even twin was fully redundant.  It is not
556 			 * odd-redundant because the odd-twin will still be
557 			 * in the table.
558 			 */
559 			tmp = ag->ag_state;
560 			if (!AG_IS_REDUN(tmp))
561 				tmp &= ~AGS_REDUN0;
562 			tmp &= ~AGS_REDUN1;
563 			ag->ag_state = state;
564 			state = tmp;
565 
566 			tmp = ag->ag_metric;
567 			ag->ag_metric = metric;
568 			metric = tmp;
569 
570 			tmp = ag->ag_pref;
571 			ag->ag_pref = pref;
572 			pref = tmp;
573 
574 			/* take the newest sequence number */
575 			if (seqno <= ag->ag_seqno)
576 				seqno = ag->ag_seqno;
577 			else
578 				ag->ag_seqno = seqno;
579 
580 		} else {
581 			if (!(state & AGS_AGGREGATE))
582 				break;	/* cannot promote either twin */
583 
584 			/*
585 			 * Promote the new, odd twin by shaving its
586 			 * mask and address.
587 			 * The promoted route is odd-redundant only if the
588 			 * odd twin was fully redundant.  It is not
589 			 * even-redundant because the even twin is still in
590 			 * the table.
591 			 */
592 			if (!AG_IS_REDUN(state))
593 				state &= ~AGS_REDUN1;
594 			state &= ~AGS_REDUN0;
595 			if (seqno < ag->ag_seqno)
596 				seqno = ag->ag_seqno;
597 			else
598 				ag->ag_seqno = seqno;
599 		}
600 
601 		mask <<= 1;
602 		dst &= mask;
603 
604 		if (ag_cors == NULL) {
605 			ag = ag_corsest;
606 			break;
607 		}
608 		ag = ag_cors;
609 		ag_cors = ag->ag_cors;
610 	}
611 
612 	/*
613 	 * When we can no longer promote and combine routes,
614 	 * flush the old route in the target slot.  Also flush
615 	 * any finer routes that we know will never be aggregated by
616 	 * the new route.
617 	 *
618 	 * In case we moved toward coarser masks,
619 	 * get back where we belong
620 	 */
621 	if (ag != NULL && ag->ag_mask < mask) {
622 		ag_cors = ag;
623 		ag = ag->ag_fine;
624 	}
625 
626 	/* Empty the target slot */
627 	if (ag != NULL && ag->ag_mask == mask) {
628 		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
629 		ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
630 	}
631 
632 #ifdef DEBUG_AG
633 	if (ag == NULL && ag_cors != ag_finest)
634 		abort();
635 	if (ag_cors == NULL && ag != ag_corsest)
636 		abort();
637 	if (ag != NULL && ag->ag_cors != ag_cors)
638 		abort();
639 	if (ag_cors != NULL && ag_cors->ag_fine != ag)
640 		abort();
641 	CHECK_AG();
642 #endif
643 
644 	/* Save the new route on the end of the table. */
645 	nag = ag_avail;
646 	ag_avail = nag->ag_fine;
647 
648 	nag->ag_dst_h = dst;
649 	nag->ag_mask = mask;
650 	nag->ag_ifp = ifp;
651 	nag->ag_gate = gate;
652 	nag->ag_nhop = nhop;
653 	nag->ag_metric = metric;
654 	nag->ag_pref = pref;
655 	nag->ag_tag = tag;
656 	nag->ag_state = state;
657 	nag->ag_seqno = seqno;
658 
659 	nag->ag_fine = ag;
660 	if (ag != NULL)
661 		ag->ag_cors = nag;
662 	else
663 		ag_finest = nag;
664 	nag->ag_cors = ag_cors;
665 	if (ag_cors == NULL)
666 		ag_corsest = nag;
667 	else
668 		ag_cors->ag_fine = nag;
669 	CHECK_AG();
670 }
671 
672 
/*
 * Map a routing socket message type to a printable name.
 *
 * Known types (1 through the size of the table) return a constant string.
 * Any other value, including 0, is formatted into a static buffer that is
 * overwritten by the next such call.
 */
static const char *
rtm_type_name(uchar_t type)
{
	/* Indexed by (type - 1). */
	static const char *const known[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define	UNKNOWN_RTM_PAT	"RTM type %#x"
	/* Room for the pattern with a two-digit hex value substituted. */
	static char unknown[sizeof (UNKNOWN_RTM_PAT) + 2];

	if (type != 0 && type <= sizeof (known) / sizeof (known[0]))
		return (known[type - 1]);

	(void) snprintf(unknown, sizeof (unknown), UNKNOWN_RTM_PAT, type);
	return (unknown);
#undef	UNKNOWN_RTM_PAT
}
705 
706 
707 static void
708 dump_rt_msg(const char *act, struct rt_msghdr *rtm, int mlen)
709 {
710 	const char *mtype;
711 	uchar_t *cp;
712 	int i, j;
713 	char buffer[16*3 + 1], *ibs;
714 	struct ifa_msghdr *ifam;
715 	struct if_msghdr *ifm;
716 
717 	switch (rtm->rtm_type) {
718 	case RTM_NEWADDR:
719 	case RTM_DELADDR:
720 		mtype = "ifam";
721 		break;
722 	case RTM_IFINFO:
723 		mtype = "ifm";
724 		break;
725 	default:
726 		mtype = "rtm";
727 		break;
728 	}
729 	trace_misc("%s %s %d bytes", act, mtype, mlen);
730 	if (mlen > rtm->rtm_msglen) {
731 		trace_misc("%s: extra %d bytes ignored", mtype,
732 		    mlen - rtm->rtm_msglen);
733 		mlen = rtm->rtm_msglen;
734 	} else if (mlen < rtm->rtm_msglen) {
735 		trace_misc("%s: truncated by %d bytes", mtype,
736 		    rtm->rtm_msglen - mlen);
737 	}
738 	switch (rtm->rtm_type) {
739 	case RTM_NEWADDR:
740 	case RTM_DELADDR:
741 		ifam = (struct ifa_msghdr *)rtm;
742 		trace_misc("ifam: msglen %d version %d type %d addrs %X",
743 		    ifam->ifam_msglen, ifam->ifam_version, ifam->ifam_type,
744 		    ifam->ifam_addrs);
745 		trace_misc("ifam: flags %X index %d metric %d",
746 		    ifam->ifam_flags, ifam->ifam_index, ifam->ifam_metric);
747 		cp = (uchar_t *)(ifam + 1);
748 		break;
749 	case RTM_IFINFO:
750 		ifm = (struct if_msghdr *)rtm;
751 		trace_misc("ifm: msglen %d version %d type %d addrs %X",
752 		    ifm->ifm_msglen, ifm->ifm_version, ifm->ifm_type,
753 		    ifm->ifm_addrs);
754 		ibs = if_bit_string(ifm->ifm_flags, _B_TRUE);
755 		if (ibs == NULL) {
756 			trace_misc("ifm: flags %#x index %d", ifm->ifm_flags,
757 			    ifm->ifm_index);
758 		} else {
759 			trace_misc("ifm: flags %s index %d", ibs,
760 			    ifm->ifm_index);
761 			free(ibs);
762 		}
763 		cp = (uchar_t *)(ifm + 1);
764 		break;
765 	default:
766 		trace_misc("rtm: msglen %d version %d type %d index %d",
767 		    rtm->rtm_msglen, rtm->rtm_version, rtm->rtm_type,
768 		    rtm->rtm_index);
769 		trace_misc("rtm: flags %X addrs %X pid %d seq %d",
770 		    rtm->rtm_flags, rtm->rtm_addrs, rtm->rtm_pid, rtm->rtm_seq);
771 		trace_misc("rtm: errno %d use %d inits %X", rtm->rtm_errno,
772 		    rtm->rtm_use, rtm->rtm_inits);
773 		cp = (uchar_t *)(rtm + 1);
774 		break;
775 	}
776 	i = mlen - (cp - (uint8_t *)rtm);
777 	while (i > 0) {
778 		buffer[0] = '\0';
779 		ibs = buffer;
780 		for (j = 0; j < 16 && i > 0; j++, i--)
781 			ibs += sprintf(ibs, " %02X", *cp++);
782 		trace_misc("addr%s", buffer);
783 	}
784 }
785 
786 /*
787  * Tell the kernel to add, delete or change a route
788  * Pass k_state from khash in for diagnostic info.
789  */
790 static void
791 rtioctl(int action,			/* RTM_DELETE, etc */
792     in_addr_t dst,
793     in_addr_t gate,
794     in_addr_t mask,
795     struct interface *ifp,
796     uint8_t metric,
797     int flags)
798 {
799 	static int rt_sock_seqno = 0;
800 	struct {
801 		struct rt_msghdr w_rtm;
802 		struct sockaddr_in w_dst;
803 		struct sockaddr_in w_gate;
804 		uint8_t w_space[512];
805 	} w;
806 	struct sockaddr_in w_mask;
807 	struct sockaddr_dl w_ifp;
808 	uint8_t *cp;
809 	long cc;
810 #define	PAT " %-10s %s metric=%d flags=%#x"
811 #define	ARGS rtm_type_name(action), rtname(dst, mask, gate), metric, flags
812 
813 again:
814 	(void) memset(&w, 0, sizeof (w));
815 	(void) memset(&w_mask, 0, sizeof (w_mask));
816 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
817 	cp = w.w_space;
818 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
819 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
820 	w.w_rtm.rtm_version = RTM_VERSION;
821 	w.w_rtm.rtm_type = action;
822 	w.w_rtm.rtm_flags = flags;
823 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
824 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
825 	if (metric != 0 || action == RTM_CHANGE) {
826 		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
827 		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
828 	}
829 	w.w_dst.sin_family = AF_INET;
830 	w.w_dst.sin_addr.s_addr = dst;
831 	w.w_gate.sin_family = AF_INET;
832 	w.w_gate.sin_addr.s_addr = gate;
833 	if (mask == HOST_MASK) {
834 		w.w_rtm.rtm_flags |= RTF_HOST;
835 	} else {
836 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
837 		w_mask.sin_family = AF_INET;
838 		w_mask.sin_addr.s_addr = htonl(mask);
839 		(void) memmove(cp, &w_mask, sizeof (w_mask));
840 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
841 		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
842 	}
843 	if (ifp == NULL)
844 		ifp = iflookup(gate);
845 
846 	if ((ifp == NULL) || (ifp->int_phys == NULL)) {
847 		trace_misc("no ifp for" PAT, ARGS);
848 	} else {
849 		if (ifp->int_phys->phyi_index > UINT16_MAX) {
850 			trace_misc("ifindex %d is too big for sdl_index",
851 			    ifp->int_phys->phyi_index);
852 		} else {
853 			w_ifp.sdl_family = AF_LINK;
854 			w.w_rtm.rtm_addrs |= RTA_IFP;
855 			w_ifp.sdl_index = ifp->int_phys->phyi_index;
856 			(void) memmove(cp, &w_ifp, sizeof (w_ifp));
857 			w.w_rtm.rtm_msglen +=
858 			    ROUNDUP_LONG(sizeof (struct sockaddr_dl));
859 		}
860 	}
861 
862 
863 	if (!no_install) {
864 		if (TRACERTS)
865 			dump_rt_msg("write", &w.w_rtm, w.w_rtm.rtm_msglen);
866 		cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
867 		if (cc < 0) {
868 			if (errno == ESRCH && (action == RTM_CHANGE ||
869 			    action == RTM_DELETE)) {
870 				trace_act("route disappeared before" PAT, ARGS);
871 				if (action == RTM_CHANGE) {
872 					action = RTM_ADD;
873 					goto again;
874 				}
875 				return;
876 			}
877 			writelog(LOG_WARNING, "write(rt_sock)" PAT ": %s ",
878 			    ARGS, rip_strerror(errno));
879 			return;
880 		} else if (cc != w.w_rtm.rtm_msglen) {
881 			msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
882 			    cc, w.w_rtm.rtm_msglen, ARGS);
883 			return;
884 		}
885 	}
886 	if (TRACEKERNEL)
887 		trace_misc("write kernel" PAT, ARGS);
888 #undef PAT
889 #undef ARGS
890 }
891 
892 
/*
 * Hash table containing our image of the kernel forwarding table.
 * KHASH(a, m) names the bin for a destination/mask pair: the two values
 * are XORed and reduced modulo KHASH_SIZE.  Each bin is a list chained
 * through k_next.
 */
#define	KHASH_SIZE 71			/* should be prime */
#define	KHASH(a, m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash *khash_bins[KHASH_SIZE];

/* NOTE(review): limit applied to k_keep; its user is outside this view */
#define	K_KEEP_LIM	30	/* k_keep */
899 
900 static struct khash *
901 kern_find(in_addr_t dst, in_addr_t mask, in_addr_t gate,
902     struct interface *ifp, struct khash ***ppk)
903 {
904 	struct khash *k, **pk;
905 
906 	if (ifp != NULL && ifp->int_phys != NULL) {
907 		ifp = ifwithname(ifp->int_phys->phyi_name);
908 	}
909 
910 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
911 		if (k->k_dst == dst && k->k_mask == mask &&
912 		    (gate == 0 || k->k_gate == gate) &&
913 		    (ifp == NULL || k->k_ifp == ifp)) {
914 			break;
915 		}
916 	}
917 	if (ppk != NULL)
918 		*ppk = pk;
919 	return (k);
920 }
921 
922 
923 /*
924  * Find out if there is an alternate route to a given destination
925  * off of a given interface.
926  */
927 static struct khash *
928 kern_alternate(in_addr_t dst, in_addr_t mask, in_addr_t gate,
929     struct interface *ifp, struct khash ***ppk)
930 {
931 	struct khash *k, **pk;
932 
933 	if (ifp != NULL && ifp->int_phys != NULL) {
934 		ifp = ifwithname(ifp->int_phys->phyi_name);
935 	}
936 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
937 		if (k->k_dst == dst && k->k_mask == mask &&
938 		    (k->k_gate != gate) &&
939 		    (k->k_ifp == ifp)) {
940 			break;
941 		}
942 	}
943 	if (ppk != NULL)
944 		*ppk = pk;
945 	return (k);
946 }
947 
948 static struct khash *
949 kern_add(in_addr_t dst, uint32_t mask, in_addr_t gate, struct interface *ifp)
950 {
951 	struct khash *k, **pk;
952 
953 	if (ifp != NULL && ifp->int_phys != NULL) {
954 		ifp = ifwithname(ifp->int_phys->phyi_name);
955 	}
956 	k = kern_find(dst, mask, gate, ifp, &pk);
957 	if (k != NULL)
958 		return (k);
959 
960 	k = rtmalloc(sizeof (*k), "kern_add");
961 
962 	(void) memset(k, 0, sizeof (*k));
963 	k->k_dst = dst;
964 	k->k_mask = mask;
965 	k->k_state = KS_NEW;
966 	k->k_keep = now.tv_sec;
967 	k->k_gate = gate;
968 	k->k_ifp = ifp;
969 	*pk = k;
970 
971 	return (k);
972 }
973 
974 /* delete all khash entries that are wired through the interface ifp */
975 void
976 kern_flush_ifp(struct interface *ifp)
977 {
978 	struct khash *k, *kprev, *knext;
979 	int i;
980 
981 	if (ifp != NULL && ifp->int_phys != NULL) {
982 		/*
983 		 * Only calculate phy ifp when the passed ifp is
984 		 * a logical IP interface. Otherwise the call
985 		 * ifwithname(phy ifname) will return NULL as we
986 		 * unlinked ifp from hashtables prior to this call
987 		 * in ifdel.
988 		 */
989 		if (strchr(ifp->int_phys->phyi_name, ':')) {
990 			ifp = ifwithname(ifp->int_phys->phyi_name);
991 		}
992 	}
993 
994 	for (i = 0; i < KHASH_SIZE; i++) {
995 		kprev = NULL;
996 		for (k = khash_bins[i]; k != NULL; k = knext) {
997 			knext = k->k_next;
998 			if (k->k_ifp == ifp) {
999 				if (kprev != NULL)
1000 					kprev->k_next = k->k_next;
1001 				else
1002 					khash_bins[i] = k->k_next;
1003 				free(k);
1004 				continue;
1005 			}
1006 			kprev = k;
1007 		}
1008 	}
1009 }
1010 
1011 /*
1012  * rewire khash entries that currently go through oldifp to
1013  * go through newifp.
1014  */
1015 void
1016 kern_rewire_ifp(struct interface *oldifp, struct interface *newifp)
1017 {
1018 	struct khash *k;
1019 	int i;
1020 
1021 	if (oldifp != NULL && oldifp->int_phys != NULL) {
1022 		oldifp = ifwithname(oldifp->int_phys->phyi_name);
1023 	}
1024 	if (newifp != NULL && newifp->int_phys != NULL) {
1025 		newifp = ifwithname(newifp->int_phys->phyi_name);
1026 	}
1027 	for (i = 0; i < KHASH_SIZE; i++) {
1028 		for (k = khash_bins[i]; k; k = k->k_next) {
1029 			if (k->k_ifp == oldifp) {
1030 				k->k_ifp = newifp;
1031 				trace_misc("kern_rewire_ifp k 0x%lx "
1032 				    "from %s to %s", k, oldifp->int_name,
1033 				    newifp->int_name);
1034 			}
1035 		}
1036 	}
1037 }
1038 
1039 
1040 /*
1041  * Check that a static route it is still in the daemon table, and not
1042  * deleted by interfaces coming and going.  This is also the routine
1043  * responsible for adding new static routes to the daemon table.
1044  */
1045 static void
1046 kern_check_static(struct khash *k, struct interface *ifp)
1047 {
1048 	struct rt_entry *rt;
1049 	struct rt_spare new;
1050 	uint16_t rt_state = RS_STATIC;
1051 
1052 	if (ifp != NULL && ifp->int_phys != NULL) {
1053 		ifp = ifwithname(ifp->int_phys->phyi_name);
1054 	}
1055 	(void) memset(&new, 0, sizeof (new));
1056 	new.rts_ifp = ifp;
1057 	new.rts_gate = k->k_gate;
1058 	new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
1059 	new.rts_metric = k->k_metric;
1060 	new.rts_time = now.tv_sec;
1061 	new.rts_origin = RO_STATIC;
1062 
1063 	rt = rtget(k->k_dst, k->k_mask);
1064 	if ((ifp != NULL && !IS_IFF_ROUTING(ifp->int_if_flags)) ||
1065 	    (k->k_state & KS_PRIVATE))
1066 		rt_state |= RS_NOPROPAGATE;
1067 
1068 	if (rt != NULL) {
1069 		if ((rt->rt_state & RS_STATIC) == 0) {
1070 			/*
1071 			 * We are already tracking this dest/mask
1072 			 * via RIP/RDISC. Ignore the static route,
1073 			 * because we don't currently have a good
1074 			 * way to compare metrics on static routes
1075 			 * with rip metrics, and therefore cannot
1076 			 * mix and match the two.
1077 			 */
1078 			return;
1079 		}
1080 		rt_state |= rt->rt_state;
1081 		if (rt->rt_state != rt_state)
1082 			rtchange(rt, rt_state, &new, 0);
1083 	} else {
1084 		rtadd(k->k_dst, k->k_mask, rt_state, &new);
1085 	}
1086 }
1087 
1088 
1089 /* operate on a kernel entry */
1090 static void
1091 kern_ioctl(struct khash *k,
1092     int action,			/* RTM_DELETE, etc */
1093     int flags)
1094 {
1095 	if (((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) ||
1096 	    (k->k_state & KS_DEPRE_IF)) {
1097 		/*
1098 		 * Prevent execution of RTM_DELETE, RTM_ADD or
1099 		 * RTM_CHANGE of interface routes
1100 		 */
1101 		trace_act("Blocking execution of %s  %s --> %s ",
1102 		    rtm_type_name(action),
1103 		    addrname(k->k_dst, k->k_mask, 0), naddr_ntoa(k->k_gate));
1104 		return;
1105 	}
1106 
1107 	switch (action) {
1108 	case RTM_DELETE:
1109 		k->k_state &= ~KS_DYNAMIC;
1110 		if (k->k_state & KS_DELETED)
1111 			return;
1112 		k->k_state |= KS_DELETED;
1113 		break;
1114 	case RTM_ADD:
1115 		k->k_state &= ~KS_DELETED;
1116 		break;
1117 	case RTM_CHANGE:
1118 		if (k->k_state & KS_DELETED) {
1119 			action = RTM_ADD;
1120 			k->k_state &= ~KS_DELETED;
1121 		}
1122 		break;
1123 	}
1124 
1125 	rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_ifp,
1126 	    k->k_metric, flags);
1127 }
1128 
1129 
1130 /* add a route the kernel told us */
1131 static void
1132 rtm_add(struct rt_msghdr *rtm,
1133     struct rt_addrinfo *info,
1134     time_t keep,
1135     boolean_t interf_route,
1136     struct interface *ifptr)
1137 {
1138 	struct khash *k;
1139 	struct interface *ifp = ifptr;
1140 	in_addr_t mask, gate = 0;
1141 	static struct msg_limit msg_no_ifp;
1142 
1143 	if (rtm->rtm_flags & RTF_HOST) {
1144 		mask = HOST_MASK;
1145 	} else if (INFO_MASK(info) != 0) {
1146 		mask = ntohl(S_ADDR(INFO_MASK(info)));
1147 	} else {
1148 		writelog(LOG_WARNING,
1149 		    "ignore %s without mask", rtm_type_name(rtm->rtm_type));
1150 		return;
1151 	}
1152 
1153 	/*
1154 	 * Find the interface toward the gateway.
1155 	 */
1156 	if (INFO_GATE(info) != NULL)
1157 		gate = S_ADDR(INFO_GATE(info));
1158 
1159 	if (ifp == NULL) {
1160 		if (INFO_GATE(info) != NULL)
1161 			ifp = iflookup(gate);
1162 		if (ifp == NULL) {
1163 			msglim(&msg_no_ifp, gate,
1164 			    "route %s --> %s nexthop is not directly connected",
1165 			    addrname(S_ADDR(INFO_DST(info)), mask, 0),
1166 			    naddr_ntoa(gate));
1167 		} else {
1168 			if (ifp->int_phys != NULL) {
1169 				ifp = ifwithname(ifp->int_phys->phyi_name);
1170 			}
1171 		}
1172 	}
1173 
1174 	k = kern_add(S_ADDR(INFO_DST(info)), mask, gate, ifp);
1175 
1176 	if (k->k_state & KS_NEW)
1177 		k->k_keep = now.tv_sec+keep;
1178 	if (INFO_GATE(info) == 0) {
1179 		trace_act("note %s without gateway",
1180 		    rtm_type_name(rtm->rtm_type));
1181 		k->k_metric = HOPCNT_INFINITY;
1182 	} else if (INFO_GATE(info)->ss_family != AF_INET) {
1183 		trace_act("note %s with gateway AF=%d",
1184 		    rtm_type_name(rtm->rtm_type),
1185 		    INFO_GATE(info)->ss_family);
1186 		k->k_metric = HOPCNT_INFINITY;
1187 	} else {
1188 		k->k_gate = S_ADDR(INFO_GATE(info));
1189 		k->k_metric = rtm->rtm_rmx.rmx_hopcount;
1190 		if (k->k_metric < 0)
1191 			k->k_metric = 0;
1192 		else if (k->k_metric > HOPCNT_INFINITY-1)
1193 			k->k_metric = HOPCNT_INFINITY-1;
1194 	}
1195 
1196 	if ((k->k_state & KS_NEW) && interf_route) {
1197 		if (k->k_gate != 0 && findifaddr(k->k_gate) == NULL)
1198 			k->k_state |= KS_DEPRE_IF;
1199 		else
1200 			k->k_state |= KS_IF;
1201 	}
1202 
1203 	k->k_state &= ~(KS_NEW | KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD |
1204 	    KS_STATIC | KS_GATEWAY | KS_DELETED | KS_PRIVATE | KS_CHECK);
1205 	if (rtm->rtm_flags & RTF_GATEWAY)
1206 		k->k_state |= KS_GATEWAY;
1207 	if (rtm->rtm_flags & RTF_STATIC)
1208 		k->k_state |= KS_STATIC;
1209 	if (rtm->rtm_flags & RTF_PRIVATE)
1210 		k->k_state |= KS_PRIVATE;
1211 
1212 
1213 	if (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED)) {
1214 		if (INFO_AUTHOR(info) != 0 &&
1215 		    INFO_AUTHOR(info)->ss_family == AF_INET)
1216 			ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
1217 		else
1218 			ifp = NULL;
1219 		if (should_supply(ifp) && (ifp == NULL ||
1220 		    !(ifp->int_state & IS_REDIRECT_OK))) {
1221 			/*
1222 			 * Routers are not supposed to listen to redirects,
1223 			 * so delete it if it came via an unknown interface
1224 			 * or the interface does not have special permission.
1225 			 */
1226 			k->k_state &= ~KS_DYNAMIC;
1227 			k->k_state |= KS_DELETE;
1228 			LIM_SEC(need_kern, 0);
1229 			trace_act("mark for deletion redirected %s --> %s"
1230 			    " via %s",
1231 			    addrname(k->k_dst, k->k_mask, 0),
1232 			    naddr_ntoa(k->k_gate),
1233 			    ifp ? ifp->int_name : "unknown interface");
1234 		} else {
1235 			k->k_state |= KS_DYNAMIC;
1236 			k->k_redirect_time = now.tv_sec;
1237 			trace_act("accept redirected %s --> %s via %s",
1238 			    addrname(k->k_dst, k->k_mask, 0),
1239 			    naddr_ntoa(k->k_gate),
1240 			    ifp ? ifp->int_name : "unknown interface");
1241 		}
1242 		return;
1243 	}
1244 
1245 	/*
1246 	 * If it is not a static route, quit until the next comparison
1247 	 * between the kernel and daemon tables, when it will be deleted.
1248 	 */
1249 	if (!(k->k_state & KS_STATIC)) {
1250 		if (!(k->k_state & (KS_IF|KS_DEPRE_IF|KS_FILE)))
1251 			k->k_state |= KS_DELETE;
1252 		LIM_SEC(need_kern, k->k_keep);
1253 		return;
1254 	}
1255 
1256 	/*
1257 	 * Put static routes with real metrics into the daemon table so
1258 	 * they can be advertised.
1259 	 */
1260 
1261 	kern_check_static(k, ifp);
1262 }
1263 
1264 
1265 /* deal with packet loss */
1266 static void
1267 rtm_lose(struct rt_msghdr *rtm, struct rt_addrinfo *info)
1268 {
1269 	if (INFO_GATE(info) == NULL || INFO_GATE(info)->ss_family != AF_INET) {
1270 		trace_act("ignore %s without gateway",
1271 		    rtm_type_name(rtm->rtm_type));
1272 		age(0);
1273 		return;
1274 	}
1275 
1276 	if (rdisc_ok)
1277 		rdisc_age(S_ADDR(INFO_GATE(info)));
1278 	age(S_ADDR(INFO_GATE(info)));
1279 }
1280 
1281 
1282 /*
1283  * Make the gateway slot of an info structure point to something
1284  * useful.  If it is not already useful, but it specifies an interface,
1285  * then fill in the sockaddr_in provided and point it there.
1286  */
1287 static int
1288 get_info_gate(struct sockaddr_storage **ssp, struct sockaddr_in *sin)
1289 {
1290 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)*ssp;
1291 	struct interface *ifp;
1292 
1293 	if (sdl == NULL)
1294 		return (0);
1295 	if ((sdl)->sdl_family == AF_INET)
1296 		return (1);
1297 	if ((sdl)->sdl_family != AF_LINK)
1298 		return (0);
1299 
1300 	ifp = ifwithindex(sdl->sdl_index, _B_TRUE);
1301 	if (ifp == NULL)
1302 		return (0);
1303 
1304 	sin->sin_addr.s_addr = ifp->int_addr;
1305 	sin->sin_family = AF_INET;
1306 	/* LINTED */
1307 	*ssp = (struct sockaddr_storage *)sin;
1308 
1309 	return (1);
1310 }
1311 
1312 
1313 /*
1314  * Clean the kernel table by copying it to the daemon image.
1315  * Eventually the daemon will delete any extra routes.
1316  */
1317 void
1318 sync_kern(void)
1319 {
1320 	int i;
1321 	struct khash *k;
1322 	struct {
1323 		struct T_optmgmt_req req;
1324 		struct opthdr hdr;
1325 	} req;
1326 	union {
1327 		struct T_optmgmt_ack ack;
1328 		unsigned char space[64];
1329 	} ack;
1330 	struct opthdr *rh;
1331 	struct strbuf cbuf, dbuf;
1332 	int ipfd, nroutes, flags, r;
1333 	mib2_ipRouteEntry_t routes[8];
1334 	mib2_ipRouteEntry_t *rp;
1335 	struct rt_msghdr rtm;
1336 	struct rt_addrinfo info;
1337 	struct sockaddr_in sin_dst;
1338 	struct sockaddr_in sin_gate;
1339 	struct sockaddr_in sin_mask;
1340 	struct sockaddr_in sin_author;
1341 	struct interface *ifp;
1342 	char ifname[LIFNAMSIZ + 1];
1343 
1344 	for (i = 0; i < KHASH_SIZE; i++) {
1345 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1346 			if (!(k->k_state & (KS_IF|KS_DEPRE_IF)))
1347 				k->k_state |= KS_CHECK;
1348 		}
1349 	}
1350 
1351 	ipfd = open(IP_DEV_NAME, O_RDWR);
1352 	if (ipfd == -1) {
1353 		msglog("open " IP_DEV_NAME ": %s", rip_strerror(errno));
1354 		goto hash_clean;
1355 	}
1356 
1357 	req.req.PRIM_type = T_OPTMGMT_REQ;
1358 	req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req;
1359 	req.req.OPT_length = sizeof (req.hdr);
1360 	req.req.MGMT_flags = T_CURRENT;
1361 
1362 	req.hdr.level = MIB2_IP;
1363 	req.hdr.name = 0;
1364 	req.hdr.len = 0;
1365 
1366 	cbuf.buf = (caddr_t)&req;
1367 	cbuf.len = sizeof (req);
1368 
1369 	if (putmsg(ipfd, &cbuf, NULL, 0) == -1) {
1370 		msglog("T_OPTMGMT_REQ putmsg: %s", rip_strerror(errno));
1371 		goto hash_clean;
1372 	}
1373 
1374 	for (;;) {
1375 		cbuf.buf = (caddr_t)&ack;
1376 		cbuf.maxlen = sizeof (ack);
1377 		dbuf.buf = (caddr_t)routes;
1378 		dbuf.maxlen = sizeof (routes);
1379 		flags = 0;
1380 		r = getmsg(ipfd, &cbuf, &dbuf, &flags);
1381 		if (r == -1) {
1382 			msglog("T_OPTMGMT_REQ getmsg: %s", rip_strerror(errno));
1383 			goto hash_clean;
1384 		}
1385 
1386 		if (cbuf.len < sizeof (struct T_optmgmt_ack) ||
1387 		    ack.ack.PRIM_type != T_OPTMGMT_ACK ||
1388 		    ack.ack.MGMT_flags != T_SUCCESS ||
1389 		    ack.ack.OPT_length < sizeof (struct opthdr)) {
1390 			msglog("bad T_OPTMGMT response; len=%d prim=%d "
1391 			    "flags=%d optlen=%d", cbuf.len, ack.ack.PRIM_type,
1392 			    ack.ack.MGMT_flags, ack.ack.OPT_length);
1393 			goto hash_clean;
1394 		}
1395 		/* LINTED */
1396 		rh = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset);
1397 		if (rh->level == 0 && rh->name == 0) {
1398 			break;
1399 		}
1400 		if (rh->level != MIB2_IP || rh->name != MIB2_IP_21) {
1401 			while (r == MOREDATA) {
1402 				r = getmsg(ipfd, NULL, &dbuf, &flags);
1403 			}
1404 			continue;
1405 		}
1406 		break;
1407 	}
1408 
1409 	(void) memset(&rtm, 0, sizeof (rtm));
1410 	(void) memset(&info, 0, sizeof (info));
1411 	(void) memset(&sin_dst, 0, sizeof (sin_dst));
1412 	(void) memset(&sin_gate, 0, sizeof (sin_gate));
1413 	(void) memset(&sin_mask, 0, sizeof (sin_mask));
1414 	(void) memset(&sin_author, 0, sizeof (sin_author));
1415 	sin_dst.sin_family = AF_INET;
1416 	/* LINTED */
1417 	info.rti_info[RTAX_DST] = (struct sockaddr_storage *)&sin_dst;
1418 	sin_gate.sin_family = AF_INET;
1419 	/* LINTED */
1420 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr_storage *)&sin_gate;
1421 	sin_mask.sin_family = AF_INET;
1422 	/* LINTED */
1423 	info.rti_info[RTAX_NETMASK] = (struct sockaddr_storage *)&sin_mask;
1424 	sin_dst.sin_family = AF_INET;
1425 	/* LINTED */
1426 	info.rti_info[RTAX_AUTHOR] = (struct sockaddr_storage *)&sin_author;
1427 
1428 	for (;;) {
1429 		nroutes = dbuf.len / sizeof (mib2_ipRouteEntry_t);
1430 		for (rp = routes; nroutes > 0; ++rp, nroutes--) {
1431 
1432 			/*
1433 			 * Ignore IRE cache, broadcast, and local address
1434 			 * entries; they're not subject to routing socket
1435 			 * control.
1436 			 */
1437 			if (rp->ipRouteInfo.re_ire_type &
1438 			    (IRE_BROADCAST | IRE_CACHE | IRE_LOCAL))
1439 				continue;
1440 
1441 			/* ignore multicast and link local addresses */
1442 			if (IN_MULTICAST(ntohl(rp->ipRouteDest)) ||
1443 			    IN_LINKLOCAL(ntohl(rp->ipRouteDest))) {
1444 				continue;
1445 			}
1446 
1447 
1448 #ifdef DEBUG_KERNEL_ROUTE_READ
1449 			(void) fprintf(stderr, "route type %d, ire type %08X, "
1450 			    "flags %08X: %s", rp->ipRouteType,
1451 			    rp->ipRouteInfo.re_ire_type,
1452 			    rp->ipRouteInfo.re_flags,
1453 			    naddr_ntoa(rp->ipRouteDest));
1454 			(void) fprintf(stderr, " %s",
1455 			    naddr_ntoa(rp->ipRouteMask));
1456 			(void) fprintf(stderr, " %s\n",
1457 			    naddr_ntoa(rp->ipRouteNextHop));
1458 #endif
1459 
1460 			/* Fake up the needed entries */
1461 			rtm.rtm_flags = rp->ipRouteInfo.re_flags;
1462 			rtm.rtm_type = RTM_GET;
1463 			rtm.rtm_rmx.rmx_hopcount = rp->ipRouteMetric1;
1464 
1465 			(void) memset(ifname, 0, sizeof (ifname));
1466 			if (rp->ipRouteIfIndex.o_length <
1467 			    sizeof (rp->ipRouteIfIndex.o_bytes))
1468 				rp->ipRouteIfIndex.o_bytes[
1469 				    rp->ipRouteIfIndex.o_length] = '\0';
1470 				(void) strncpy(ifname,
1471 				    rp->ipRouteIfIndex.o_bytes,
1472 				    sizeof (ifname));
1473 
1474 			/*
1475 			 * First try to match up on gwkludge entries
1476 			 * before trying to match ifp by name.
1477 			 */
1478 			if ((ifp = gwkludge_iflookup(rp->ipRouteDest,
1479 			    rp->ipRouteNextHop,
1480 			    ntohl(rp->ipRouteMask))) == NULL) {
1481 				ifp = ifwithname(ifname);
1482 				if (ifp != NULL && ifp->int_phys != NULL) {
1483 					ifp = ifwithname(
1484 					    ifp->int_phys->phyi_name);
1485 				}
1486 			}
1487 
1488 			info.rti_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK;
1489 			if (rp->ipRouteInfo.re_ire_type & IRE_HOST_REDIRECT)
1490 				info.rti_addrs |= RTA_AUTHOR;
1491 			sin_dst.sin_addr.s_addr = rp->ipRouteDest;
1492 			sin_gate.sin_addr.s_addr = rp->ipRouteNextHop;
1493 			sin_mask.sin_addr.s_addr = rp->ipRouteMask;
1494 			sin_author.sin_addr.s_addr =
1495 			    rp->ipRouteInfo.re_src_addr;
1496 
1497 			/*
1498 			 * Note static routes and interface routes, and also
1499 			 * preload the image of the kernel table so that
1500 			 * we can later clean it, as well as avoid making
1501 			 * unneeded changes.  Keep the old kernel routes for a
1502 			 * few seconds to allow a RIP or router-discovery
1503 			 * response to be heard.
1504 			 */
1505 			rtm_add(&rtm, &info, MAX_WAITTIME,
1506 			    ((rp->ipRouteInfo.re_ire_type &
1507 			    (IRE_INTERFACE|IRE_LOOPBACK)) != 0), ifp);
1508 		}
1509 		if (r == 0) {
1510 			break;
1511 		}
1512 		r = getmsg(ipfd, NULL, &dbuf, &flags);
1513 	}
1514 
1515 hash_clean:
1516 	if (ipfd != -1)
1517 		(void) close(ipfd);
1518 	for (i = 0; i < KHASH_SIZE; i++) {
1519 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1520 
1521 			/*
1522 			 * KS_DELETED routes have been removed from the
1523 			 * kernel, but we keep them around for reasons
1524 			 * stated in del_static(), so we skip the check
1525 			 * for KS_DELETED routes here.
1526 			 */
1527 			if ((k->k_state & (KS_CHECK|KS_DELETED)) == KS_CHECK) {
1528 
1529 				if (!(k->k_state & KS_DYNAMIC)) {
1530 					writelog(LOG_WARNING,
1531 					    "%s --> %s disappeared from kernel",
1532 					    addrname(k->k_dst, k->k_mask, 0),
1533 					    naddr_ntoa(k->k_gate));
1534 				}
1535 				del_static(k->k_dst, k->k_mask, k->k_gate,
1536 				    k->k_ifp, 1);
1537 
1538 			}
1539 		}
1540 	}
1541 }
1542 
1543 
1544 /* Listen to announcements from the kernel */
1545 void
1546 read_rt(void)
1547 {
1548 	long cc;
1549 	struct interface *ifp;
1550 	struct sockaddr_in gate_sin;
1551 	in_addr_t mask, gate;
1552 	union {
1553 		struct {
1554 			struct rt_msghdr rtm;
1555 			struct sockaddr_storage addrs[RTA_NUMBITS];
1556 		} r;
1557 		struct if_msghdr ifm;
1558 	} m;
1559 	char str[100], *strp;
1560 	struct rt_addrinfo info;
1561 
1562 
1563 	for (;;) {
1564 		cc = read(rt_sock, &m, sizeof (m));
1565 		if (cc <= 0) {
1566 			if (cc < 0 && errno != EWOULDBLOCK)
1567 				LOGERR("read(rt_sock)");
1568 			return;
1569 		}
1570 
1571 		if (TRACERTS)
1572 			dump_rt_msg("read", &m.r.rtm, cc);
1573 
1574 		if (cc < m.r.rtm.rtm_msglen) {
1575 			msglog("routing message truncated (%d < %d)",
1576 			    cc, m.r.rtm.rtm_msglen);
1577 		}
1578 
1579 		if (m.r.rtm.rtm_version != RTM_VERSION) {
1580 			msglog("bogus routing message version %d",
1581 			    m.r.rtm.rtm_version);
1582 			continue;
1583 		}
1584 
1585 		ifp = NULL;
1586 
1587 		if (m.r.rtm.rtm_type == RTM_IFINFO ||
1588 		    m.r.rtm.rtm_type == RTM_NEWADDR ||
1589 		    m.r.rtm.rtm_type == RTM_DELADDR) {
1590 			strp = if_bit_string(m.ifm.ifm_flags, _B_TRUE);
1591 			if (strp == NULL) {
1592 				strp = str;
1593 				(void) sprintf(str, "%#x", m.ifm.ifm_flags);
1594 			}
1595 			ifp = ifwithindex(m.ifm.ifm_index,
1596 			    m.r.rtm.rtm_type != RTM_DELADDR);
1597 			if (ifp == NULL) {
1598 				char ifname[LIFNAMSIZ], *ifnamep;
1599 
1600 				ifnamep = if_indextoname(m.ifm.ifm_index,
1601 				    ifname);
1602 				if (ifnamep == NULL) {
1603 					trace_act("note %s with flags %s"
1604 					    " for unknown interface index #%d",
1605 					    rtm_type_name(m.r.rtm.rtm_type),
1606 					    strp, m.ifm.ifm_index);
1607 				} else {
1608 					trace_act("note %s with flags %s"
1609 					    " for unknown interface %s",
1610 					    rtm_type_name(m.r.rtm.rtm_type),
1611 					    strp, ifnamep);
1612 				}
1613 			} else {
1614 				trace_act("note %s with flags %s for %s",
1615 				    rtm_type_name(m.r.rtm.rtm_type),
1616 				    strp, ifp->int_name);
1617 			}
1618 			if (strp != str)
1619 				free(strp);
1620 
1621 			/*
1622 			 * After being informed of a change to an interface,
1623 			 * check them all now if the check would otherwise
1624 			 * be a long time from now, if the interface is
1625 			 * not known, or if the interface has been turned
1626 			 * off or on.
1627 			 */
1628 			if (ifscan_timer.tv_sec-now.tv_sec >=
1629 			    CHECK_BAD_INTERVAL || ifp == NULL ||
1630 			    ((ifp->int_if_flags ^ m.ifm.ifm_flags) &
1631 			    IFF_UP) != 0)
1632 				ifscan_timer.tv_sec = now.tv_sec;
1633 			continue;
1634 		} else {
1635 			if (m.r.rtm.rtm_index != 0)
1636 				ifp = ifwithindex(m.r.rtm.rtm_index, 1);
1637 		}
1638 
1639 		(void) strlcpy(str, rtm_type_name(m.r.rtm.rtm_type),
1640 		    sizeof (str));
1641 		strp = &str[strlen(str)];
1642 		if (m.r.rtm.rtm_type <= RTM_CHANGE)
1643 			strp += snprintf(strp, sizeof (str) - (strp - str),
1644 			    " from pid %d", (int)m.r.rtm.rtm_pid);
1645 
1646 		/* LINTED */
1647 		(void) rt_xaddrs(&info, (struct sockaddr_storage *)(&m.r.rtm +
1648 		    1), (char *)&m + cc, m.r.rtm.rtm_addrs);
1649 
1650 		if (INFO_DST(&info) == 0) {
1651 			trace_act("ignore %s without dst", str);
1652 			continue;
1653 		}
1654 
1655 		if (INFO_DST(&info)->ss_family != AF_INET) {
1656 			trace_act("ignore %s for AF %d", str,
1657 			    INFO_DST(&info)->ss_family);
1658 			continue;
1659 		}
1660 
1661 		mask = ((INFO_MASK(&info) != 0) ?
1662 		    ntohl(S_ADDR(INFO_MASK(&info))) :
1663 		    (m.r.rtm.rtm_flags & RTF_HOST) ?
1664 		    HOST_MASK : std_mask(S_ADDR(INFO_DST(&info))));
1665 
1666 		strp += snprintf(strp, sizeof (str) - (strp - str), ": %s",
1667 		    addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1668 
1669 		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))) ||
1670 		    IN_LINKLOCAL(ntohl(S_ADDR(INFO_DST(&info))))) {
1671 			trace_act("ignore multicast/link local %s", str);
1672 			continue;
1673 		}
1674 
1675 		if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1676 			trace_act("ignore ARP %s", str);
1677 			continue;
1678 		}
1679 
1680 		if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1681 			gate = S_ADDR(INFO_GATE(&info));
1682 			strp += snprintf(strp, sizeof (str) - (strp - str),
1683 			    " --> %s", naddr_ntoa(gate));
1684 		} else {
1685 			gate = 0;
1686 		}
1687 
1688 		if (INFO_AUTHOR(&info) != 0)
1689 			strp += snprintf(strp, sizeof (str) - (strp - str),
1690 			    " by authority of %s",
1691 			    saddr_ntoa(INFO_AUTHOR(&info)));
1692 
1693 		switch (m.r.rtm.rtm_type) {
1694 		case RTM_ADD:
1695 		case RTM_CHANGE:
1696 		case RTM_REDIRECT:
1697 			if (m.r.rtm.rtm_errno != 0) {
1698 				trace_act("ignore %s with \"%s\" error",
1699 				    str, rip_strerror(m.r.rtm.rtm_errno));
1700 			} else {
1701 				trace_act("%s", str);
1702 				rtm_add(&m.r.rtm, &info, 0,
1703 				    !(m.r.rtm.rtm_flags & RTF_GATEWAY) &&
1704 				    m.r.rtm.rtm_type != RTM_REDIRECT, ifp);
1705 
1706 			}
1707 			break;
1708 
1709 		case RTM_DELETE:
1710 			if (m.r.rtm.rtm_errno != 0 &&
1711 			    m.r.rtm.rtm_errno != ESRCH) {
1712 				trace_act("ignore %s with \"%s\" error",
1713 				    str, rip_strerror(m.r.rtm.rtm_errno));
1714 			} else {
1715 				trace_act("%s", str);
1716 				del_static(S_ADDR(INFO_DST(&info)), mask,
1717 				    gate, ifp, 1);
1718 			}
1719 			break;
1720 
1721 		case RTM_LOSING:
1722 			trace_act("%s", str);
1723 			rtm_lose(&m.r.rtm, &info);
1724 			break;
1725 
1726 		default:
1727 			trace_act("ignore %s", str);
1728 			break;
1729 		}
1730 	}
1731 }
1732 
1733 
1734 /*
1735  * Disassemble a routing message.  The result is an array of pointers
1736  * to sockaddr_storage structures stored in the info argument.
1737  *
1738  * ss is a pointer to the beginning of the data following the
1739  * rt_msghdr contained in the routing socket message, which consists
1740  * of a string of concatenated sockaddr structure of different types.
1741  *
1742  * Extended attributes can be appended at the end of the list.
1743  */
1744 static int
1745 rt_xaddrs(struct rt_addrinfo *info,
1746     struct sockaddr_storage *ss,
1747     char *lim,
1748     int addrs)
1749 {
1750 	int retv = 0;
1751 	int i;
1752 	int abit;
1753 	int complaints;
1754 	static int prev_complaints;
1755 
1756 #define	XBAD_AF		0x1
1757 #define	XBAD_SHORT	0x2
1758 #define	XBAD_LONG	0x4
1759 
1760 	(void) memset(info, 0, sizeof (*info));
1761 	info->rti_addrs = addrs;
1762 	complaints = 0;
1763 	for (i = 0, abit = 1; i < RTAX_MAX && (char *)ss < lim;
1764 	    i++, abit <<= 1) {
1765 		if ((addrs & abit) == 0)
1766 			continue;
1767 		info->rti_info[i] = ss;
1768 		/* Horrible interface here */
1769 		switch (ss->ss_family) {
1770 		case AF_UNIX:
1771 			/* LINTED */
1772 			ss = (struct sockaddr_storage *)(
1773 			    (struct sockaddr_un *)ss + 1);
1774 			break;
1775 		case AF_INET:
1776 			/* LINTED */
1777 			ss = (struct sockaddr_storage *)(
1778 			    (struct sockaddr_in *)ss + 1);
1779 			break;
1780 		case AF_LINK:
1781 			/* LINTED */
1782 			ss = (struct sockaddr_storage *)(
1783 			    (struct sockaddr_dl *)ss + 1);
1784 			break;
1785 		case AF_INET6:
1786 			/* LINTED */
1787 			ss = (struct sockaddr_storage *)(
1788 			    (struct sockaddr_in6 *)ss + 1);
1789 			break;
1790 		default:
1791 			if (!(prev_complaints & XBAD_AF))
1792 				writelog(LOG_WARNING,
1793 				    "unknown address family %d "
1794 				    "encountered", ss->ss_family);
1795 			if (complaints & XBAD_AF)
1796 				goto xaddr_done;
1797 			/* LINTED */
1798 			ss = (struct sockaddr_storage *)(
1799 			    (struct sockaddr *)ss + 1);
1800 			complaints |= XBAD_AF;
1801 			info->rti_addrs &= abit - 1;
1802 			addrs = info->rti_addrs;
1803 			retv = -1;
1804 			break;
1805 		}
1806 		if ((char *)ss > lim) {
1807 			if (!(prev_complaints & XBAD_SHORT))
1808 				msglog("sockaddr %d too short by %d "
1809 				    "bytes", i + 1, (char *)ss - lim);
1810 			complaints |= XBAD_SHORT;
1811 			info->rti_info[i] = NULL;
1812 			info->rti_addrs &= abit - 1;
1813 			retv = -1;
1814 			goto xaddr_done;
1815 		}
1816 	}
1817 
1818 	while (((char *)ss + sizeof (rtm_ext_t)) <= lim) {
1819 		rtm_ext_t *tp;
1820 		char *nxt;
1821 
1822 		/* LINTED: alignment */
1823 		tp = (rtm_ext_t *)ss;
1824 		nxt = (char *)(tp + 1) + tp->rtmex_len;
1825 
1826 		if (!IS_P2ALIGNED(tp->rtmex_len, sizeof (uint32_t)) ||
1827 		    nxt > lim) {
1828 			break;
1829 		}
1830 
1831 		/* LINTED: alignment */
1832 		ss = (struct sockaddr_storage *)nxt;
1833 	}
1834 
1835 	if ((char *)ss != lim) {
1836 		if ((char *)ss > lim) {
1837 			if (!(prev_complaints & XBAD_SHORT))
1838 				msglog("routing message too short by %d bytes",
1839 				    (char *)ss - lim);
1840 			complaints |= XBAD_SHORT;
1841 		} else if (!(prev_complaints & XBAD_LONG)) {
1842 			msglog("%d bytes of routing message left over",
1843 			    lim - (char *)ss);
1844 			complaints |= XBAD_LONG;
1845 		}
1846 		retv = -1;
1847 	}
1848 xaddr_done:
1849 	prev_complaints = complaints;
1850 	return (retv);
1851 }
1852 
1853 
1854 /* after aggregating, note routes that belong in the kernel */
1855 static void
1856 kern_out(struct ag_info *ag)
1857 {
1858 	struct khash *k;
1859 	struct interface *ifp;
1860 
1861 	ifp = ag->ag_ifp;
1862 
1863 	if (ifp != NULL && ifp->int_phys != NULL) {
1864 		ifp = ifwithname(ifp->int_phys->phyi_name);
1865 	}
1866 
1867 	/*
1868 	 * Do not install bad routes if they are not already present.
1869 	 * This includes routes that had RS_NET_SYN for interfaces that
1870 	 * recently died.
1871 	 */
1872 	if (ag->ag_metric == HOPCNT_INFINITY) {
1873 		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask,
1874 		    ag->ag_nhop, ag->ag_ifp, NULL);
1875 		if (k == NULL)
1876 			return;
1877 	} else {
1878 		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask, ag->ag_nhop,
1879 		    ifp);
1880 	}
1881 
1882 	if (k->k_state & KS_NEW) {
1883 		/* will need to add new entry to the kernel table */
1884 		k->k_state = KS_ADD;
1885 		if (ag->ag_state & AGS_GATEWAY)
1886 			k->k_state |= KS_GATEWAY;
1887 		if (ag->ag_state & AGS_IF)
1888 			k->k_state |= KS_IF;
1889 		if (ag->ag_state & AGS_PASSIVE)
1890 			k->k_state |= KS_PASSIVE;
1891 		if (ag->ag_state & AGS_FILE)
1892 			k->k_state |= KS_FILE;
1893 		k->k_gate = ag->ag_nhop;
1894 		k->k_ifp = ifp;
1895 		k->k_metric = ag->ag_metric;
1896 		return;
1897 	}
1898 
1899 	if ((k->k_state & (KS_STATIC|KS_DEPRE_IF)) ||
1900 	    ((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF)) {
1901 		return;
1902 	}
1903 
1904 	/* modify existing kernel entry if necessary */
1905 	if (k->k_gate == ag->ag_nhop && k->k_ifp == ag->ag_ifp &&
1906 	    k->k_metric != ag->ag_metric) {
1907 			/*
1908 			 * Must delete bad interface routes etc.
1909 			 * to change them.
1910 			 */
1911 			if (k->k_metric == HOPCNT_INFINITY)
1912 				k->k_state |= KS_DEL_ADD;
1913 			k->k_gate = ag->ag_nhop;
1914 			k->k_metric = ag->ag_metric;
1915 			k->k_state |= KS_CHANGE;
1916 	}
1917 
1918 	/*
1919 	 * If the daemon thinks the route should exist, forget
1920 	 * about any redirections.
1921 	 * If the daemon thinks the route should exist, eventually
1922 	 * override manual intervention by the operator.
1923 	 */
1924 	if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1925 		k->k_state &= ~KS_DYNAMIC;
1926 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1927 	}
1928 
1929 	if ((k->k_state & KS_GATEWAY) && !(ag->ag_state & AGS_GATEWAY)) {
1930 		k->k_state &= ~KS_GATEWAY;
1931 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1932 	} else if (!(k->k_state & KS_GATEWAY) && (ag->ag_state & AGS_GATEWAY)) {
1933 		k->k_state |= KS_GATEWAY;
1934 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1935 	}
1936 
1937 	/*
1938 	 * Deleting-and-adding is necessary to change aspects of a route.
1939 	 * Just delete instead of deleting and then adding a bad route.
1940 	 * Otherwise, we want to keep the route in the kernel.
1941 	 */
1942 	if (k->k_metric == HOPCNT_INFINITY && (k->k_state & KS_DEL_ADD))
1943 		k->k_state |= KS_DELETE;
1944 	else
1945 		k->k_state &= ~KS_DELETE;
1946 #undef RT
1947 }
1948 
1949 /*
1950  * Update our image of the kernel forwarding table using the given
1951  * route from our internal routing table.
1952  */
1953 
1954 /*ARGSUSED1*/
1955 static int
1956 walk_kern(struct radix_node *rn, void *argp)
1957 {
1958 #define	RT ((struct rt_entry *)rn)
1959 	uint8_t metric, pref;
1960 	uint_t ags = 0;
1961 	int i;
1962 	struct rt_spare *rts;
1963 
1964 	/* Do not install synthetic routes */
1965 	if (RT->rt_state & RS_NET_SYN)
1966 		return (0);
1967 
1968 	/*
1969 	 * Do not install static routes here. Only
1970 	 * read_rt->rtm_add->kern_add should install those
1971 	 */
1972 	if ((RT->rt_state & RS_STATIC) &&
1973 	    (RT->rt_spares[0].rts_origin != RO_FILE))
1974 		return (0);
1975 
1976 	/* Do not clobber kernel if this is a route for a dead interface */
1977 	if (RT->rt_state & RS_BADIF)
1978 		return (0);
1979 
1980 	if (!(RT->rt_state & RS_IF)) {
1981 		/* This is an ordinary route, not for an interface. */
1982 
1983 		/*
1984 		 * aggregate, ordinary good routes without regard to
1985 		 * their metric
1986 		 */
1987 		pref = 1;
1988 		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1989 
1990 		/*
1991 		 * Do not install host routes directly to hosts, to avoid
1992 		 * interfering with ARP entries in the kernel table.
1993 		 */
1994 		if (RT_ISHOST(RT) && ntohl(RT->rt_dst) == RT->rt_gate)
1995 			return (0);
1996 
1997 	} else {
1998 		/*
1999 		 * This is an interface route.
2000 		 * Do not install routes for "external" remote interfaces.
2001 		 */
2002 		if (RT->rt_ifp != NULL && (RT->rt_ifp->int_state & IS_EXTERNAL))
2003 			return (0);
2004 
2005 		/* Interfaces should override received routes. */
2006 		pref = 0;
2007 		ags |= (AGS_IF | AGS_CORS_GATE);
2008 		if (RT->rt_ifp != NULL &&
2009 		    !(RT->rt_ifp->int_if_flags & IFF_LOOPBACK) &&
2010 		    (RT->rt_ifp->int_state & (IS_PASSIVE|IS_ALIAS)) ==
2011 		    IS_PASSIVE) {
2012 			ags |= AGS_PASSIVE;
2013 		}
2014 
2015 		/*
2016 		 * If it is not an interface, or an alias for an interface,
2017 		 * it must be a "gateway."
2018 		 *
2019 		 * If it is a "remote" interface, it is also a "gateway" to
2020 		 * the kernel if is not a alias.
2021 		 */
2022 		if (RT->rt_ifp == NULL || (RT->rt_ifp->int_state & IS_REMOTE)) {
2023 
2024 			ags |= (AGS_GATEWAY | AGS_SUPPRESS);
2025 
2026 			/*
2027 			 * Do not aggregate IS_PASSIVE routes.
2028 			 */
2029 			if (!(RT->rt_ifp->int_state & IS_PASSIVE))
2030 				ags |= AGS_AGGREGATE;
2031 		}
2032 	}
2033 
2034 	metric = RT->rt_metric;
2035 	if (metric == HOPCNT_INFINITY) {
2036 		/* If the route is dead, try hard to aggregate. */
2037 		pref = HOPCNT_INFINITY;
2038 		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
2039 		ags &= ~(AGS_IF | AGS_CORS_GATE);
2040 	}
2041 
2042 	/*
2043 	 * dump all routes that have the same metric as rt_spares[0]
2044 	 * into the kern_table, to be added to the kernel.
2045 	 */
2046 	for (i = 0; i < RT->rt_num_spares; i++) {
2047 		rts = &RT->rt_spares[i];
2048 
2049 		/* Do not install external routes */
2050 		if (rts->rts_flags & RTS_EXTERNAL)
2051 			continue;
2052 
2053 		if (rts->rts_metric == metric) {
2054 			ag_check(RT->rt_dst, RT->rt_mask,
2055 			    rts->rts_router, rts->rts_ifp, rts->rts_gate,
2056 			    metric, pref, 0, 0,
2057 			    (rts->rts_origin & RO_FILE) ? (ags|AGS_FILE) : ags,
2058 			    kern_out);
2059 		}
2060 	}
2061 	return (0);
2062 #undef RT
2063 }
2064 
2065 
2066 /* Update the kernel table to match the daemon table. */
2067 static void
2068 fix_kern(void)
2069 {
2070 	int i;
2071 	struct khash *k, *pk, *knext;
2072 
2073 
2074 	need_kern = age_timer;
2075 
2076 	/* Walk daemon table, updating the copy of the kernel table. */
2077 	(void) rn_walktree(rhead, walk_kern, NULL);
2078 	ag_flush(0, 0, kern_out);
2079 
2080 	for (i = 0; i < KHASH_SIZE; i++) {
2081 		pk = NULL;
2082 		for (k = khash_bins[i]; k != NULL;  k = knext) {
2083 			knext = k->k_next;
2084 
2085 			/* Do not touch local interface routes */
2086 			if ((k->k_state & KS_DEPRE_IF) ||
2087 			    (k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) {
2088 				pk = k;
2089 				continue;
2090 			}
2091 
2092 			/* Do not touch static routes */
2093 			if (k->k_state & KS_STATIC) {
2094 				kern_check_static(k, 0);
2095 				pk = k;
2096 				continue;
2097 			}
2098 
2099 			/* check hold on routes deleted by the operator */
2100 			if (k->k_keep > now.tv_sec) {
2101 				/* ensure we check when the hold is over */
2102 				LIM_SEC(need_kern, k->k_keep);
2103 				pk = k;
2104 				continue;
2105 			}
2106 
2107 			if ((k->k_state & KS_DELETE) &&
2108 			    !(k->k_state & KS_DYNAMIC)) {
2109 				if ((k->k_dst == RIP_DEFAULT) &&
2110 				    (k->k_ifp != NULL) &&
2111 				    (kern_alternate(RIP_DEFAULT,
2112 				    k->k_mask, k->k_gate, k->k_ifp,
2113 				    NULL) == NULL))
2114 					rdisc_restore(k->k_ifp);
2115 				kern_ioctl(k, RTM_DELETE, 0);
2116 				if (pk != NULL)
2117 					pk->k_next = knext;
2118 				else
2119 					khash_bins[i] = knext;
2120 				free(k);
2121 				continue;
2122 			}
2123 
2124 			if (k->k_state & KS_DEL_ADD)
2125 				kern_ioctl(k, RTM_DELETE, 0);
2126 
2127 			if (k->k_state & KS_ADD) {
2128 				if ((k->k_dst == RIP_DEFAULT) &&
2129 				    (k->k_ifp != NULL))
2130 					rdisc_suppress(k->k_ifp);
2131 				kern_ioctl(k, RTM_ADD,
2132 				    ((0 != (k->k_state & (KS_GATEWAY |
2133 				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
2134 			} else if (k->k_state & KS_CHANGE) {
2135 				/*
2136 				 * Should be using RTM_CHANGE here, but
2137 				 * since RTM_CHANGE is currently
2138 				 * not multipath-aware, and assumes
2139 				 * that RTF_GATEWAY implies the gateway
2140 				 * of the route for dst has to be
2141 				 * changed, we play safe, and do a del + add.
2142 				 */
2143 				kern_ioctl(k,  RTM_DELETE, 0);
2144 				kern_ioctl(k, RTM_ADD,
2145 				    ((0 != (k->k_state & (KS_GATEWAY |
2146 				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
2147 			}
2148 			k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
2149 
2150 			/*
2151 			 * Mark this route to be deleted in the next cycle.
2152 			 * This deletes routes that disappear from the
2153 			 * daemon table, since the normal aging code
2154 			 * will clear the bit for routes that have not
2155 			 * disappeared from the daemon table.
2156 			 */
2157 			k->k_state |= KS_DELETE;
2158 			pk = k;
2159 		}
2160 	}
2161 }
2162 
2163 
2164 /* Delete a static route in the image of the kernel table. */
2165 void
2166 del_static(in_addr_t dst, in_addr_t mask, in_addr_t gate,
2167     struct interface *ifp, int gone)
2168 {
2169 	struct khash *k;
2170 	struct rt_entry *rt;
2171 
2172 	/*
2173 	 * Just mark it in the table to be deleted next time the kernel
2174 	 * table is updated.
2175 	 * If it has already been deleted, mark it as such, and set its
2176 	 * keep-timer so that it will not be deleted again for a while.
2177 	 * This lets the operator delete a route added by the daemon
2178 	 * and add a replacement.
2179 	 */
2180 	k = kern_find(dst, mask, gate, ifp, NULL);
2181 	if (k != NULL && (gate == 0 || k->k_gate == gate)) {
2182 		k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
2183 		k->k_state |= KS_DELETE;
2184 		if (gone) {
2185 			k->k_state |= KS_DELETED;
2186 			k->k_keep = now.tv_sec + K_KEEP_LIM;
2187 		}
2188 	}
2189 
2190 	rt = rtget(dst, mask);
2191 	if (rt != NULL && (rt->rt_state & RS_STATIC))
2192 		rtbad(rt, NULL);
2193 }
2194 
2195 
2196 /*
2197  * Delete all routes generated from ICMP Redirects that use a given gateway,
2198  * as well as old redirected routes.
2199  */
2200 void
2201 del_redirects(in_addr_t bad_gate, time_t old)
2202 {
2203 	int i;
2204 	struct khash *k;
2205 	boolean_t dosupply = should_supply(NULL);
2206 
2207 	for (i = 0; i < KHASH_SIZE; i++) {
2208 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
2209 			if (!(k->k_state & KS_DYNAMIC) ||
2210 			    (k->k_state & (KS_STATIC|KS_IF|KS_DEPRE_IF)))
2211 				continue;
2212 
2213 			if (k->k_gate != bad_gate && k->k_redirect_time > old &&
2214 			    !dosupply)
2215 				continue;
2216 
2217 			k->k_state |= KS_DELETE;
2218 			k->k_state &= ~KS_DYNAMIC;
2219 			need_kern.tv_sec = now.tv_sec;
2220 			trace_act("mark redirected %s --> %s for deletion",
2221 			    addrname(k->k_dst, k->k_mask, 0),
2222 			    naddr_ntoa(k->k_gate));
2223 		}
2224 	}
2225 }
2226 
2227 /* Start the daemon tables. */
2228 void
2229 rtinit(void)
2230 {
2231 	int i;
2232 	struct ag_info *ag;
2233 
2234 	/* Initialize the radix trees */
2235 	rn_init();
2236 	(void) rn_inithead((void**)&rhead, 32);
2237 
2238 	/* mark all of the slots in the table free */
2239 	ag_avail = ag_slots;
2240 	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
2241 		ag->ag_fine = ag+1;
2242 		ag++;
2243 	}
2244 }
2245 
2246 
/*
 * Scratch socket addresses shared by the routines in this file that call
 * into the radix tree: dst_sock carries the destination key and mask_sock
 * the netmask.  Callers overwrite sin_addr.s_addr before each use.  The
 * brace initializer sets the first member to AF_INET (presumably
 * sin_family -- confirm on platforms whose sockaddr_in starts with sin_len).
 */
2247 static struct sockaddr_in dst_sock = {AF_INET};
2248 static struct sockaddr_in mask_sock = {AF_INET};
2249 
2250 
2251 static void
2252 set_need_flash(void)
2253 {
2254 	if (!need_flash) {
2255 		need_flash = _B_TRUE;
2256 		/*
2257 		 * Do not send the flash update immediately.  Wait a little
2258 		 * while to hear from other routers.
2259 		 */
2260 		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
2261 	}
2262 }
2263 
2264 
2265 /* Get a particular routing table entry */
2266 struct rt_entry *
2267 rtget(in_addr_t dst, in_addr_t mask)
2268 {
2269 	struct rt_entry *rt;
2270 
2271 	dst_sock.sin_addr.s_addr = dst;
2272 	mask_sock.sin_addr.s_addr = htonl(mask);
2273 	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock, &mask_sock, rhead);
2274 	if (rt == NULL || rt->rt_dst != dst || rt->rt_mask != mask)
2275 		return (NULL);
2276 
2277 	return (rt);
2278 }
2279 
2280 
2281 /* Find a route to dst as the kernel would. */
2282 struct rt_entry *
2283 rtfind(in_addr_t dst)
2284 {
2285 	dst_sock.sin_addr.s_addr = dst;
2286 	return ((struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead));
2287 }
2288 
2289 
2290 /* add a route to the table */
2291 void
2292 rtadd(in_addr_t	dst,
2293     in_addr_t	mask,
2294     uint16_t	state,			/* rt_state for the entry */
2295     struct	rt_spare *new)
2296 {
2297 	struct rt_entry *rt;
2298 	in_addr_t smask;
2299 	int i;
2300 	struct rt_spare *rts;
2301 
2302 	/* This is the only function that increments total_routes. */
2303 	if (total_routes == MAX_ROUTES) {
2304 		msglog("have maximum (%d) routes", total_routes);
2305 		return;
2306 	}
2307 
2308 	rt = rtmalloc(sizeof (*rt), "rtadd");
2309 	(void) memset(rt, 0, sizeof (*rt));
2310 	rt->rt_spares = rtmalloc(SPARE_INC  * sizeof (struct rt_spare),
2311 	    "rtadd");
2312 	rt->rt_num_spares = SPARE_INC;
2313 	(void) memset(rt->rt_spares, 0, SPARE_INC  * sizeof (struct rt_spare));
2314 	for (rts = rt->rt_spares, i = rt->rt_num_spares; i != 0; i--, rts++)
2315 		rts->rts_metric = HOPCNT_INFINITY;
2316 
2317 	rt->rt_nodes->rn_key = (uint8_t *)&rt->rt_dst_sock;
2318 	rt->rt_dst = dst;
2319 	rt->rt_dst_sock.sin_family = AF_INET;
2320 	if (mask != HOST_MASK) {
2321 		smask = std_mask(dst);
2322 		if ((smask & ~mask) == 0 && mask > smask)
2323 			state |= RS_SUBNET;
2324 	}
2325 	mask_sock.sin_addr.s_addr = htonl(mask);
2326 	rt->rt_mask = mask;
2327 	rt->rt_spares[0] = *new;
2328 	rt->rt_state = state;
2329 	rt->rt_time = now.tv_sec;
2330 	rt->rt_poison_metric = HOPCNT_INFINITY;
2331 	rt->rt_seqno = update_seqno;
2332 
2333 	if (TRACEACTIONS)
2334 		trace_add_del("Add", rt);
2335 
2336 	need_kern.tv_sec = now.tv_sec;
2337 	set_need_flash();
2338 
2339 	if (NULL == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock, rhead,
2340 	    rt->rt_nodes)) {
2341 		msglog("rnh_addaddr() failed for %s mask=%s",
2342 		    naddr_ntoa(dst), naddr_ntoa(htonl(mask)));
2343 		free(rt);
2344 	}
2345 
2346 	total_routes++;
2347 }
2348 
2349 
2350 /* notice a changed route */
2351 void
2352 rtchange(struct rt_entry *rt,
2353     uint16_t	state,			/* new state bits */
2354     struct rt_spare *new,
2355     char	*label)
2356 {
2357 	if (rt->rt_metric != new->rts_metric) {
2358 		/*
2359 		 * Fix the kernel immediately if it seems the route
2360 		 * has gone bad, since there may be a working route that
2361 		 * aggregates this route.
2362 		 */
2363 		if (new->rts_metric == HOPCNT_INFINITY) {
2364 			need_kern.tv_sec = now.tv_sec;
2365 			if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
2366 				new->rts_time = now.tv_sec - EXPIRE_TIME;
2367 		}
2368 		rt->rt_seqno = update_seqno;
2369 		set_need_flash();
2370 	}
2371 
2372 	if (rt->rt_gate != new->rts_gate) {
2373 		need_kern.tv_sec = now.tv_sec;
2374 		rt->rt_seqno = update_seqno;
2375 		set_need_flash();
2376 	}
2377 
2378 	state |= (rt->rt_state & RS_SUBNET);
2379 
2380 	/* Keep various things from deciding ageless routes are stale. */
2381 	if (!AGE_RT(state, rt->rt_spares[0].rts_origin, new->rts_ifp))
2382 		new->rts_time = now.tv_sec;
2383 
2384 	if (TRACEACTIONS)
2385 		trace_change(rt, state, new,
2386 		    label ? label : "Chg   ");
2387 
2388 	rt->rt_state = state;
2389 	/*
2390 	 * If the interface state of the new primary route is good,
2391 	 * turn off RS_BADIF flag
2392 	 */
2393 	if ((rt->rt_state & RS_BADIF) &&
2394 	    IS_IFF_UP(new->rts_ifp->int_if_flags) &&
2395 	    !(new->rts_ifp->int_state & (IS_BROKE | IS_SICK)))
2396 		rt->rt_state &= ~(RS_BADIF);
2397 
2398 	rt->rt_spares[0] = *new;
2399 }
2400 
2401 
2402 /* check for a better route among the spares */
2403 static struct rt_spare *
2404 rts_better(struct rt_entry *rt)
2405 {
2406 	struct rt_spare *rts, *rts1;
2407 	int i;
2408 
2409 	/* find the best alternative among the spares */
2410 	rts = rt->rt_spares+1;
2411 	for (i = rt->rt_num_spares, rts1 = rts+1; i > 2; i--, rts1++) {
2412 		if (BETTER_LINK(rt, rts1, rts))
2413 			rts = rts1;
2414 	}
2415 
2416 	return (rts);
2417 }
2418 
2419 
2420 /* switch to a backup route */
2421 void
2422 rtswitch(struct rt_entry *rt,
2423     struct rt_spare *rts)
2424 {
2425 	struct rt_spare swap;
2426 	char label[10];
2427 
2428 	/* Do not change permanent routes */
2429 	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC |
2430 	    RS_NET_SYN | RS_IF)))
2431 		return;
2432 
2433 	/* find the best alternative among the spares */
2434 	if (rts == NULL)
2435 		rts = rts_better(rt);
2436 
2437 	/* Do not bother if it is not worthwhile. */
2438 	if (!BETTER_LINK(rt, rts, rt->rt_spares))
2439 		return;
2440 
2441 	swap = rt->rt_spares[0];
2442 	(void) snprintf(label, sizeof (label), "Use #%d",
2443 	    (int)(rts - rt->rt_spares));
2444 	rtchange(rt, rt->rt_state & ~(RS_NET_SYN), rts, label);
2445 
2446 	if (swap.rts_metric == HOPCNT_INFINITY) {
2447 		*rts = rts_empty;
2448 	} else {
2449 		*rts = swap;
2450 	}
2451 
2452 }
2453 
2454 
2455 void
2456 rtdelete(struct rt_entry *rt)
2457 {
2458 	struct rt_entry *deleted_rt;
2459 	struct rt_spare *rts;
2460 	int i;
2461 	in_addr_t gate = rt->rt_gate; /* for debugging */
2462 
2463 	if (TRACEACTIONS)
2464 		trace_add_del("Del", rt);
2465 
2466 	for (i = 0; i < rt->rt_num_spares; i++) {
2467 		rts = &rt->rt_spares[i];
2468 		rts_delete(rt, rts);
2469 	}
2470 
2471 	dst_sock.sin_addr.s_addr = rt->rt_dst;
2472 	mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
2473 	if (rt != (deleted_rt =
2474 	    ((struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
2475 	    rhead)))) {
2476 		msglog("rnh_deladdr(%s) failed; found rt 0x%lx",
2477 		    rtname(rt->rt_dst, rt->rt_mask, gate), deleted_rt);
2478 		if (deleted_rt != NULL)
2479 			free(deleted_rt);
2480 	}
2481 	total_routes--;
2482 	free(rt->rt_spares);
2483 	free(rt);
2484 
2485 	if (dst_sock.sin_addr.s_addr == RIP_DEFAULT) {
2486 		/*
2487 		 * we just deleted the default route. Trigger rdisc_sort
2488 		 * so that we can recover from any rdisc information that
2489 		 * is valid
2490 		 */
2491 		rdisc_timer.tv_sec = 0;
2492 	}
2493 }
2494 
2495 void
2496 rts_delete(struct rt_entry *rt, struct rt_spare *rts)
2497 {
2498 	struct khash *k;
2499 
2500 	trace_upslot(rt, rts, &rts_empty);
2501 	k = kern_find(rt->rt_dst, rt->rt_mask,
2502 	    rts->rts_gate, rts->rts_ifp, NULL);
2503 	if (k != NULL &&
2504 	    !(k->k_state & KS_DEPRE_IF) &&
2505 	    ((k->k_state & (KS_IF|KS_PASSIVE)) != KS_IF)) {
2506 		k->k_state |= KS_DELETE;
2507 		need_kern.tv_sec = now.tv_sec;
2508 	}
2509 
2510 	*rts = rts_empty;
2511 }
2512 
2513 /*
2514  * Get rid of a bad route, and try to switch to a replacement.
2515  * If the route has gone bad because of a bad interface,
2516  * the information about the dead interface is available in badifp
2517  * for the purpose of sanity checks, if_flags checks etc.
2518  */
2519 static void
2520 rtbad(struct rt_entry *rt, struct interface *badifp)
2521 {
2522 	struct rt_spare new;
2523 	uint16_t rt_state;
2524 
2525 
2526 	if (badifp == NULL || (rt->rt_spares[0].rts_ifp == badifp)) {
2527 		/* Poison the route */
2528 		new = rt->rt_spares[0];
2529 		new.rts_metric = HOPCNT_INFINITY;
2530 		rt_state = rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC);
2531 	}
2532 
2533 	if (badifp != NULL) {
2534 		/*
2535 		 * Dont mark the rtentry bad unless the ifp for the primary
2536 		 * route is the bad ifp
2537 		 */
2538 		if (rt->rt_spares[0].rts_ifp != badifp)
2539 			return;
2540 		/*
2541 		 * badifp has just gone bad. We want to keep this
2542 		 * rt_entry around so that we tell our rip-neighbors
2543 		 * about the bad route, but we can't do anything
2544 		 * to the kernel itself, so mark it as RS_BADIF
2545 		 */
2546 		trace_misc("rtbad:Setting RS_BADIF (%s)", badifp->int_name);
2547 		rt_state |= RS_BADIF;
2548 		new.rts_ifp = &dummy_ifp;
2549 	}
2550 	rtchange(rt, rt_state, &new, 0);
2551 	rtswitch(rt, 0);
2552 }
2553 
2554 
2555 /*
2556  * Junk a RS_NET_SYN or RS_LOCAL route,
2557  *	unless it is needed by another interface.
2558  */
2559 void
2560 rtbad_sub(struct rt_entry *rt, struct interface *badifp)
2561 {
2562 	struct interface *ifp, *ifp1;
2563 	struct intnet *intnetp;
2564 	uint_t state;
2565 
2566 
2567 	ifp1 = NULL;
2568 	state = 0;
2569 
2570 	if (rt->rt_state & RS_LOCAL) {
2571 		/*
2572 		 * Is this the route through loopback for the interface?
2573 		 * If so, see if it is used by any other interfaces, such
2574 		 * as a point-to-point interface with the same local address.
2575 		 */
2576 		for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2577 			/* Retain it if another interface needs it. */
2578 			if (ifp->int_addr == rt->rt_ifp->int_addr) {
2579 				state |= RS_LOCAL;
2580 				ifp1 = ifp;
2581 				break;
2582 			}
2583 		}
2584 
2585 	}
2586 
2587 	if (!(state & RS_LOCAL)) {
2588 		/*
2589 		 * Retain RIPv1 logical network route if there is another
2590 		 * interface that justifies it.
2591 		 */
2592 		if (rt->rt_state & RS_NET_SYN) {
2593 			for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2594 				if ((ifp->int_state & IS_NEED_NET_SYN) &&
2595 				    rt->rt_mask == ifp->int_std_mask &&
2596 				    rt->rt_dst == ifp->int_std_addr) {
2597 					state |= RS_NET_SYN;
2598 					ifp1 = ifp;
2599 					break;
2600 				}
2601 			}
2602 		}
2603 
2604 		/* or if there is an authority route that needs it. */
2605 		for (intnetp = intnets; intnetp != NULL;
2606 		    intnetp = intnetp->intnet_next) {
2607 			if (intnetp->intnet_addr == rt->rt_dst &&
2608 			    intnetp->intnet_mask == rt->rt_mask) {
2609 				state |= (RS_NET_SYN | RS_NET_INT);
2610 				break;
2611 			}
2612 		}
2613 	}
2614 
2615 	if (ifp1 != NULL || (state & RS_NET_SYN)) {
2616 		struct rt_spare new = rt->rt_spares[0];
2617 		new.rts_ifp = ifp1;
2618 		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
2619 		    &new, 0);
2620 	} else {
2621 		rtbad(rt, badifp);
2622 	}
2623 }
2624 
2625 /*
2626  * Called while walking the table looking for sick interfaces
2627  * or after a time change.
2628  */
2629 int
2630 walk_bad(struct radix_node *rn,
2631     void *argp)
2632 {
2633 #define	RT ((struct rt_entry *)rn)
2634 	struct rt_spare *rts;
2635 	int i, j = -1;
2636 
2637 	/* fix any spare routes through the interface */
2638 	for (i = 1; i < RT->rt_num_spares; i++) {
2639 		rts = &((struct rt_entry *)rn)->rt_spares[i];
2640 
2641 		if (rts->rts_metric < HOPCNT_INFINITY &&
2642 		    (rts->rts_ifp == NULL ||
2643 		    (rts->rts_ifp->int_state & IS_BROKE)))
2644 			rts_delete(RT, rts);
2645 		else {
2646 			if (rts->rts_origin != RO_NONE)
2647 				j = i;
2648 		}
2649 	}
2650 
2651 	/*
2652 	 * Deal with the main route
2653 	 * finished if it has been handled before or if its interface is ok
2654 	 */
2655 	if (RT->rt_ifp == NULL || !(RT->rt_ifp->int_state & IS_BROKE))
2656 		return (0);
2657 
2658 	/* Bad routes for other than interfaces are easy. */
2659 	if (!(RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
2660 		if (j > 0) {
2661 			RT->rt_spares[0].rts_metric = HOPCNT_INFINITY;
2662 			rtswitch(RT, NULL);
2663 		} else {
2664 			rtbad(RT, (struct interface *)argp);
2665 		}
2666 		return (0);
2667 	}
2668 
2669 	rtbad_sub(RT, (struct interface *)argp);
2670 	return (0);
2671 #undef RT
2672 }
2673 
2674 /*
2675  * Called while walking the table to replace a duplicate interface
2676  * with a backup.
2677  */
2678 int
2679 walk_rewire(struct radix_node *rn, void *argp)
2680 {
2681 	struct rt_entry *RT = (struct rt_entry *)rn;
2682 	struct rewire_data *wire = (struct rewire_data *)argp;
2683 	struct rt_spare *rts;
2684 	int i;
2685 
2686 	/* fix any spare routes through the interface */
2687 	rts = RT->rt_spares;
2688 	for (i = RT->rt_num_spares; i > 0; i--, rts++) {
2689 		if (rts->rts_ifp == wire->if_old) {
2690 			rts->rts_ifp = wire->if_new;
2691 			if ((RT->rt_dst == RIP_DEFAULT) &&
2692 			    (wire->if_old->int_state & IS_SUPPRESS_RDISC))
2693 				rdisc_suppress(rts->rts_ifp);
2694 			if ((rts->rts_metric += wire->metric_delta) >
2695 			    HOPCNT_INFINITY)
2696 				rts->rts_metric = HOPCNT_INFINITY;
2697 
2698 			/*
2699 			 * If the main route is getting a worse metric,
2700 			 * then it may be time to switch to a backup.
2701 			 */
2702 			if (i == RT->rt_num_spares && wire->metric_delta > 0) {
2703 				rtswitch(RT, NULL);
2704 			}
2705 		}
2706 	}
2707 
2708 	return (0);
2709 }
2710 
2711 /* Check the age of an individual route. */
2712 static int
2713 walk_age(struct radix_node *rn, void *argp)
2714 {
2715 #define	RT ((struct rt_entry *)rn)
2716 	struct interface *ifp;
2717 	struct rt_spare *rts;
2718 	int i;
2719 	in_addr_t age_bad_gate = *(in_addr_t *)argp;
2720 
2721 
2722 	/*
2723 	 * age all of the spare routes, including the primary route
2724 	 * currently in use
2725 	 */
2726 	rts = RT->rt_spares;
2727 	for (i = RT->rt_num_spares; i != 0; i--, rts++) {
2728 
2729 		ifp = rts->rts_ifp;
2730 		if (i == RT->rt_num_spares) {
2731 			if (!AGE_RT(RT->rt_state, rts->rts_origin, ifp)) {
2732 				/*
2733 				 * Keep various things from deciding ageless
2734 				 * routes are stale
2735 				 */
2736 				rts->rts_time = now.tv_sec;
2737 				continue;
2738 			}
2739 
2740 			/* forget RIP routes after RIP has been turned off. */
2741 			if (rip_sock < 0) {
2742 				rts->rts_time = now_stale + 1;
2743 			}
2744 		}
2745 
2746 		/* age failing routes */
2747 		if (age_bad_gate == rts->rts_gate &&
2748 		    rts->rts_time >= now_stale) {
2749 			rts->rts_time -= SUPPLY_INTERVAL;
2750 		}
2751 
2752 		/* trash the spare routes when they go bad */
2753 		if (rts->rts_origin == RO_RIP &&
2754 		    ((rip_sock < 0) ||
2755 		    (rts->rts_metric < HOPCNT_INFINITY &&
2756 		    now_garbage > rts->rts_time)) &&
2757 		    i != RT->rt_num_spares) {
2758 			rts_delete(RT, rts);
2759 		}
2760 	}
2761 
2762 
2763 	/* finished if the active route is still fresh */
2764 	if (now_stale <= RT->rt_time)
2765 		return (0);
2766 
2767 	/* try to switch to an alternative */
2768 	rtswitch(RT, NULL);
2769 
2770 	/* Delete a dead route after it has been publically mourned. */
2771 	if (now_garbage > RT->rt_time) {
2772 		rtdelete(RT);
2773 		return (0);
2774 	}
2775 
2776 	/* Start poisoning a bad route before deleting it. */
2777 	if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2778 		struct rt_spare new = RT->rt_spares[0];
2779 
2780 		new.rts_metric = HOPCNT_INFINITY;
2781 		rtchange(RT, RT->rt_state, &new, 0);
2782 	}
2783 	return (0);
2784 }
2785 
2786 
2787 /* Watch for dead routes and interfaces. */
2788 void
2789 age(in_addr_t bad_gate)
2790 {
2791 	struct interface *ifp;
2792 	int need_query = 0;
2793 
2794 	/*
2795 	 * If not listening to RIP, there is no need to age the routes in
2796 	 * the table.
2797 	 */
2798 	age_timer.tv_sec = (now.tv_sec
2799 	    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2800 
2801 	/*
2802 	 * Check for dead IS_REMOTE interfaces by timing their
2803 	 * transmissions.
2804 	 */
2805 	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2806 		if (!(ifp->int_state & IS_REMOTE))
2807 			continue;
2808 
2809 		/* ignore unreachable remote interfaces */
2810 		if (!check_remote(ifp))
2811 			continue;
2812 
2813 		/* Restore remote interface that has become reachable */
2814 		if (ifp->int_state & IS_BROKE)
2815 			if_ok(ifp, "remote ", _B_FALSE);
2816 
2817 		if (ifp->int_act_time != NEVER &&
2818 		    now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2819 			writelog(LOG_NOTICE,
2820 			    "remote interface %s to %s timed out after"
2821 			    " %ld:%ld",
2822 			    ifp->int_name,
2823 			    naddr_ntoa(ifp->int_dstaddr),
2824 			    (now.tv_sec - ifp->int_act_time)/60,
2825 			    (now.tv_sec - ifp->int_act_time)%60);
2826 			if_sick(ifp, _B_FALSE);
2827 		}
2828 
2829 		/*
2830 		 * If we have not heard from the other router
2831 		 * recently, ask it.
2832 		 */
2833 		if (now.tv_sec >= ifp->int_query_time) {
2834 			ifp->int_query_time = NEVER;
2835 			need_query = 1;
2836 		}
2837 	}
2838 
2839 	/* Age routes. */
2840 	(void) rn_walktree(rhead, walk_age, &bad_gate);
2841 
2842 	/*
2843 	 * delete old redirected routes to keep the kernel table small
2844 	 * and prevent blackholes
2845 	 */
2846 	del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2847 
2848 	/* Update the kernel routing table. */
2849 	fix_kern();
2850 
2851 	/* poke reticent remote gateways */
2852 	if (need_query)
2853 		rip_query();
2854 }
2855 
2856 void
2857 kern_dump(void)
2858 {
2859 	int i;
2860 	struct khash *k;
2861 
2862 	for (i = 0; i < KHASH_SIZE; i++) {
2863 		for (k = khash_bins[i]; k != NULL; k = k->k_next)
2864 			trace_khash(k);
2865 	}
2866 }
2867 
2868 
2869 static struct interface *
2870 gwkludge_iflookup(in_addr_t dstaddr, in_addr_t addr, in_addr_t mask)
2871 {
2872 	uint32_t int_state;
2873 	struct interface *ifp;
2874 
2875 	for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2876 		int_state = ifp->int_state;
2877 
2878 		if (!(int_state & IS_REMOTE))
2879 			continue;
2880 
2881 		if (ifp->int_dstaddr == dstaddr && ifp->int_addr == addr &&
2882 		    ifp->int_mask == mask)
2883 			return (ifp);
2884 	}
2885 	return (NULL);
2886 }
2887