xref: /titanic_41/usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c (revision dadce0f24641320781134b8ea798cce6840860b5)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  *
5  * Copyright (c) 1983, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgment:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * $FreeBSD: src/sbin/routed/table.c,v 1.15 2000/08/11 08:24:38 sheldonh Exp $
37  */
38 
39 #include "defs.h"
40 #include <fcntl.h>
41 #include <stropts.h>
42 #include <sys/tihdr.h>
43 #include <inet/mib2.h>
44 #include <inet/ip.h>
45 
/*
 * This structure is used to store a disassembled routing socket message.
 * rti_info has room for RTAX_MAX address pointers.
 */
struct rt_addrinfo {
	/* presumably the RTA_* bitmask of addresses present -- TODO confirm */
	int	rti_addrs;
	/* presumably indexed by RTAX_* position -- TODO confirm */
	struct sockaddr_storage *rti_info[RTAX_MAX];
};
51 
/*
 * Prototypes for file-local helpers, plus rts_empty, a file-local
 * rt_spare initialized from EMPTY_RT_SPARE.
 */
static struct rt_spare *rts_better(struct rt_entry *);
static struct rt_spare rts_empty = EMPTY_RT_SPARE;
static void set_need_flash(void);
static void rtbad(struct rt_entry *, struct interface *);
static int rt_xaddrs(struct rt_addrinfo *, struct sockaddr_storage *,
    char *, int);
static struct interface *gwkludge_iflookup(in_addr_t, in_addr_t, in_addr_t);
static struct interface *lifp_iflookup(in_addr_t, const char *);
60 
struct radix_node_head *rhead;		/* root of the radix tree */

/* Flash update needed.  _B_TRUE to suppress the 1st. */
boolean_t need_flash = _B_TRUE;

struct timeval age_timer;		/* next check of old routes */
struct timeval need_kern = {		/* need to update kernel table */
	EPOCH+MIN_WAITTIME-1, 0
};

/* NOTE(review): presumably the number of routes in the daemon table. */
static uint32_t	total_routes;
72 
/*
 * Round a length up to the next multiple of sizeof (long).  A length that
 * is zero or negative yields sizeof (long).
 */
#define	ROUNDUP_LONG(a) \
	((a) <= 0 ? sizeof (long) : ((((a) - 1) | (sizeof (long) - 1)) + 1))
75 
76 /*
77  * It is desirable to "aggregate" routes, to combine differing routes of
78  * the same metric and next hop into a common route with a smaller netmask
79  * or to suppress redundant routes, routes that add no information to
80  * routes with smaller netmasks.
81  *
82  * A route is redundant if and only if any and all routes with smaller
83  * but matching netmasks and nets are the same.  Since routes are
84  * kept sorted in the radix tree, redundant routes always come second.
85  *
86  * There are two kinds of aggregations.  First, two routes of the same bit
87  * mask and differing only in the least significant bit of the network
88  * number can be combined into a single route with a coarser mask.
89  *
90  * Second, a route can be suppressed in favor of another route with a more
91  * coarse mask provided no incompatible routes with intermediate masks
92  * are present.  The second kind of aggregation involves suppressing routes.
93  * A route must not be suppressed if an incompatible route exists with
94  * an intermediate mask, since the suppressed route would be covered
95  * by the intermediate.
96  *
97  * This code relies on the radix tree walk encountering routes
98  * sorted first by address, with the smallest address first.
99  */
100 
/*
 * Aggregation table storage.  Unused slots are chained from ag_avail
 * through their ag_fine pointers.  Slots in use form a doubly linked list
 * (ag_cors toward coarser masks, ag_fine toward finer masks) whose ends
 * are ag_corsest and ag_finest.
 */
static struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest,
	*ag_finest;
103 
/*
 * Consistency check for the aggregation table.  When DEBUG_AG is defined,
 * count the slots reachable from ag_avail and from ag_corsest (both via
 * ag_fine) and abort() unless the total is NUM_AG_SLOTS.  Otherwise the
 * macro expands to a no-op.
 */
#ifdef DEBUG_AG
#define	CHECK_AG() do { int acnt = 0; struct ag_info *cag;	\
	for (cag = ag_avail; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	if (acnt != NUM_AG_SLOTS)				\
		abort();					\
} while (_B_FALSE)
#else
#define	CHECK_AG()	(void)0
#endif
116 
117 
118 /*
119  * Output the contents of an aggregation table slot.
120  *	This function must always be immediately followed with the deletion
121  *	of the target slot.
122  */
123 static void
124 ag_out(struct ag_info *ag, void (*out)(struct ag_info *))
125 {
126 	struct ag_info *ag_cors;
127 	uint32_t bit;
128 
129 
130 	/* Forget it if this route should not be output for split-horizon. */
131 	if (ag->ag_state & AGS_SPLIT_HZ)
132 		return;
133 
134 	/*
135 	 * If we output both the even and odd twins, then the immediate parent,
136 	 * if it is present, is redundant, unless the parent manages to
137 	 * aggregate into something coarser.
138 	 * On successive calls, this code detects the even and odd twins,
139 	 * and marks the parent.
140 	 *
141 	 * Note that the order in which the radix tree code emits routes
142 	 * ensures that the twins are seen before the parent is emitted.
143 	 */
144 	ag_cors = ag->ag_cors;
145 	if (ag_cors != NULL &&
146 	    ag_cors->ag_mask == (ag->ag_mask << 1) &&
147 	    ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
148 		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h) ?
149 		    AGS_REDUN0 : AGS_REDUN1);
150 	}
151 
152 	/*
153 	 * Skip it if this route is itself redundant.
154 	 *
155 	 * It is ok to change the contents of the slot here, since it is
156 	 * always deleted next.
157 	 */
158 	if (ag->ag_state & AGS_REDUN0) {
159 		if (ag->ag_state & AGS_REDUN1)
160 			return;		/* quit if fully redundant */
161 		/* make it finer if it is half-redundant */
162 		bit = (-ag->ag_mask) >> 1;
163 		ag->ag_dst_h |= bit;
164 		ag->ag_mask |= bit;
165 
166 	} else if (ag->ag_state & AGS_REDUN1) {
167 		/* make it finer if it is half-redundant */
168 		bit = (-ag->ag_mask) >> 1;
169 		ag->ag_mask |= bit;
170 	}
171 	out(ag);
172 }
173 
174 
175 static void
176 ag_del(struct ag_info *ag)
177 {
178 	CHECK_AG();
179 
180 	if (ag->ag_cors == NULL)
181 		ag_corsest = ag->ag_fine;
182 	else
183 		ag->ag_cors->ag_fine = ag->ag_fine;
184 
185 	if (ag->ag_fine == NULL)
186 		ag_finest = ag->ag_cors;
187 	else
188 		ag->ag_fine->ag_cors = ag->ag_cors;
189 
190 	ag->ag_fine = ag_avail;
191 	ag_avail = ag;
192 
193 	CHECK_AG();
194 }
195 
196 
197 /* Look for a route that can suppress the given route. */
198 static struct ag_info *
199 ag_find_suppressor(struct ag_info *ag)
200 {
201 	struct ag_info *ag_cors;
202 	in_addr_t dst_h = ag->ag_dst_h;
203 
204 	for (ag_cors = ag->ag_cors; ag_cors != NULL;
205 	    ag_cors = ag_cors->ag_cors) {
206 
207 		if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
208 			/*
209 			 * We found a route with a coarser mask that covers
210 			 * the given target.  It can suppress the target
211 			 * only if it has a good enough metric and it
212 			 * either has the same (gateway, ifp), or if its state
213 			 * includes AGS_CORS_GATE or the target's state
214 			 * includes AGS_FINE_GATE.
215 			 */
216 			if (ag_cors->ag_pref <= ag->ag_pref &&
217 			    (((ag->ag_nhop == ag_cors->ag_nhop) &&
218 			    (ag->ag_ifp == ag_cors->ag_ifp)) ||
219 			    ag_cors->ag_state & AGS_CORS_GATE ||
220 			    ag->ag_state & AGS_FINE_GATE)) {
221 				return (ag_cors);
222 			}
223 		}
224 	}
225 
226 	return (NULL);
227 }
228 
229 
230 /*
231  * Flush routes waiting for aggregation.
232  * This must not suppress a route unless it is known that among all routes
233  * with coarser masks that match it, the one with the longest mask is
234  * appropriate.  This is ensured by scanning the routes in lexical order,
235  * and with the most restrictive mask first among routes to the same
236  * destination.
237  */
238 void
239 ag_flush(in_addr_t lim_dst_h,	/* flush routes to here */
240     in_addr_t lim_mask,		/* matching this mask */
241     void (*out)(struct ag_info *))
242 {
243 	struct ag_info *ag, *ag_cors, *ag_supr;
244 	in_addr_t dst_h;
245 
246 
247 	for (ag = ag_finest; ag != NULL && ag->ag_mask >= lim_mask;
248 	    ag = ag_cors) {
249 		/* Get the next route now, before we delete ag. */
250 		ag_cors = ag->ag_cors;
251 
252 		/* Work on only the specified routes. */
253 		dst_h = ag->ag_dst_h;
254 		if ((dst_h & lim_mask) != lim_dst_h)
255 			continue;
256 
257 		/*
258 		 * Don't try to suppress the route if its state doesn't
259 		 * include AGS_SUPPRESS.
260 		 */
261 		if (!(ag->ag_state & AGS_SUPPRESS)) {
262 			ag_out(ag, out);
263 			ag_del(ag);
264 			continue;
265 		}
266 
267 		ag_supr = ag_find_suppressor(ag);
268 		if (ag_supr == NULL) {
269 			/*
270 			 * We didn't find a route which suppresses the
271 			 * target, so the target can go out.
272 			 */
273 			ag_out(ag, out);
274 		} else {
275 			/*
276 			 * We found a route which suppresses the target, so
277 			 * don't output the target.
278 			 */
279 			if (TRACEACTIONS) {
280 				trace_misc("aggregated away %s",
281 				    rtname(htonl(ag->ag_dst_h), ag->ag_mask,
282 				    ag->ag_nhop));
283 				trace_misc("on coarser route %s",
284 				    rtname(htonl(ag_supr->ag_dst_h),
285 				    ag_supr->ag_mask, ag_supr->ag_nhop));
286 			}
287 			/*
288 			 * If the suppressed target was redundant, then
289 			 * mark the suppressor as redundant.
290 			 */
291 			if (AG_IS_REDUN(ag->ag_state) &&
292 			    ag_supr->ag_mask == (ag->ag_mask<<1)) {
293 				if (ag_supr->ag_dst_h == dst_h)
294 					ag_supr->ag_state |= AGS_REDUN0;
295 				else
296 					ag_supr->ag_state |= AGS_REDUN1;
297 			}
298 			if (ag->ag_tag != ag_supr->ag_tag)
299 				ag_supr->ag_tag = 0;
300 			if (ag->ag_nhop != ag_supr->ag_nhop)
301 				ag_supr->ag_nhop = 0;
302 		}
303 
304 		/* The route has either been output or suppressed */
305 		ag_del(ag);
306 	}
307 
308 	CHECK_AG();
309 }
310 
311 
312 /* Try to aggregate a route with previous routes. */
313 void
314 ag_check(in_addr_t dst,
315     in_addr_t	mask,
316     in_addr_t	gate,
317     struct interface *ifp,
318     in_addr_t	nhop,
319     uint8_t	metric,
320     uint8_t	pref,
321     uint32_t	seqno,
322     uint16_t	tag,
323     uint16_t	state,
324     void (*out)(struct ag_info *))	/* output using this */
325 {
326 	struct ag_info *ag, *nag, *ag_cors;
327 	in_addr_t xaddr;
328 	int tmp;
329 	struct interface *xifp;
330 
331 	dst = ntohl(dst);
332 
333 	/*
334 	 * Don't bother trying to aggregate routes with non-contiguous
335 	 * subnet masks.
336 	 *
337 	 * (X & -X) contains a single bit if and only if X is a power of 2.
338 	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
339 	 */
340 	if ((mask & -mask) + mask != 0) {
341 		struct ag_info nc_ag;
342 
343 		nc_ag.ag_dst_h = dst;
344 		nc_ag.ag_mask = mask;
345 		nc_ag.ag_gate = gate;
346 		nc_ag.ag_ifp = ifp;
347 		nc_ag.ag_nhop = nhop;
348 		nc_ag.ag_metric = metric;
349 		nc_ag.ag_pref = pref;
350 		nc_ag.ag_tag = tag;
351 		nc_ag.ag_state = state;
352 		nc_ag.ag_seqno = seqno;
353 		out(&nc_ag);
354 		return;
355 	}
356 
357 	/* Search for the right slot in the aggregation table. */
358 	ag_cors = NULL;
359 	ag = ag_corsest;
360 	while (ag != NULL) {
361 		if (ag->ag_mask >= mask)
362 			break;
363 
364 		/*
365 		 * Suppress old routes (i.e. combine with compatible routes
366 		 * with coarser masks) as we look for the right slot in the
367 		 * aggregation table for the new route.
368 		 * A route to an address less than the current destination
369 		 * will not be affected by the current route or any route
370 		 * seen hereafter.  That means it is safe to suppress it.
371 		 * This check keeps poor routes (e.g. with large hop counts)
372 		 * from preventing suppression of finer routes.
373 		 */
374 		if (ag_cors != NULL && ag->ag_dst_h < dst &&
375 		    (ag->ag_state & AGS_SUPPRESS) &&
376 		    ag_cors->ag_pref <= ag->ag_pref &&
377 		    (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h &&
378 		    ((ag_cors->ag_nhop == ag->ag_nhop &&
379 		    (ag_cors->ag_ifp == ag->ag_ifp))||
380 		    (ag->ag_state & AGS_FINE_GATE) ||
381 		    (ag_cors->ag_state & AGS_CORS_GATE))) {
382 			/*
383 			 * If the suppressed target was redundant,
384 			 * then mark the suppressor redundant.
385 			 */
386 			if (AG_IS_REDUN(ag->ag_state) &&
387 			    ag_cors->ag_mask == (ag->ag_mask << 1)) {
388 				if (ag_cors->ag_dst_h == dst)
389 					ag_cors->ag_state |= AGS_REDUN0;
390 				else
391 					ag_cors->ag_state |= AGS_REDUN1;
392 			}
393 			if (ag->ag_tag != ag_cors->ag_tag)
394 				ag_cors->ag_tag = 0;
395 			if (ag->ag_nhop != ag_cors->ag_nhop)
396 				ag_cors->ag_nhop = 0;
397 			ag_del(ag);
398 			CHECK_AG();
399 		} else {
400 			ag_cors = ag;
401 		}
402 		ag = ag_cors->ag_fine;
403 	}
404 
405 	/*
406 	 * If we find the even/odd twin of the new route, and if the
407 	 * masks and so forth are equal, we can aggregate them.
408 	 * We can probably promote one of the pair.
409 	 *
410 	 * Since the routes are encountered in lexical order,
411 	 * the new route must be odd.  However, the second or later
412 	 * times around this loop, it could be the even twin promoted
413 	 * from the even/odd pair of twins of the finer route.
414 	 */
415 	while (ag != NULL && ag->ag_mask == mask &&
416 	    ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
417 
418 		/*
419 		 * Here we know the target route and the route in the current
420 		 * slot have the same netmasks and differ by at most the
421 		 * last bit.  They are either for the same destination, or
422 		 * for an even/odd pair of destinations.
423 		 */
424 		if (ag->ag_dst_h == dst) {
425 			if (ag->ag_nhop == nhop && ag->ag_ifp == ifp) {
426 				/*
427 				 * We have two routes to the same destination,
428 				 * with the same nexthop and interface.
429 				 * Routes are encountered in lexical order,
430 				 * so a route is never promoted until the
431 				 * parent route is already present.  So we
432 				 * know that the new route is a promoted (or
433 				 * aggregated) pair and the route already in
434 				 * the slot is the explicit route.
435 				 *
436 				 * Prefer the best route if their metrics
437 				 * differ, or the aggregated one if not,
438 				 * following a sort of longest-match rule.
439 				 */
440 				if (pref <= ag->ag_pref) {
441 					ag->ag_gate = gate;
442 					ag->ag_ifp = ifp;
443 					ag->ag_nhop = nhop;
444 					ag->ag_tag = tag;
445 					ag->ag_metric = metric;
446 					ag->ag_pref = pref;
447 					if (seqno > ag->ag_seqno)
448 						ag->ag_seqno = seqno;
449 					tmp = ag->ag_state;
450 					ag->ag_state = state;
451 					state = tmp;
452 				}
453 
454 				/*
455 				 * Some bits are set if they are set on
456 				 * either route, except when the route is
457 				 * for an interface.
458 				 */
459 				if (!(ag->ag_state & AGS_IF))
460 					ag->ag_state |=
461 					    (state & (AGS_AGGREGATE_EITHER |
462 					    AGS_REDUN0 | AGS_REDUN1));
463 
464 				return;
465 			} else {
466 				/*
467 				 * multiple routes to same dest/mask with
468 				 * differing gate nexthop/or ifp. Flush
469 				 * both out.
470 				 */
471 				break;
472 			}
473 		}
474 
475 		/*
476 		 * If one of the routes can be promoted and the other can
477 		 * be suppressed, it may be possible to combine them or
478 		 * worthwhile to promote one.
479 		 *
480 		 * Any route that can be promoted is always
481 		 * marked to be eligible to be suppressed.
482 		 */
483 		if (!((state & AGS_AGGREGATE) &&
484 		    (ag->ag_state & AGS_SUPPRESS)) &&
485 		    !((ag->ag_state & AGS_AGGREGATE) && (state & AGS_SUPPRESS)))
486 			break;
487 
488 		/*
489 		 * A pair of even/odd twin routes can be combined
490 		 * if either is redundant, or if they are via the
491 		 * same gateway and have the same metric.
492 		 */
493 		if (AG_IS_REDUN(ag->ag_state) || AG_IS_REDUN(state) ||
494 		    (ag->ag_nhop == nhop && ag->ag_ifp == ifp &&
495 		    ag->ag_pref == pref &&
496 		    (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
497 
498 			/*
499 			 * We have both the even and odd pairs.
500 			 * Since the routes are encountered in order,
501 			 * the route in the slot must be the even twin.
502 			 *
503 			 * Combine and promote (aggregate) the pair of routes.
504 			 */
505 			if (seqno < ag->ag_seqno)
506 				seqno = ag->ag_seqno;
507 			if (!AG_IS_REDUN(state))
508 				state &= ~AGS_REDUN1;
509 			if (AG_IS_REDUN(ag->ag_state))
510 				state |= AGS_REDUN0;
511 			else
512 				state &= ~AGS_REDUN0;
513 			state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
514 			if (ag->ag_tag != tag)
515 				tag = 0;
516 			if (ag->ag_nhop != nhop)
517 				nhop = 0;
518 
519 			/*
520 			 * Get rid of the even twin that was already
521 			 * in the slot.
522 			 */
523 			ag_del(ag);
524 
525 		} else if (ag->ag_pref >= pref &&
526 		    (ag->ag_state & AGS_AGGREGATE)) {
527 			/*
528 			 * If we cannot combine the pair, maybe the route
529 			 * with the worse metric can be promoted.
530 			 *
531 			 * Promote the old, even twin, by giving its slot
532 			 * in the table to the new, odd twin.
533 			 */
534 			ag->ag_dst_h = dst;
535 
536 			xaddr = ag->ag_gate;
537 			ag->ag_gate = gate;
538 			gate = xaddr;
539 
540 			xifp = ag->ag_ifp;
541 			ag->ag_ifp = ifp;
542 			ifp = xifp;
543 
544 			xaddr = ag->ag_nhop;
545 			ag->ag_nhop = nhop;
546 			nhop = xaddr;
547 
548 			tmp = ag->ag_tag;
549 			ag->ag_tag = tag;
550 			tag = tmp;
551 
552 			/*
553 			 * The promoted route is even-redundant only if the
554 			 * even twin was fully redundant.  It is not
555 			 * odd-redundant because the odd-twin will still be
556 			 * in the table.
557 			 */
558 			tmp = ag->ag_state;
559 			if (!AG_IS_REDUN(tmp))
560 				tmp &= ~AGS_REDUN0;
561 			tmp &= ~AGS_REDUN1;
562 			ag->ag_state = state;
563 			state = tmp;
564 
565 			tmp = ag->ag_metric;
566 			ag->ag_metric = metric;
567 			metric = tmp;
568 
569 			tmp = ag->ag_pref;
570 			ag->ag_pref = pref;
571 			pref = tmp;
572 
573 			/* take the newest sequence number */
574 			if (seqno <= ag->ag_seqno)
575 				seqno = ag->ag_seqno;
576 			else
577 				ag->ag_seqno = seqno;
578 
579 		} else {
580 			if (!(state & AGS_AGGREGATE))
581 				break;	/* cannot promote either twin */
582 
583 			/*
584 			 * Promote the new, odd twin by shaving its
585 			 * mask and address.
586 			 * The promoted route is odd-redundant only if the
587 			 * odd twin was fully redundant.  It is not
588 			 * even-redundant because the even twin is still in
589 			 * the table.
590 			 */
591 			if (!AG_IS_REDUN(state))
592 				state &= ~AGS_REDUN1;
593 			state &= ~AGS_REDUN0;
594 			if (seqno < ag->ag_seqno)
595 				seqno = ag->ag_seqno;
596 			else
597 				ag->ag_seqno = seqno;
598 		}
599 
600 		mask <<= 1;
601 		dst &= mask;
602 
603 		if (ag_cors == NULL) {
604 			ag = ag_corsest;
605 			break;
606 		}
607 		ag = ag_cors;
608 		ag_cors = ag->ag_cors;
609 	}
610 
611 	/*
612 	 * When we can no longer promote and combine routes,
613 	 * flush the old route in the target slot.  Also flush
614 	 * any finer routes that we know will never be aggregated by
615 	 * the new route.
616 	 *
617 	 * In case we moved toward coarser masks,
618 	 * get back where we belong
619 	 */
620 	if (ag != NULL && ag->ag_mask < mask) {
621 		ag_cors = ag;
622 		ag = ag->ag_fine;
623 	}
624 
625 	/* Empty the target slot */
626 	if (ag != NULL && ag->ag_mask == mask) {
627 		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
628 		ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
629 	}
630 
631 #ifdef DEBUG_AG
632 	if (ag == NULL && ag_cors != ag_finest)
633 		abort();
634 	if (ag_cors == NULL && ag != ag_corsest)
635 		abort();
636 	if (ag != NULL && ag->ag_cors != ag_cors)
637 		abort();
638 	if (ag_cors != NULL && ag_cors->ag_fine != ag)
639 		abort();
640 	CHECK_AG();
641 #endif
642 
643 	/* Save the new route on the end of the table. */
644 	nag = ag_avail;
645 	ag_avail = nag->ag_fine;
646 
647 	nag->ag_dst_h = dst;
648 	nag->ag_mask = mask;
649 	nag->ag_ifp = ifp;
650 	nag->ag_gate = gate;
651 	nag->ag_nhop = nhop;
652 	nag->ag_metric = metric;
653 	nag->ag_pref = pref;
654 	nag->ag_tag = tag;
655 	nag->ag_state = state;
656 	nag->ag_seqno = seqno;
657 
658 	nag->ag_fine = ag;
659 	if (ag != NULL)
660 		ag->ag_cors = nag;
661 	else
662 		ag_finest = nag;
663 	nag->ag_cors = ag_cors;
664 	if (ag_cors == NULL)
665 		ag_corsest = nag;
666 	else
667 		ag_cors->ag_fine = nag;
668 	CHECK_AG();
669 }
670 
671 
/*
 * Return a printable name for a routing socket message type.
 *
 * Known types (1 through the number of table entries) map to their RTM_*
 * name.  Type 0 and any larger type are formatted into a static buffer,
 * so that result is overwritten by the next such call.
 */
static const char *
rtm_type_name(uchar_t type)
{
	/* Indexed by (type - 1); message type values start at 1. */
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_CHGADDR",
		"RTM_FREEADDR"
	};
#define	NEW_RTM_PAT	"RTM type %#x"
	static char name0[sizeof (NEW_RTM_PAT) + 2];

	if (type > sizeof (rtm_types) / sizeof (rtm_types[0]) || type == 0) {
		(void) snprintf(name0, sizeof (name0), NEW_RTM_PAT, type);
		return (name0);
	} else {
		return (rtm_types[type-1]);
	}
#undef	NEW_RTM_PAT
}
704 
705 
706 static void
707 dump_rt_msg(const char *act, struct rt_msghdr *rtm, int mlen)
708 {
709 	const char *mtype;
710 	uchar_t *cp;
711 	int i, j;
712 	char buffer[16*3 + 1], *ibs;
713 	struct ifa_msghdr *ifam;
714 	struct if_msghdr *ifm;
715 
716 	switch (rtm->rtm_type) {
717 	case RTM_NEWADDR:
718 	case RTM_DELADDR:
719 	case RTM_FREEADDR:
720 	case RTM_CHGADDR:
721 		mtype = "ifam";
722 		break;
723 	case RTM_IFINFO:
724 		mtype = "ifm";
725 		break;
726 	default:
727 		mtype = "rtm";
728 		break;
729 	}
730 	trace_misc("%s %s %d bytes", act, mtype, mlen);
731 	if (mlen > rtm->rtm_msglen) {
732 		trace_misc("%s: extra %d bytes ignored", mtype,
733 		    mlen - rtm->rtm_msglen);
734 		mlen = rtm->rtm_msglen;
735 	} else if (mlen < rtm->rtm_msglen) {
736 		trace_misc("%s: truncated by %d bytes", mtype,
737 		    rtm->rtm_msglen - mlen);
738 	}
739 	switch (rtm->rtm_type) {
740 	case RTM_NEWADDR:
741 	case RTM_DELADDR:
742 	case RTM_CHGADDR:
743 	case RTM_FREEADDR:
744 		ifam = (struct ifa_msghdr *)rtm;
745 		trace_misc("ifam: msglen %d version %d type %d addrs %X",
746 		    ifam->ifam_msglen, ifam->ifam_version, ifam->ifam_type,
747 		    ifam->ifam_addrs);
748 		trace_misc("ifam: flags %X index %d metric %d",
749 		    ifam->ifam_flags, ifam->ifam_index, ifam->ifam_metric);
750 		cp = (uchar_t *)(ifam + 1);
751 		break;
752 	case RTM_IFINFO:
753 		ifm = (struct if_msghdr *)rtm;
754 		trace_misc("ifm: msglen %d version %d type %d addrs %X",
755 		    ifm->ifm_msglen, ifm->ifm_version, ifm->ifm_type,
756 		    ifm->ifm_addrs);
757 		ibs = if_bit_string(ifm->ifm_flags, _B_TRUE);
758 		if (ibs == NULL) {
759 			trace_misc("ifm: flags %#x index %d", ifm->ifm_flags,
760 			    ifm->ifm_index);
761 		} else {
762 			trace_misc("ifm: flags %s index %d", ibs,
763 			    ifm->ifm_index);
764 			free(ibs);
765 		}
766 		cp = (uchar_t *)(ifm + 1);
767 		break;
768 	default:
769 		trace_misc("rtm: msglen %d version %d type %d index %d",
770 		    rtm->rtm_msglen, rtm->rtm_version, rtm->rtm_type,
771 		    rtm->rtm_index);
772 		trace_misc("rtm: flags %X addrs %X pid %d seq %d",
773 		    rtm->rtm_flags, rtm->rtm_addrs, rtm->rtm_pid, rtm->rtm_seq);
774 		trace_misc("rtm: errno %d use %d inits %X", rtm->rtm_errno,
775 		    rtm->rtm_use, rtm->rtm_inits);
776 		cp = (uchar_t *)(rtm + 1);
777 		break;
778 	}
779 	i = mlen - (cp - (uint8_t *)rtm);
780 	while (i > 0) {
781 		buffer[0] = '\0';
782 		ibs = buffer;
783 		for (j = 0; j < 16 && i > 0; j++, i--)
784 			ibs += sprintf(ibs, " %02X", *cp++);
785 		trace_misc("addr%s", buffer);
786 	}
787 }
788 
/*
 * Tell the kernel to add, delete or change a route by writing a routing
 * socket message to rt_sock.
 *
 * action is the RTM_* message type and flags is copied into rtm_flags.
 * dst and gate are stored as given in the destination and gateway
 * sockaddrs; mask is converted with htonl() and is only sent when it is
 * not HOST_MASK, in which case RTF_HOST is set instead.  metric is sent as
 * the hop count when it is nonzero or when action is RTM_CHANGE.  When ifp
 * is NULL the interface is looked up from gate.
 *
 * Nothing is written when no_install is set.  An RTM_CHANGE that fails
 * with ESRCH is retried as RTM_ADD.
 */
static void
rtioctl(int action,			/* RTM_DELETE, etc */
    in_addr_t dst,
    in_addr_t gate,
    in_addr_t mask,
    struct interface *ifp,
    uint8_t metric,
    int flags)
{
	static int rt_sock_seqno = 0;
	/*
	 * The message as written: header, destination, gateway, and then
	 * the optional netmask and interface sockaddrs packed into w_space.
	 */
	struct {
		struct rt_msghdr w_rtm;
		struct sockaddr_in w_dst;
		struct sockaddr_in w_gate;
		uint8_t w_space[512];
	} w;
	struct sockaddr_in w_mask;
	struct sockaddr_dl w_ifp;
	uint8_t *cp;
	long cc;
	/* Common tail for the trace and log messages below. */
#define	PAT " %-10s %s metric=%d flags=%#x"
#define	ARGS rtm_type_name(action), rtname(dst, mask, gate), metric, flags

again:
	/* The message is rebuilt from scratch on every attempt. */
	(void) memset(&w, 0, sizeof (w));
	(void) memset(&w_mask, 0, sizeof (w_mask));
	(void) memset(&w_ifp, 0, sizeof (w_ifp));
	cp = w.w_space;
	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
	w.w_rtm.rtm_version = RTM_VERSION;
	w.w_rtm.rtm_type = action;
	w.w_rtm.rtm_flags = flags;
	w.w_rtm.rtm_seq = ++rt_sock_seqno;
	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
	if (metric != 0 || action == RTM_CHANGE) {
		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
	}
	w.w_dst.sin_family = AF_INET;
	w.w_dst.sin_addr.s_addr = dst;
	w.w_gate.sin_family = AF_INET;
	w.w_gate.sin_addr.s_addr = gate;
	if (mask == HOST_MASK) {
		/* Host routes carry RTF_HOST instead of a netmask. */
		w.w_rtm.rtm_flags |= RTF_HOST;
	} else {
		w.w_rtm.rtm_addrs |= RTA_NETMASK;
		w_mask.sin_family = AF_INET;
		w_mask.sin_addr.s_addr = htonl(mask);
		(void) memmove(cp, &w_mask, sizeof (w_mask));
		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
	}
	if (ifp == NULL)
		ifp = iflookup(gate);

	/* Append the interface index when one is known and fits. */
	if (ifp == NULL || (ifp->int_phys == NULL)) {
		trace_misc("no ifp for" PAT, ARGS);
	} else {
		if (ifp->int_phys->phyi_index > UINT16_MAX) {
			trace_misc("ifindex %d is too big for sdl_index",
			    ifp->int_phys->phyi_index);
		} else {
			w_ifp.sdl_family = AF_LINK;
			w.w_rtm.rtm_addrs |= RTA_IFP;
			w_ifp.sdl_index = ifp->int_phys->phyi_index;
			(void) memmove(cp, &w_ifp, sizeof (w_ifp));
			w.w_rtm.rtm_msglen +=
			    ROUNDUP_LONG(sizeof (struct sockaddr_dl));
		}
	}


	if (!no_install) {
		if (TRACERTS)
			dump_rt_msg("write", &w.w_rtm, w.w_rtm.rtm_msglen);
		cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
		if (cc < 0) {
			/*
			 * The route vanished underneath us.  A delete has
			 * nothing left to do; a change is retried as an add.
			 */
			if (errno == ESRCH && (action == RTM_CHANGE ||
			    action == RTM_DELETE)) {
				trace_act("route disappeared before" PAT, ARGS);
				if (action == RTM_CHANGE) {
					action = RTM_ADD;
					goto again;
				}
				return;
			}
			writelog(LOG_WARNING, "write(rt_sock)" PAT ": %s ",
			    ARGS, rip_strerror(errno));
			return;
		} else if (cc != w.w_rtm.rtm_msglen) {
			/* Short write: report it and give up. */
			msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
			    cc, w.w_rtm.rtm_msglen, ARGS);
			return;
		}
	}
	if (TRACEKERNEL)
		trace_misc("write kernel" PAT, ARGS);
#undef PAT
#undef ARGS
}
894 
895 
/*
 * Hash table containing our image of the kernel forwarding table.
 * KHASH(a, m) names the bin for address a and mask m; each bin is a
 * chain of khash entries linked through k_next.
 */
#define	KHASH_SIZE 71			/* should be prime */
#define	KHASH(a, m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash *khash_bins[KHASH_SIZE];

/* NOTE(review): limit applied to k_keep, presumably in seconds -- confirm. */
#define	K_KEEP_LIM	30	/* k_keep */
902 
903 static struct khash *
904 kern_find(in_addr_t dst, in_addr_t mask, in_addr_t gate,
905     struct interface *ifp, struct khash ***ppk)
906 {
907 	struct khash *k, **pk;
908 
909 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
910 		if (k->k_dst == dst && k->k_mask == mask &&
911 		    (gate == 0 || k->k_gate == gate) &&
912 		    (ifp == NULL || k->k_ifp == ifp)) {
913 			break;
914 		}
915 	}
916 	if (ppk != NULL)
917 		*ppk = pk;
918 	return (k);
919 }
920 
921 
922 /*
923  * Find out if there is an alternate route to a given destination
924  * off of a given interface.
925  */
926 static struct khash *
927 kern_alternate(in_addr_t dst, in_addr_t mask, in_addr_t gate,
928     struct interface *ifp, struct khash ***ppk)
929 {
930 	struct khash *k, **pk;
931 
932 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
933 		if (k->k_dst == dst && k->k_mask == mask &&
934 		    (k->k_gate != gate) &&
935 		    (k->k_ifp == ifp)) {
936 			break;
937 		}
938 	}
939 	if (ppk != NULL)
940 		*ppk = pk;
941 	return (k);
942 }
943 
944 static struct khash *
945 kern_add(in_addr_t dst, uint32_t mask, in_addr_t gate, struct interface *ifp)
946 {
947 	struct khash *k, **pk;
948 
949 	k = kern_find(dst, mask, gate, ifp, &pk);
950 	if (k != NULL)
951 		return (k);
952 
953 	k = rtmalloc(sizeof (*k), "kern_add");
954 
955 	(void) memset(k, 0, sizeof (*k));
956 	k->k_dst = dst;
957 	k->k_mask = mask;
958 	k->k_state = KS_NEW;
959 	k->k_keep = now.tv_sec;
960 	k->k_gate = gate;
961 	k->k_ifp = ifp;
962 	*pk = k;
963 
964 	return (k);
965 }
966 
967 /* delete all khash entries that are wired through the interface ifp */
968 void
969 kern_flush_ifp(struct interface *ifp)
970 {
971 	struct khash *k, *kprev, *knext;
972 	int i;
973 
974 	for (i = 0; i < KHASH_SIZE; i++) {
975 		kprev = NULL;
976 		for (k = khash_bins[i]; k != NULL; k = knext) {
977 			knext = k->k_next;
978 			if (k->k_ifp == ifp) {
979 				if (kprev != NULL)
980 					kprev->k_next = k->k_next;
981 				else
982 					khash_bins[i] = k->k_next;
983 				free(k);
984 				continue;
985 			}
986 			kprev = k;
987 		}
988 	}
989 }
990 
991 /*
992  * rewire khash entries that currently go through oldifp to
993  * go through newifp.
994  */
995 void
996 kern_rewire_ifp(struct interface *oldifp, struct interface *newifp)
997 {
998 	struct khash *k;
999 	int i;
1000 
1001 	for (i = 0; i < KHASH_SIZE; i++) {
1002 		for (k = khash_bins[i]; k; k = k->k_next) {
1003 			if (k->k_ifp == oldifp) {
1004 				k->k_ifp = newifp;
1005 				trace_misc("kern_rewire_ifp k 0x%lx "
1006 				    "from %s to %s", k, oldifp->int_name,
1007 				    newifp->int_name);
1008 			}
1009 		}
1010 	}
1011 }
1012 
1013 /*
1014  * Check that a static route it is still in the daemon table, and not
1015  * deleted by interfaces coming and going.  This is also the routine
1016  * responsible for adding new static routes to the daemon table.
1017  */
1018 static void
1019 kern_check_static(struct khash *k, struct interface *ifp)
1020 {
1021 	struct rt_entry *rt;
1022 	struct rt_spare new;
1023 	uint16_t rt_state = RS_STATIC;
1024 
1025 	(void) memset(&new, 0, sizeof (new));
1026 	new.rts_ifp = ifp;
1027 	new.rts_gate = k->k_gate;
1028 	new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
1029 	new.rts_metric = k->k_metric;
1030 	new.rts_time = now.tv_sec;
1031 	new.rts_origin = RO_STATIC;
1032 
1033 	rt = rtget(k->k_dst, k->k_mask);
1034 	if ((ifp != NULL && !IS_IFF_ROUTING(ifp->int_if_flags)) ||
1035 	    (k->k_state & KS_PRIVATE))
1036 		rt_state |= RS_NOPROPAGATE;
1037 
1038 	if (rt != NULL) {
1039 		if ((rt->rt_state & RS_STATIC) == 0) {
1040 			/*
1041 			 * We are already tracking this dest/mask
1042 			 * via RIP/RDISC. Ignore the static route,
1043 			 * because we don't currently have a good
1044 			 * way to compare metrics on static routes
1045 			 * with rip metrics, and therefore cannot
1046 			 * mix and match the two.
1047 			 */
1048 			return;
1049 		}
1050 		rt_state |= rt->rt_state;
1051 		if (rt->rt_state != rt_state)
1052 			rtchange(rt, rt_state, &new, 0);
1053 	} else {
1054 		rtadd(k->k_dst, k->k_mask, rt_state, &new);
1055 	}
1056 }
1057 
1058 
1059 /* operate on a kernel entry */
1060 static void
1061 kern_ioctl(struct khash *k,
1062     int action,			/* RTM_DELETE, etc */
1063     int flags)
1064 {
1065 	if (((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) ||
1066 	    (k->k_state & KS_DEPRE_IF)) {
1067 		/*
1068 		 * Prevent execution of RTM_DELETE, RTM_ADD or
1069 		 * RTM_CHANGE of interface routes
1070 		 */
1071 		trace_act("Blocking execution of %s  %s --> %s ",
1072 		    rtm_type_name(action),
1073 		    addrname(k->k_dst, k->k_mask, 0), naddr_ntoa(k->k_gate));
1074 		return;
1075 	}
1076 
1077 	switch (action) {
1078 	case RTM_DELETE:
1079 		k->k_state &= ~KS_DYNAMIC;
1080 		if (k->k_state & KS_DELETED)
1081 			return;
1082 		k->k_state |= KS_DELETED;
1083 		break;
1084 	case RTM_ADD:
1085 		k->k_state &= ~KS_DELETED;
1086 		break;
1087 	case RTM_CHANGE:
1088 		if (k->k_state & KS_DELETED) {
1089 			action = RTM_ADD;
1090 			k->k_state &= ~KS_DELETED;
1091 		}
1092 		break;
1093 	}
1094 
1095 	rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_ifp,
1096 	    k->k_metric, flags);
1097 }
1098 
1099 
1100 /* add a route the kernel told us */
1101 static void
1102 rtm_add(struct rt_msghdr *rtm,
1103     struct rt_addrinfo *info,
1104     time_t keep,
1105     boolean_t interf_route,
1106     struct interface *ifptr)
1107 {
1108 	struct khash *k;
1109 	struct interface *ifp = ifptr;
1110 	in_addr_t mask, gate = 0;
1111 	static struct msg_limit msg_no_ifp;
1112 
1113 	if (rtm->rtm_flags & RTF_HOST) {
1114 		mask = HOST_MASK;
1115 	} else if (INFO_MASK(info) != 0) {
1116 		mask = ntohl(S_ADDR(INFO_MASK(info)));
1117 	} else {
1118 		writelog(LOG_WARNING,
1119 		    "ignore %s without mask", rtm_type_name(rtm->rtm_type));
1120 		return;
1121 	}
1122 
1123 	/*
1124 	 * Find the interface toward the gateway.
1125 	 */
1126 	if (INFO_GATE(info) != NULL)
1127 		gate = S_ADDR(INFO_GATE(info));
1128 
1129 	if (ifp == NULL) {
1130 		if (INFO_GATE(info) != NULL)
1131 			ifp = iflookup(gate);
1132 		if (ifp == NULL) {
1133 			msglim(&msg_no_ifp, gate,
1134 			    "route %s --> %s nexthop is not directly connected",
1135 			    addrname(S_ADDR(INFO_DST(info)), mask, 0),
1136 			    naddr_ntoa(gate));
1137 		}
1138 	}
1139 
1140 	k = kern_add(S_ADDR(INFO_DST(info)), mask, gate, ifp);
1141 
1142 	if (k->k_state & KS_NEW)
1143 		k->k_keep = now.tv_sec+keep;
1144 	if (INFO_GATE(info) == 0) {
1145 		trace_act("note %s without gateway",
1146 		    rtm_type_name(rtm->rtm_type));
1147 		k->k_metric = HOPCNT_INFINITY;
1148 	} else if (INFO_GATE(info)->ss_family != AF_INET) {
1149 		trace_act("note %s with gateway AF=%d",
1150 		    rtm_type_name(rtm->rtm_type),
1151 		    INFO_GATE(info)->ss_family);
1152 		k->k_metric = HOPCNT_INFINITY;
1153 	} else {
1154 		k->k_gate = S_ADDR(INFO_GATE(info));
1155 		k->k_metric = rtm->rtm_rmx.rmx_hopcount;
1156 		if (k->k_metric < 0)
1157 			k->k_metric = 0;
1158 		else if (k->k_metric > HOPCNT_INFINITY-1)
1159 			k->k_metric = HOPCNT_INFINITY-1;
1160 	}
1161 
1162 	if ((k->k_state & KS_NEW) && interf_route) {
1163 		if (k->k_gate != 0 && findifaddr(k->k_gate) == NULL)
1164 			k->k_state |= KS_DEPRE_IF;
1165 		else
1166 			k->k_state |= KS_IF;
1167 	}
1168 
1169 	k->k_state &= ~(KS_NEW | KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD |
1170 	    KS_STATIC | KS_GATEWAY | KS_DELETED | KS_PRIVATE | KS_CHECK);
1171 	if (rtm->rtm_flags & RTF_GATEWAY)
1172 		k->k_state |= KS_GATEWAY;
1173 	if (rtm->rtm_flags & RTF_STATIC)
1174 		k->k_state |= KS_STATIC;
1175 	if (rtm->rtm_flags & RTF_PRIVATE)
1176 		k->k_state |= KS_PRIVATE;
1177 
1178 
1179 	if (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED)) {
1180 		if (INFO_AUTHOR(info) != 0 &&
1181 		    INFO_AUTHOR(info)->ss_family == AF_INET)
1182 			ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
1183 		else
1184 			ifp = NULL;
1185 		if (should_supply(ifp) && (ifp == NULL ||
1186 		    !(ifp->int_state & IS_REDIRECT_OK))) {
1187 			/*
1188 			 * Routers are not supposed to listen to redirects,
1189 			 * so delete it if it came via an unknown interface
1190 			 * or the interface does not have special permission.
1191 			 */
1192 			k->k_state &= ~KS_DYNAMIC;
1193 			k->k_state |= KS_DELETE;
1194 			LIM_SEC(need_kern, 0);
1195 			trace_act("mark for deletion redirected %s --> %s"
1196 			    " via %s",
1197 			    addrname(k->k_dst, k->k_mask, 0),
1198 			    naddr_ntoa(k->k_gate),
1199 			    ifp ? ifp->int_name : "unknown interface");
1200 		} else {
1201 			k->k_state |= KS_DYNAMIC;
1202 			k->k_redirect_time = now.tv_sec;
1203 			trace_act("accept redirected %s --> %s via %s",
1204 			    addrname(k->k_dst, k->k_mask, 0),
1205 			    naddr_ntoa(k->k_gate),
1206 			    ifp ? ifp->int_name : "unknown interface");
1207 		}
1208 		return;
1209 	}
1210 
1211 	/*
1212 	 * If it is not a static route, quit until the next comparison
1213 	 * between the kernel and daemon tables, when it will be deleted.
1214 	 */
1215 	if (!(k->k_state & KS_STATIC)) {
1216 		if (!(k->k_state & (KS_IF|KS_DEPRE_IF|KS_FILE)))
1217 			k->k_state |= KS_DELETE;
1218 		LIM_SEC(need_kern, k->k_keep);
1219 		return;
1220 	}
1221 
1222 	/*
1223 	 * Put static routes with real metrics into the daemon table so
1224 	 * they can be advertised.
1225 	 */
1226 
1227 	kern_check_static(k, ifp);
1228 }
1229 
1230 
1231 /* deal with packet loss */
1232 static void
1233 rtm_lose(struct rt_msghdr *rtm, struct rt_addrinfo *info)
1234 {
1235 	struct rt_spare new, *rts, *losing_rts = NULL;
1236 	struct rt_entry *rt;
1237 	int i, spares;
1238 
1239 	if (INFO_GATE(info) == NULL || INFO_GATE(info)->ss_family != AF_INET) {
1240 		trace_act("ignore %s without gateway",
1241 		    rtm_type_name(rtm->rtm_type));
1242 		age(0);
1243 		return;
1244 	}
1245 
1246 	rt = rtfind(S_ADDR(INFO_DST(info)));
1247 	if (rt != NULL) {
1248 		spares = 0;
1249 		for (i = 0; i < rt->rt_num_spares;  i++) {
1250 			rts = &rt->rt_spares[i];
1251 			if (rts->rts_gate == S_ADDR(INFO_GATE(info))) {
1252 				losing_rts = rts;
1253 				continue;
1254 			}
1255 			if (rts->rts_gate != 0 && rts->rts_ifp != &dummy_ifp)
1256 				spares++;
1257 		}
1258 	}
1259 	if (rt == NULL || losing_rts == NULL) {
1260 		trace_act("Ignore RTM_LOSING because no route found"
1261 		    " for %s through %s",
1262 		    naddr_ntoa(S_ADDR(INFO_DST(info))),
1263 		    naddr_ntoa(S_ADDR(INFO_GATE(info))));
1264 		return;
1265 	}
1266 	if (spares == 0) {
1267 		trace_act("Got RTM_LOSING, but no alternatives to gw %s."
1268 		    " deprecating route to metric 15",
1269 		    naddr_ntoa(S_ADDR(INFO_GATE(info))));
1270 		new = *losing_rts;
1271 		new.rts_metric = HOPCNT_INFINITY - 1;
1272 		rtchange(rt, rt->rt_state, &new, 0);
1273 		return;
1274 	}
1275 	trace_act("Got RTM_LOSING. Found a route with %d alternates", spares);
1276 	if (rdisc_ok)
1277 		rdisc_age(S_ADDR(INFO_GATE(info)));
1278 	age(S_ADDR(INFO_GATE(info)));
1279 }
1280 
1281 
1282 /*
1283  * Make the gateway slot of an info structure point to something
1284  * useful.  If it is not already useful, but it specifies an interface,
1285  * then fill in the sockaddr_in provided and point it there.
1286  */
1287 static int
1288 get_info_gate(struct sockaddr_storage **ssp, struct sockaddr_in *sin)
1289 {
1290 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)*ssp;
1291 	struct interface *ifp;
1292 
1293 	if (sdl == NULL)
1294 		return (0);
1295 	if ((sdl)->sdl_family == AF_INET)
1296 		return (1);
1297 	if ((sdl)->sdl_family != AF_LINK)
1298 		return (0);
1299 
1300 	ifp = ifwithindex(sdl->sdl_index, _B_TRUE);
1301 	if (ifp == NULL)
1302 		return (0);
1303 
1304 	sin->sin_addr.s_addr = ifp->int_addr;
1305 	sin->sin_family = AF_INET;
1306 	/* LINTED */
1307 	*ssp = (struct sockaddr_storage *)sin;
1308 
1309 	return (1);
1310 }
1311 
1312 
1313 /*
1314  * Clean the kernel table by copying it to the daemon image.
1315  * Eventually the daemon will delete any extra routes.
1316  */
1317 void
1318 sync_kern(void)
1319 {
1320 	int i;
1321 	struct khash *k;
1322 	struct {
1323 		struct T_optmgmt_req req;
1324 		struct opthdr hdr;
1325 	} req;
1326 	union {
1327 		struct T_optmgmt_ack ack;
1328 		unsigned char space[64];
1329 	} ack;
1330 	struct opthdr *rh;
1331 	struct strbuf cbuf, dbuf;
1332 	int ipfd, nroutes, flags, r;
1333 	mib2_ipRouteEntry_t routes[8];
1334 	mib2_ipRouteEntry_t *rp;
1335 	struct rt_msghdr rtm;
1336 	struct rt_addrinfo info;
1337 	struct sockaddr_in sin_dst;
1338 	struct sockaddr_in sin_gate;
1339 	struct sockaddr_in sin_mask;
1340 	struct sockaddr_in sin_author;
1341 	struct interface *ifp;
1342 	char ifname[LIFNAMSIZ + 1];
1343 
1344 	for (i = 0; i < KHASH_SIZE; i++) {
1345 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1346 			if (!(k->k_state & (KS_IF|KS_DEPRE_IF)))
1347 				k->k_state |= KS_CHECK;
1348 		}
1349 	}
1350 
1351 	ipfd = open(IP_DEV_NAME, O_RDWR);
1352 	if (ipfd == -1) {
1353 		msglog("open " IP_DEV_NAME ": %s", rip_strerror(errno));
1354 		goto hash_clean;
1355 	}
1356 
1357 	req.req.PRIM_type = T_OPTMGMT_REQ;
1358 	req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req;
1359 	req.req.OPT_length = sizeof (req.hdr);
1360 	req.req.MGMT_flags = T_CURRENT;
1361 
1362 	req.hdr.level = MIB2_IP;
1363 	req.hdr.name = 0;
1364 	req.hdr.len = 0;
1365 
1366 	cbuf.buf = (caddr_t)&req;
1367 	cbuf.len = sizeof (req);
1368 
1369 	if (putmsg(ipfd, &cbuf, NULL, 0) == -1) {
1370 		msglog("T_OPTMGMT_REQ putmsg: %s", rip_strerror(errno));
1371 		goto hash_clean;
1372 	}
1373 
1374 	for (;;) {
1375 		cbuf.buf = (caddr_t)&ack;
1376 		cbuf.maxlen = sizeof (ack);
1377 		dbuf.buf = (caddr_t)routes;
1378 		dbuf.maxlen = sizeof (routes);
1379 		flags = 0;
1380 		r = getmsg(ipfd, &cbuf, &dbuf, &flags);
1381 		if (r == -1) {
1382 			msglog("T_OPTMGMT_REQ getmsg: %s", rip_strerror(errno));
1383 			goto hash_clean;
1384 		}
1385 
1386 		if (cbuf.len < sizeof (struct T_optmgmt_ack) ||
1387 		    ack.ack.PRIM_type != T_OPTMGMT_ACK ||
1388 		    ack.ack.MGMT_flags != T_SUCCESS ||
1389 		    ack.ack.OPT_length < sizeof (struct opthdr)) {
1390 			msglog("bad T_OPTMGMT response; len=%d prim=%d "
1391 			    "flags=%d optlen=%d", cbuf.len, ack.ack.PRIM_type,
1392 			    ack.ack.MGMT_flags, ack.ack.OPT_length);
1393 			goto hash_clean;
1394 		}
1395 		/* LINTED */
1396 		rh = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset);
1397 		if (rh->level == 0 && rh->name == 0) {
1398 			break;
1399 		}
1400 		if (rh->level != MIB2_IP || rh->name != MIB2_IP_21) {
1401 			while (r == MOREDATA) {
1402 				r = getmsg(ipfd, NULL, &dbuf, &flags);
1403 			}
1404 			continue;
1405 		}
1406 		break;
1407 	}
1408 
1409 	(void) memset(&rtm, 0, sizeof (rtm));
1410 	(void) memset(&info, 0, sizeof (info));
1411 	(void) memset(&sin_dst, 0, sizeof (sin_dst));
1412 	(void) memset(&sin_gate, 0, sizeof (sin_gate));
1413 	(void) memset(&sin_mask, 0, sizeof (sin_mask));
1414 	(void) memset(&sin_author, 0, sizeof (sin_author));
1415 	sin_dst.sin_family = AF_INET;
1416 	/* LINTED */
1417 	info.rti_info[RTAX_DST] = (struct sockaddr_storage *)&sin_dst;
1418 	sin_gate.sin_family = AF_INET;
1419 	/* LINTED */
1420 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr_storage *)&sin_gate;
1421 	sin_mask.sin_family = AF_INET;
1422 	/* LINTED */
1423 	info.rti_info[RTAX_NETMASK] = (struct sockaddr_storage *)&sin_mask;
1424 	sin_dst.sin_family = AF_INET;
1425 	/* LINTED */
1426 	info.rti_info[RTAX_AUTHOR] = (struct sockaddr_storage *)&sin_author;
1427 
1428 	for (;;) {
1429 		nroutes = dbuf.len / sizeof (mib2_ipRouteEntry_t);
1430 		for (rp = routes; nroutes > 0; ++rp, nroutes--) {
1431 
1432 			/*
1433 			 * Ignore IRE cache, broadcast, and local address
1434 			 * entries; they're not subject to routing socket
1435 			 * control.
1436 			 */
1437 			if (rp->ipRouteInfo.re_ire_type &
1438 			    (IRE_BROADCAST | IRE_CACHE | IRE_LOCAL))
1439 				continue;
1440 
1441 			/* ignore multicast and link local addresses */
1442 			if (IN_MULTICAST(ntohl(rp->ipRouteDest)) ||
1443 			    IN_LINKLOCAL(ntohl(rp->ipRouteDest))) {
1444 				continue;
1445 			}
1446 
1447 
1448 #ifdef DEBUG_KERNEL_ROUTE_READ
1449 			(void) fprintf(stderr, "route type %d, ire type %08X, "
1450 			    "flags %08X: %s", rp->ipRouteType,
1451 			    rp->ipRouteInfo.re_ire_type,
1452 			    rp->ipRouteInfo.re_flags,
1453 			    naddr_ntoa(rp->ipRouteDest));
1454 			(void) fprintf(stderr, " %s",
1455 			    naddr_ntoa(rp->ipRouteMask));
1456 			(void) fprintf(stderr, " %s\n",
1457 			    naddr_ntoa(rp->ipRouteNextHop));
1458 #endif
1459 
1460 			/* Fake up the needed entries */
1461 			rtm.rtm_flags = rp->ipRouteInfo.re_flags;
1462 			rtm.rtm_type = RTM_GET;
1463 			rtm.rtm_rmx.rmx_hopcount = rp->ipRouteMetric1;
1464 
1465 			(void) memset(ifname, 0, sizeof (ifname));
1466 			if (rp->ipRouteIfIndex.o_length <
1467 			    sizeof (rp->ipRouteIfIndex.o_bytes))
1468 				rp->ipRouteIfIndex.o_bytes[
1469 				    rp->ipRouteIfIndex.o_length] = '\0';
1470 				(void) strncpy(ifname,
1471 				    rp->ipRouteIfIndex.o_bytes,
1472 				    sizeof (ifname));
1473 
1474 			/*
1475 			 * First try to match up on gwkludge entries
1476 			 * before trying to match ifp by name/nexthop.
1477 			 */
1478 			if ((ifp = gwkludge_iflookup(rp->ipRouteDest,
1479 			    rp->ipRouteNextHop,
1480 			    ntohl(rp->ipRouteMask))) == NULL) {
1481 				ifp = lifp_iflookup(rp->ipRouteNextHop, ifname);
1482 			}
1483 
1484 #ifdef DEBUG_KERNEL_ROUTE_READ
1485 			if (ifp != NULL) {
1486 				(void) fprintf(stderr, "   found interface"
1487 				    " %-4s #%-3d ", ifp->int_name,
1488 				    (ifp->int_phys != NULL) ?
1489 				    ifp->int_phys->phyi_index : 0);
1490 				(void) fprintf(stderr, "%-15s-->%-15s \n",
1491 				    naddr_ntoa(ifp->int_addr),
1492 				    addrname(((ifp->int_if_flags &
1493 				    IFF_POINTOPOINT) ?
1494 				    ifp->int_dstaddr : htonl(ifp->int_net)),
1495 				    ifp->int_mask, 1));
1496 			}
1497 #endif
1498 
1499 			info.rti_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK;
1500 			if (rp->ipRouteInfo.re_ire_type & IRE_HOST_REDIRECT)
1501 				info.rti_addrs |= RTA_AUTHOR;
1502 			sin_dst.sin_addr.s_addr = rp->ipRouteDest;
1503 			sin_gate.sin_addr.s_addr = rp->ipRouteNextHop;
1504 			sin_mask.sin_addr.s_addr = rp->ipRouteMask;
1505 			sin_author.sin_addr.s_addr =
1506 			    rp->ipRouteInfo.re_src_addr;
1507 
1508 			/*
1509 			 * Note static routes and interface routes, and also
1510 			 * preload the image of the kernel table so that
1511 			 * we can later clean it, as well as avoid making
1512 			 * unneeded changes.  Keep the old kernel routes for a
1513 			 * few seconds to allow a RIP or router-discovery
1514 			 * response to be heard.
1515 			 */
1516 			rtm_add(&rtm, &info, MAX_WAITTIME,
1517 			    ((rp->ipRouteInfo.re_ire_type &
1518 			    (IRE_INTERFACE|IRE_LOOPBACK)) != 0), ifp);
1519 		}
1520 		if (r == 0) {
1521 			break;
1522 		}
1523 		r = getmsg(ipfd, NULL, &dbuf, &flags);
1524 	}
1525 
1526 hash_clean:
1527 	if (ipfd != -1)
1528 		(void) close(ipfd);
1529 	for (i = 0; i < KHASH_SIZE; i++) {
1530 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1531 
1532 			/*
1533 			 * KS_DELETED routes have been removed from the
1534 			 * kernel, but we keep them around for reasons
1535 			 * stated in del_static(), so we skip the check
1536 			 * for KS_DELETED routes here.
1537 			 */
1538 			if ((k->k_state & (KS_CHECK|KS_DELETED)) == KS_CHECK) {
1539 
1540 				if (!(k->k_state & KS_DYNAMIC)) {
1541 					writelog(LOG_WARNING,
1542 					    "%s --> %s disappeared from kernel",
1543 					    addrname(k->k_dst, k->k_mask, 0),
1544 					    naddr_ntoa(k->k_gate));
1545 				}
1546 				del_static(k->k_dst, k->k_mask, k->k_gate,
1547 				    k->k_ifp, 1);
1548 
1549 			}
1550 		}
1551 	}
1552 }
1553 
1554 
1555 /* Listen to announcements from the kernel */
1556 void
1557 read_rt(void)
1558 {
1559 	long cc;
1560 	struct interface *ifp;
1561 	struct sockaddr_in gate_sin;
1562 	in_addr_t mask, gate;
1563 	union {
1564 		struct {
1565 			struct rt_msghdr rtm;
1566 			struct sockaddr_storage addrs[RTA_NUMBITS];
1567 		} r;
1568 		struct if_msghdr ifm;
1569 	} m;
1570 	char str[100], *strp;
1571 	struct rt_addrinfo info;
1572 
1573 
1574 	for (;;) {
1575 		cc = read(rt_sock, &m, sizeof (m));
1576 		if (cc <= 0) {
1577 			if (cc < 0 && errno != EWOULDBLOCK)
1578 				LOGERR("read(rt_sock)");
1579 			return;
1580 		}
1581 
1582 		if (TRACERTS)
1583 			dump_rt_msg("read", &m.r.rtm, cc);
1584 
1585 		if (cc < m.r.rtm.rtm_msglen) {
1586 			msglog("routing message truncated (%d < %d)",
1587 			    cc, m.r.rtm.rtm_msglen);
1588 		}
1589 
1590 		if (m.r.rtm.rtm_version != RTM_VERSION) {
1591 			msglog("bogus routing message version %d",
1592 			    m.r.rtm.rtm_version);
1593 			continue;
1594 		}
1595 
1596 		ifp = NULL;
1597 
1598 		if (m.r.rtm.rtm_type == RTM_IFINFO ||
1599 		    m.r.rtm.rtm_type == RTM_NEWADDR ||
1600 		    m.r.rtm.rtm_type == RTM_DELADDR) {
1601 			strp = if_bit_string(m.ifm.ifm_flags, _B_TRUE);
1602 			if (strp == NULL) {
1603 				strp = str;
1604 				(void) sprintf(str, "%#x", m.ifm.ifm_flags);
1605 			}
1606 			ifp = ifwithindex(m.ifm.ifm_index,
1607 			    m.r.rtm.rtm_type != RTM_DELADDR);
1608 			if (ifp == NULL) {
1609 				char ifname[LIFNAMSIZ], *ifnamep;
1610 
1611 				ifnamep = if_indextoname(m.ifm.ifm_index,
1612 				    ifname);
1613 				if (ifnamep == NULL) {
1614 					trace_act("note %s with flags %s"
1615 					    " for unknown interface index #%d",
1616 					    rtm_type_name(m.r.rtm.rtm_type),
1617 					    strp, m.ifm.ifm_index);
1618 				} else {
1619 					trace_act("note %s with flags %s"
1620 					    " for unknown interface %s",
1621 					    rtm_type_name(m.r.rtm.rtm_type),
1622 					    strp, ifnamep);
1623 				}
1624 			} else {
1625 				trace_act("note %s with flags %s for %s",
1626 				    rtm_type_name(m.r.rtm.rtm_type),
1627 				    strp, ifp->int_name);
1628 			}
1629 			if (strp != str)
1630 				free(strp);
1631 
1632 			/*
1633 			 * After being informed of a change to an interface,
1634 			 * check them all now if the check would otherwise
1635 			 * be a long time from now, if the interface is
1636 			 * not known, or if the interface has been turned
1637 			 * off or on.
1638 			 */
1639 			if (ifscan_timer.tv_sec-now.tv_sec >=
1640 			    CHECK_BAD_INTERVAL || ifp == NULL ||
1641 			    ((ifp->int_if_flags ^ m.ifm.ifm_flags) &
1642 			    IFF_UP) != 0)
1643 				ifscan_timer.tv_sec = now.tv_sec;
1644 			continue;
1645 		} else if (m.r.rtm.rtm_type == RTM_CHGADDR ||
1646 		    m.r.rtm.rtm_type == RTM_FREEADDR) {
1647 			continue;
1648 		} else {
1649 			if (m.r.rtm.rtm_index != 0)
1650 				ifp = ifwithindex(m.r.rtm.rtm_index, 1);
1651 		}
1652 
1653 		(void) strlcpy(str, rtm_type_name(m.r.rtm.rtm_type),
1654 		    sizeof (str));
1655 		strp = &str[strlen(str)];
1656 		if (m.r.rtm.rtm_type <= RTM_CHANGE)
1657 			strp += snprintf(strp, sizeof (str) - (strp - str),
1658 			    " from pid %d", (int)m.r.rtm.rtm_pid);
1659 
1660 		/* LINTED */
1661 		(void) rt_xaddrs(&info, (struct sockaddr_storage *)(&m.r.rtm +
1662 		    1), (char *)&m + cc, m.r.rtm.rtm_addrs);
1663 
1664 		if (INFO_DST(&info) == 0) {
1665 			trace_act("ignore %s without dst", str);
1666 			continue;
1667 		}
1668 
1669 		if (INFO_DST(&info)->ss_family != AF_INET) {
1670 			trace_act("ignore %s for AF %d", str,
1671 			    INFO_DST(&info)->ss_family);
1672 			continue;
1673 		}
1674 
1675 		mask = ((INFO_MASK(&info) != 0) ?
1676 		    ntohl(S_ADDR(INFO_MASK(&info))) :
1677 		    (m.r.rtm.rtm_flags & RTF_HOST) ?
1678 		    HOST_MASK : std_mask(S_ADDR(INFO_DST(&info))));
1679 
1680 		strp += snprintf(strp, sizeof (str) - (strp - str), ": %s",
1681 		    addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1682 
1683 		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))) ||
1684 		    IN_LINKLOCAL(ntohl(S_ADDR(INFO_DST(&info))))) {
1685 			trace_act("ignore multicast/link local %s", str);
1686 			continue;
1687 		}
1688 
1689 		if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1690 			trace_act("ignore ARP %s", str);
1691 			continue;
1692 		}
1693 
1694 		if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1695 			gate = S_ADDR(INFO_GATE(&info));
1696 			strp += snprintf(strp, sizeof (str) - (strp - str),
1697 			    " --> %s", naddr_ntoa(gate));
1698 		} else {
1699 			gate = 0;
1700 		}
1701 
1702 		if (INFO_AUTHOR(&info) != 0)
1703 			strp += snprintf(strp, sizeof (str) - (strp - str),
1704 			    " by authority of %s",
1705 			    saddr_ntoa(INFO_AUTHOR(&info)));
1706 
1707 		switch (m.r.rtm.rtm_type) {
1708 		case RTM_ADD:
1709 		case RTM_CHANGE:
1710 		case RTM_REDIRECT:
1711 			if (m.r.rtm.rtm_errno != 0) {
1712 				trace_act("ignore %s with \"%s\" error",
1713 				    str, rip_strerror(m.r.rtm.rtm_errno));
1714 			} else {
1715 				trace_act("%s", str);
1716 				rtm_add(&m.r.rtm, &info, 0,
1717 				    !(m.r.rtm.rtm_flags & RTF_GATEWAY) &&
1718 				    m.r.rtm.rtm_type != RTM_REDIRECT, ifp);
1719 
1720 			}
1721 			break;
1722 
1723 		case RTM_DELETE:
1724 			if (m.r.rtm.rtm_errno != 0 &&
1725 			    m.r.rtm.rtm_errno != ESRCH) {
1726 				trace_act("ignore %s with \"%s\" error",
1727 				    str, rip_strerror(m.r.rtm.rtm_errno));
1728 			} else {
1729 				trace_act("%s", str);
1730 				del_static(S_ADDR(INFO_DST(&info)), mask,
1731 				    gate, ifp, 1);
1732 			}
1733 			break;
1734 
1735 		case RTM_LOSING:
1736 			trace_act("%s", str);
1737 			rtm_lose(&m.r.rtm, &info);
1738 			break;
1739 
1740 		default:
1741 			trace_act("ignore %s", str);
1742 			break;
1743 		}
1744 	}
1745 }
1746 
1747 
1748 /*
1749  * Disassemble a routing message.  The result is an array of pointers
1750  * to sockaddr_storage structures stored in the info argument.
1751  *
1752  * ss is a pointer to the beginning of the data following the
1753  * rt_msghdr contained in the routing socket message, which consists
1754  * of a string of concatenated sockaddr structure of different types.
1755  *
1756  * Extended attributes can be appended at the end of the list.
1757  */
1758 static int
1759 rt_xaddrs(struct rt_addrinfo *info,
1760     struct sockaddr_storage *ss,
1761     char *lim,
1762     int addrs)
1763 {
1764 	int retv = 0;
1765 	int i;
1766 	int abit;
1767 	int complaints;
1768 	static int prev_complaints;
1769 
1770 #define	XBAD_AF		0x1
1771 #define	XBAD_SHORT	0x2
1772 #define	XBAD_LONG	0x4
1773 
1774 	(void) memset(info, 0, sizeof (*info));
1775 	info->rti_addrs = addrs;
1776 	complaints = 0;
1777 	for (i = 0, abit = 1; i < RTAX_MAX && (char *)ss < lim;
1778 	    i++, abit <<= 1) {
1779 		if ((addrs & abit) == 0)
1780 			continue;
1781 		info->rti_info[i] = ss;
1782 		/* Horrible interface here */
1783 		switch (ss->ss_family) {
1784 		case AF_UNIX:
1785 			/* LINTED */
1786 			ss = (struct sockaddr_storage *)(
1787 			    (struct sockaddr_un *)ss + 1);
1788 			break;
1789 		case AF_INET:
1790 			/* LINTED */
1791 			ss = (struct sockaddr_storage *)(
1792 			    (struct sockaddr_in *)ss + 1);
1793 			break;
1794 		case AF_LINK:
1795 			/* LINTED */
1796 			ss = (struct sockaddr_storage *)(
1797 			    (struct sockaddr_dl *)ss + 1);
1798 			break;
1799 		case AF_INET6:
1800 			/* LINTED */
1801 			ss = (struct sockaddr_storage *)(
1802 			    (struct sockaddr_in6 *)ss + 1);
1803 			break;
1804 		default:
1805 			if (!(prev_complaints & XBAD_AF))
1806 				writelog(LOG_WARNING,
1807 				    "unknown address family %d "
1808 				    "encountered", ss->ss_family);
1809 			if (complaints & XBAD_AF)
1810 				goto xaddr_done;
1811 			/* LINTED */
1812 			ss = (struct sockaddr_storage *)(
1813 			    (struct sockaddr *)ss + 1);
1814 			complaints |= XBAD_AF;
1815 			info->rti_addrs &= abit - 1;
1816 			addrs = info->rti_addrs;
1817 			retv = -1;
1818 			break;
1819 		}
1820 		if ((char *)ss > lim) {
1821 			if (!(prev_complaints & XBAD_SHORT))
1822 				msglog("sockaddr %d too short by %d "
1823 				    "bytes", i + 1, (char *)ss - lim);
1824 			complaints |= XBAD_SHORT;
1825 			info->rti_info[i] = NULL;
1826 			info->rti_addrs &= abit - 1;
1827 			retv = -1;
1828 			goto xaddr_done;
1829 		}
1830 	}
1831 
1832 	while (((char *)ss + sizeof (rtm_ext_t)) <= lim) {
1833 		rtm_ext_t *tp;
1834 		char *nxt;
1835 
1836 		/* LINTED: alignment */
1837 		tp = (rtm_ext_t *)ss;
1838 		nxt = (char *)(tp + 1) + tp->rtmex_len;
1839 
1840 		if (!IS_P2ALIGNED(tp->rtmex_len, sizeof (uint32_t)) ||
1841 		    nxt > lim) {
1842 			break;
1843 		}
1844 
1845 		/* LINTED: alignment */
1846 		ss = (struct sockaddr_storage *)nxt;
1847 	}
1848 
1849 	if ((char *)ss != lim) {
1850 		if ((char *)ss > lim) {
1851 			if (!(prev_complaints & XBAD_SHORT))
1852 				msglog("routing message too short by %d bytes",
1853 				    (char *)ss - lim);
1854 			complaints |= XBAD_SHORT;
1855 		} else if (!(prev_complaints & XBAD_LONG)) {
1856 			msglog("%d bytes of routing message left over",
1857 			    lim - (char *)ss);
1858 			complaints |= XBAD_LONG;
1859 		}
1860 		retv = -1;
1861 	}
1862 xaddr_done:
1863 	prev_complaints = complaints;
1864 	return (retv);
1865 }
1866 
1867 
1868 /* after aggregating, note routes that belong in the kernel */
1869 static void
1870 kern_out(struct ag_info *ag)
1871 {
1872 	struct khash *k;
1873 	struct interface *ifp;
1874 
1875 	ifp = ag->ag_ifp;
1876 
1877 	/*
1878 	 * Do not install bad routes if they are not already present.
1879 	 * This includes routes that had RS_NET_SYN for interfaces that
1880 	 * recently died.
1881 	 */
1882 	if (ag->ag_metric == HOPCNT_INFINITY) {
1883 		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask,
1884 		    ag->ag_nhop, ag->ag_ifp, NULL);
1885 		if (k == NULL)
1886 			return;
1887 	} else {
1888 		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask, ag->ag_nhop,
1889 		    ifp);
1890 	}
1891 
1892 	if (k->k_state & KS_NEW) {
1893 		/* will need to add new entry to the kernel table */
1894 		k->k_state = KS_ADD;
1895 		if (ag->ag_state & AGS_GATEWAY)
1896 			k->k_state |= KS_GATEWAY;
1897 		if (ag->ag_state & AGS_IF)
1898 			k->k_state |= KS_IF;
1899 		if (ag->ag_state & AGS_PASSIVE)
1900 			k->k_state |= KS_PASSIVE;
1901 		if (ag->ag_state & AGS_FILE)
1902 			k->k_state |= KS_FILE;
1903 		k->k_gate = ag->ag_nhop;
1904 		k->k_ifp = ifp;
1905 		k->k_metric = ag->ag_metric;
1906 		return;
1907 	}
1908 
1909 	if ((k->k_state & (KS_STATIC|KS_DEPRE_IF)) ||
1910 	    ((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF)) {
1911 		return;
1912 	}
1913 
1914 	/* modify existing kernel entry if necessary */
1915 	if (k->k_gate == ag->ag_nhop && k->k_ifp == ag->ag_ifp &&
1916 	    k->k_metric != ag->ag_metric) {
1917 			/*
1918 			 * Must delete bad interface routes etc.
1919 			 * to change them.
1920 			 */
1921 			if (k->k_metric == HOPCNT_INFINITY)
1922 				k->k_state |= KS_DEL_ADD;
1923 			k->k_gate = ag->ag_nhop;
1924 			k->k_metric = ag->ag_metric;
1925 			k->k_state |= KS_CHANGE;
1926 	}
1927 
1928 	/*
1929 	 * If the daemon thinks the route should exist, forget
1930 	 * about any redirections.
1931 	 * If the daemon thinks the route should exist, eventually
1932 	 * override manual intervention by the operator.
1933 	 */
1934 	if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1935 		k->k_state &= ~KS_DYNAMIC;
1936 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1937 	}
1938 
1939 	if ((k->k_state & KS_GATEWAY) && !(ag->ag_state & AGS_GATEWAY)) {
1940 		k->k_state &= ~KS_GATEWAY;
1941 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1942 	} else if (!(k->k_state & KS_GATEWAY) && (ag->ag_state & AGS_GATEWAY)) {
1943 		k->k_state |= KS_GATEWAY;
1944 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1945 	}
1946 
1947 	/*
1948 	 * Deleting-and-adding is necessary to change aspects of a route.
1949 	 * Just delete instead of deleting and then adding a bad route.
1950 	 * Otherwise, we want to keep the route in the kernel.
1951 	 */
1952 	if (k->k_metric == HOPCNT_INFINITY && (k->k_state & KS_DEL_ADD))
1953 		k->k_state |= KS_DELETE;
1954 	else
1955 		k->k_state &= ~KS_DELETE;
1956 #undef RT
1957 }
1958 
1959 /*
1960  * Update our image of the kernel forwarding table using the given
1961  * route from our internal routing table.
1962  */
1963 
1964 /*ARGSUSED1*/
1965 static int
1966 walk_kern(struct radix_node *rn, void *argp)
1967 {
1968 #define	RT ((struct rt_entry *)rn)
1969 	uint8_t metric, pref;
1970 	uint_t ags = 0;
1971 	int i;
1972 	struct rt_spare *rts;
1973 
1974 	/* Do not install synthetic routes */
1975 	if (RT->rt_state & RS_NET_SYN)
1976 		return (0);
1977 
1978 	/*
1979 	 * Do not install static routes here. Only
1980 	 * read_rt->rtm_add->kern_add should install those
1981 	 */
1982 	if ((RT->rt_state & RS_STATIC) &&
1983 	    (RT->rt_spares[0].rts_origin != RO_FILE))
1984 		return (0);
1985 
1986 	/* Do not clobber kernel if this is a route for a dead interface */
1987 	if (RT->rt_state & RS_BADIF)
1988 		return (0);
1989 
1990 	if (!(RT->rt_state & RS_IF)) {
1991 		/* This is an ordinary route, not for an interface. */
1992 
1993 		/*
1994 		 * aggregate, ordinary good routes without regard to
1995 		 * their metric
1996 		 */
1997 		pref = 1;
1998 		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1999 
2000 		/*
2001 		 * Do not install host routes directly to hosts, to avoid
2002 		 * interfering with ARP entries in the kernel table.
2003 		 */
2004 		if (RT_ISHOST(RT) && ntohl(RT->rt_dst) == RT->rt_gate)
2005 			return (0);
2006 
2007 	} else {
2008 		/*
2009 		 * This is an interface route.
2010 		 * Do not install routes for "external" remote interfaces.
2011 		 */
2012 		if (RT->rt_ifp != NULL && (RT->rt_ifp->int_state & IS_EXTERNAL))
2013 			return (0);
2014 
2015 		/* Interfaces should override received routes. */
2016 		pref = 0;
2017 		ags |= (AGS_IF | AGS_CORS_GATE);
2018 		if (RT->rt_ifp != NULL &&
2019 		    !(RT->rt_ifp->int_if_flags & IFF_LOOPBACK) &&
2020 		    (RT->rt_ifp->int_state & (IS_PASSIVE|IS_ALIAS)) ==
2021 		    IS_PASSIVE) {
2022 			ags |= AGS_PASSIVE;
2023 		}
2024 
2025 		/*
2026 		 * If it is not an interface, or an alias for an interface,
2027 		 * it must be a "gateway."
2028 		 *
2029 		 * If it is a "remote" interface, it is also a "gateway" to
2030 		 * the kernel if is not a alias.
2031 		 */
2032 		if (RT->rt_ifp == NULL || (RT->rt_ifp->int_state & IS_REMOTE)) {
2033 
2034 			ags |= (AGS_GATEWAY | AGS_SUPPRESS);
2035 
2036 			/*
2037 			 * Do not aggregate IS_PASSIVE routes.
2038 			 */
2039 			if (!(RT->rt_ifp->int_state & IS_PASSIVE))
2040 				ags |= AGS_AGGREGATE;
2041 		}
2042 	}
2043 
2044 	metric = RT->rt_metric;
2045 	if (metric == HOPCNT_INFINITY) {
2046 		/* If the route is dead, try hard to aggregate. */
2047 		pref = HOPCNT_INFINITY;
2048 		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
2049 		ags &= ~(AGS_IF | AGS_CORS_GATE);
2050 	}
2051 
2052 	/*
2053 	 * dump all routes that have the same metric as rt_spares[0]
2054 	 * into the kern_table, to be added to the kernel.
2055 	 */
2056 	for (i = 0; i < RT->rt_num_spares; i++) {
2057 		rts = &RT->rt_spares[i];
2058 
2059 		/* Do not install external routes */
2060 		if (rts->rts_flags & RTS_EXTERNAL)
2061 			continue;
2062 
2063 		if (rts->rts_metric == metric) {
2064 			ag_check(RT->rt_dst, RT->rt_mask,
2065 			    rts->rts_router, rts->rts_ifp, rts->rts_gate,
2066 			    metric, pref, 0, 0,
2067 			    (rts->rts_origin & RO_FILE) ? (ags|AGS_FILE) : ags,
2068 			    kern_out);
2069 		}
2070 	}
2071 	return (0);
2072 #undef RT
2073 }
2074 
2075 
/*
 * Update the kernel table to match the daemon table.
 *
 * The daemon's radix tree is walked with walk_kern() and the aggregation
 * buffer is flushed through kern_out, which updates the daemon's copy of
 * the kernel table (the khash bins).  Every khash entry is then examined
 * and the necessary RTM_DELETE / RTM_ADD requests are issued through
 * kern_ioctl().  Entries whose deletion is confirmed are unlinked from
 * their hash bin and freed.
 */
static void
fix_kern(void)
{
	int i;
	struct khash *k, *pk, *knext;


	/*
	 * Start from the aging timer; entries that are being held may
	 * adjust need_kern below through LIM_SEC().
	 */
	need_kern = age_timer;

	/* Walk daemon table, updating the copy of the kernel table. */
	(void) rn_walktree(rhead, walk_kern, NULL);
	ag_flush(0, 0, kern_out);

	for (i = 0; i < KHASH_SIZE; i++) {
		/*
		 * pk is the last entry of this bin that was kept; it is
		 * the predecessor to patch when the current one is unlinked.
		 */
		pk = NULL;
		for (k = khash_bins[i]; k != NULL;  k = knext) {
			/* Fetch the successor now: k may be freed below. */
			knext = k->k_next;

			/* Do not touch local interface routes */
			if ((k->k_state & KS_DEPRE_IF) ||
			    (k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) {
				pk = k;
				continue;
			}

			/* Do not touch static routes */
			if (k->k_state & KS_STATIC) {
				kern_check_static(k, 0);
				pk = k;
				continue;
			}

			/* check hold on routes deleted by the operator */
			if (k->k_keep > now.tv_sec) {
				/* ensure we check when the hold is over */
				LIM_SEC(need_kern, k->k_keep);
				pk = k;
				continue;
			}

			/*
			 * The entry is marked for deletion and is not a
			 * redirect-learned (KS_DYNAMIC) route: remove it
			 * from the kernel and from the hash bin.
			 */
			if ((k->k_state & KS_DELETE) &&
			    !(k->k_state & KS_DYNAMIC)) {
				/*
				 * When a default route goes away and
				 * kern_alternate() finds no other entry for
				 * it, call rdisc_restore() on its interface.
				 */
				if ((k->k_dst == RIP_DEFAULT) &&
				    (k->k_ifp != NULL) &&
				    (kern_alternate(RIP_DEFAULT,
				    k->k_mask, k->k_gate, k->k_ifp,
				    NULL) == NULL))
					rdisc_restore(k->k_ifp);
				kern_ioctl(k, RTM_DELETE, 0);
				/* Unlink k; pk stays the predecessor. */
				if (pk != NULL)
					pk->k_next = knext;
				else
					khash_bins[i] = knext;
				free(k);
				continue;
			}

			/* Delete first when a delete-then-add was requested. */
			if (k->k_state & KS_DEL_ADD)
				kern_ioctl(k, RTM_DELETE, 0);

			if (k->k_state & KS_ADD) {
				if ((k->k_dst == RIP_DEFAULT) &&
				    (k->k_ifp != NULL))
					rdisc_suppress(k->k_ifp);
				/*
				 * RTF_GATEWAY is requested for gateway and
				 * redirect-learned entries, otherwise no flag.
				 */
				kern_ioctl(k, RTM_ADD,
				    ((0 != (k->k_state & (KS_GATEWAY |
				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
			} else if (k->k_state & KS_CHANGE) {
				/*
				 * Should be using RTM_CHANGE here, but
				 * since RTM_CHANGE is currently
				 * not multipath-aware, and assumes
				 * that RTF_GATEWAY implies the gateway
				 * of the route for dst has to be
				 * changed, we play safe, and do a del + add.
				 */
				kern_ioctl(k,  RTM_DELETE, 0);
				kern_ioctl(k, RTM_ADD,
				    ((0 != (k->k_state & (KS_GATEWAY |
				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
			}
			/* The pending operations have been issued. */
			k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);

			/*
			 * Mark this route to be deleted in the next cycle.
			 * This deletes routes that disappear from the
			 * daemon table, since the normal aging code
			 * will clear the bit for routes that have not
			 * disappeared from the daemon table.
			 */
			k->k_state |= KS_DELETE;
			pk = k;
		}
	}
}
2172 
2173 
2174 /* Delete a static route in the image of the kernel table. */
2175 void
2176 del_static(in_addr_t dst, in_addr_t mask, in_addr_t gate,
2177     struct interface *ifp, int gone)
2178 {
2179 	struct khash *k;
2180 	struct rt_entry *rt;
2181 
2182 	/*
2183 	 * Just mark it in the table to be deleted next time the kernel
2184 	 * table is updated.
2185 	 * If it has already been deleted, mark it as such, and set its
2186 	 * keep-timer so that it will not be deleted again for a while.
2187 	 * This lets the operator delete a route added by the daemon
2188 	 * and add a replacement.
2189 	 */
2190 	k = kern_find(dst, mask, gate, ifp, NULL);
2191 	if (k != NULL && (gate == 0 || k->k_gate == gate)) {
2192 		k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
2193 		k->k_state |= KS_DELETE;
2194 		if (gone) {
2195 			k->k_state |= KS_DELETED;
2196 			k->k_keep = now.tv_sec + K_KEEP_LIM;
2197 		}
2198 	}
2199 
2200 	rt = rtget(dst, mask);
2201 	if (rt != NULL && (rt->rt_state & RS_STATIC))
2202 		rtbad(rt, NULL);
2203 }
2204 
2205 
2206 /*
2207  * Delete all routes generated from ICMP Redirects that use a given gateway,
2208  * as well as old redirected routes.
2209  */
2210 void
2211 del_redirects(in_addr_t bad_gate, time_t old)
2212 {
2213 	int i;
2214 	struct khash *k;
2215 	boolean_t dosupply = should_supply(NULL);
2216 
2217 	for (i = 0; i < KHASH_SIZE; i++) {
2218 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
2219 			if (!(k->k_state & KS_DYNAMIC) ||
2220 			    (k->k_state & (KS_STATIC|KS_IF|KS_DEPRE_IF)))
2221 				continue;
2222 
2223 			if (k->k_gate != bad_gate && k->k_redirect_time > old &&
2224 			    !dosupply)
2225 				continue;
2226 
2227 			k->k_state |= KS_DELETE;
2228 			k->k_state &= ~KS_DYNAMIC;
2229 			need_kern.tv_sec = now.tv_sec;
2230 			trace_act("mark redirected %s --> %s for deletion",
2231 			    addrname(k->k_dst, k->k_mask, 0),
2232 			    naddr_ntoa(k->k_gate));
2233 		}
2234 	}
2235 }
2236 
2237 /* Start the daemon tables. */
2238 void
2239 rtinit(void)
2240 {
2241 	int i;
2242 	struct ag_info *ag;
2243 
2244 	/* Initialize the radix trees */
2245 	rn_init();
2246 	(void) rn_inithead((void**)&rhead, 32);
2247 
2248 	/* mark all of the slots in the table free */
2249 	ag_avail = ag_slots;
2250 	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
2251 		ag->ag_fine = ag+1;
2252 		ag++;
2253 	}
2254 }
2255 
2256 
/*
 * Scratch socket addresses handed to the radix-tree routines as the key
 * and the mask.  Callers in this file (for example rtget() and rtfind())
 * overwrite sin_addr.s_addr right before each use.
 * NOTE(review): {AF_INET} sets the first member of struct sockaddr_in;
 * presumably that is sin_family on this platform -- confirm.
 */
static struct sockaddr_in dst_sock = {AF_INET};
static struct sockaddr_in mask_sock = {AF_INET};
2259 
2260 
2261 static void
2262 set_need_flash(void)
2263 {
2264 	if (!need_flash) {
2265 		need_flash = _B_TRUE;
2266 		/*
2267 		 * Do not send the flash update immediately.  Wait a little
2268 		 * while to hear from other routers.
2269 		 */
2270 		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
2271 	}
2272 }
2273 
2274 
2275 /* Get a particular routing table entry */
2276 struct rt_entry *
2277 rtget(in_addr_t dst, in_addr_t mask)
2278 {
2279 	struct rt_entry *rt;
2280 
2281 	dst_sock.sin_addr.s_addr = dst;
2282 	mask_sock.sin_addr.s_addr = htonl(mask);
2283 	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock, &mask_sock, rhead);
2284 	if (rt == NULL || rt->rt_dst != dst || rt->rt_mask != mask)
2285 		return (NULL);
2286 
2287 	return (rt);
2288 }
2289 
2290 
2291 /* Find a route to dst as the kernel would. */
2292 struct rt_entry *
2293 rtfind(in_addr_t dst)
2294 {
2295 	dst_sock.sin_addr.s_addr = dst;
2296 	return ((struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead));
2297 }
2298 
2299 /* add a route to the table */
2300 void
2301 rtadd(in_addr_t	dst,
2302     in_addr_t	mask,
2303     uint16_t	state,			/* rt_state for the entry */
2304     struct	rt_spare *new)
2305 {
2306 	struct rt_entry *rt;
2307 	in_addr_t smask;
2308 	int i;
2309 	struct rt_spare *rts;
2310 
2311 	/* This is the only function that increments total_routes. */
2312 	if (total_routes == MAX_ROUTES) {
2313 		msglog("have maximum (%d) routes", total_routes);
2314 		return;
2315 	}
2316 
2317 	rt = rtmalloc(sizeof (*rt), "rtadd");
2318 	(void) memset(rt, 0, sizeof (*rt));
2319 	rt->rt_spares = rtmalloc(SPARE_INC  * sizeof (struct rt_spare),
2320 	    "rtadd");
2321 	rt->rt_num_spares = SPARE_INC;
2322 	(void) memset(rt->rt_spares, 0, SPARE_INC  * sizeof (struct rt_spare));
2323 	for (rts = rt->rt_spares, i = rt->rt_num_spares; i != 0; i--, rts++)
2324 		rts->rts_metric = HOPCNT_INFINITY;
2325 
2326 	rt->rt_nodes->rn_key = (uint8_t *)&rt->rt_dst_sock;
2327 	rt->rt_dst = dst;
2328 	rt->rt_dst_sock.sin_family = AF_INET;
2329 	if (mask != HOST_MASK) {
2330 		smask = std_mask(dst);
2331 		if ((smask & ~mask) == 0 && mask > smask)
2332 			state |= RS_SUBNET;
2333 	}
2334 	mask_sock.sin_addr.s_addr = htonl(mask);
2335 	rt->rt_mask = mask;
2336 	rt->rt_spares[0] = *new;
2337 	rt->rt_state = state;
2338 	rt->rt_time = now.tv_sec;
2339 	rt->rt_poison_metric = HOPCNT_INFINITY;
2340 	rt->rt_seqno = update_seqno;
2341 
2342 	if (TRACEACTIONS)
2343 		trace_add_del("Add", rt);
2344 
2345 	need_kern.tv_sec = now.tv_sec;
2346 	set_need_flash();
2347 
2348 	if (NULL == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock, rhead,
2349 	    rt->rt_nodes)) {
2350 		msglog("rnh_addaddr() failed for %s mask=%s",
2351 		    naddr_ntoa(dst), naddr_ntoa(htonl(mask)));
2352 		free(rt);
2353 	}
2354 
2355 	total_routes++;
2356 }
2357 
2358 
2359 /* notice a changed route */
2360 void
2361 rtchange(struct rt_entry *rt,
2362     uint16_t	state,			/* new state bits */
2363     struct rt_spare *new,
2364     char	*label)
2365 {
2366 	if (rt->rt_metric != new->rts_metric) {
2367 		/*
2368 		 * Fix the kernel immediately if it seems the route
2369 		 * has gone bad, since there may be a working route that
2370 		 * aggregates this route.
2371 		 */
2372 		if (new->rts_metric == HOPCNT_INFINITY) {
2373 			need_kern.tv_sec = now.tv_sec;
2374 			if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
2375 				new->rts_time = now.tv_sec - EXPIRE_TIME;
2376 		}
2377 		rt->rt_seqno = update_seqno;
2378 		set_need_flash();
2379 	}
2380 
2381 	if (rt->rt_gate != new->rts_gate) {
2382 		need_kern.tv_sec = now.tv_sec;
2383 		rt->rt_seqno = update_seqno;
2384 		set_need_flash();
2385 	}
2386 
2387 	state |= (rt->rt_state & RS_SUBNET);
2388 
2389 	/* Keep various things from deciding ageless routes are stale. */
2390 	if (!AGE_RT(state, rt->rt_spares[0].rts_origin, new->rts_ifp))
2391 		new->rts_time = now.tv_sec;
2392 
2393 	if (TRACEACTIONS)
2394 		trace_change(rt, state, new,
2395 		    label ? label : "Chg   ");
2396 
2397 	rt->rt_state = state;
2398 	/*
2399 	 * If the interface state of the new primary route is good,
2400 	 * turn off RS_BADIF flag
2401 	 */
2402 	if ((rt->rt_state & RS_BADIF) &&
2403 	    IS_IFF_UP(new->rts_ifp->int_if_flags) &&
2404 	    !(new->rts_ifp->int_state & (IS_BROKE | IS_SICK)))
2405 		rt->rt_state &= ~(RS_BADIF);
2406 
2407 	rt->rt_spares[0] = *new;
2408 }
2409 
2410 
2411 /* check for a better route among the spares */
2412 static struct rt_spare *
2413 rts_better(struct rt_entry *rt)
2414 {
2415 	struct rt_spare *rts, *rts1;
2416 	int i;
2417 
2418 	/* find the best alternative among the spares */
2419 	rts = rt->rt_spares+1;
2420 	for (i = rt->rt_num_spares, rts1 = rts+1; i > 2; i--, rts1++) {
2421 		if (BETTER_LINK(rt, rts1, rts))
2422 			rts = rts1;
2423 	}
2424 
2425 	return (rts);
2426 }
2427 
2428 
2429 /* switch to a backup route */
2430 void
2431 rtswitch(struct rt_entry *rt,
2432     struct rt_spare *rts)
2433 {
2434 	struct rt_spare swap;
2435 	char label[10];
2436 
2437 	/* Do not change permanent routes */
2438 	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC |
2439 	    RS_NET_SYN | RS_IF)))
2440 		return;
2441 
2442 	/* find the best alternative among the spares */
2443 	if (rts == NULL)
2444 		rts = rts_better(rt);
2445 
2446 	/* Do not bother if it is not worthwhile. */
2447 	if (!BETTER_LINK(rt, rts, rt->rt_spares))
2448 		return;
2449 
2450 	swap = rt->rt_spares[0];
2451 	(void) snprintf(label, sizeof (label), "Use #%d",
2452 	    (int)(rts - rt->rt_spares));
2453 	rtchange(rt, rt->rt_state & ~(RS_NET_SYN), rts, label);
2454 
2455 	if (swap.rts_metric == HOPCNT_INFINITY) {
2456 		*rts = rts_empty;
2457 	} else {
2458 		*rts = swap;
2459 	}
2460 
2461 }
2462 
2463 
2464 void
2465 rtdelete(struct rt_entry *rt)
2466 {
2467 	struct rt_entry *deleted_rt;
2468 	struct rt_spare *rts;
2469 	int i;
2470 	in_addr_t gate = rt->rt_gate; /* for debugging */
2471 
2472 	if (TRACEACTIONS)
2473 		trace_add_del("Del", rt);
2474 
2475 	for (i = 0; i < rt->rt_num_spares; i++) {
2476 		rts = &rt->rt_spares[i];
2477 		rts_delete(rt, rts);
2478 	}
2479 
2480 	dst_sock.sin_addr.s_addr = rt->rt_dst;
2481 	mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
2482 	if (rt != (deleted_rt =
2483 	    ((struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
2484 	    rhead)))) {
2485 		msglog("rnh_deladdr(%s) failed; found rt 0x%lx",
2486 		    rtname(rt->rt_dst, rt->rt_mask, gate), deleted_rt);
2487 		if (deleted_rt != NULL)
2488 			free(deleted_rt);
2489 	}
2490 	total_routes--;
2491 	free(rt->rt_spares);
2492 	free(rt);
2493 
2494 	if (dst_sock.sin_addr.s_addr == RIP_DEFAULT) {
2495 		/*
2496 		 * we just deleted the default route. Trigger rdisc_sort
2497 		 * so that we can recover from any rdisc information that
2498 		 * is valid
2499 		 */
2500 		rdisc_timer.tv_sec = 0;
2501 	}
2502 }
2503 
2504 void
2505 rts_delete(struct rt_entry *rt, struct rt_spare *rts)
2506 {
2507 	struct khash *k;
2508 
2509 	trace_upslot(rt, rts, &rts_empty);
2510 	k = kern_find(rt->rt_dst, rt->rt_mask,
2511 	    rts->rts_gate, rts->rts_ifp, NULL);
2512 	if (k != NULL &&
2513 	    !(k->k_state & KS_DEPRE_IF) &&
2514 	    ((k->k_state & (KS_IF|KS_PASSIVE)) != KS_IF)) {
2515 		k->k_state |= KS_DELETE;
2516 		need_kern.tv_sec = now.tv_sec;
2517 	}
2518 
2519 	*rts = rts_empty;
2520 }
2521 
2522 /*
2523  * Get rid of a bad route, and try to switch to a replacement.
2524  * If the route has gone bad because of a bad interface,
2525  * the information about the dead interface is available in badifp
2526  * for the purpose of sanity checks, if_flags checks etc.
2527  */
2528 static void
2529 rtbad(struct rt_entry *rt, struct interface *badifp)
2530 {
2531 	struct rt_spare new;
2532 	uint16_t rt_state;
2533 
2534 
2535 	if (badifp == NULL || (rt->rt_spares[0].rts_ifp == badifp)) {
2536 		/* Poison the route */
2537 		new = rt->rt_spares[0];
2538 		new.rts_metric = HOPCNT_INFINITY;
2539 		rt_state = rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC);
2540 	}
2541 
2542 	if (badifp != NULL) {
2543 		/*
2544 		 * Dont mark the rtentry bad unless the ifp for the primary
2545 		 * route is the bad ifp
2546 		 */
2547 		if (rt->rt_spares[0].rts_ifp != badifp)
2548 			return;
2549 		/*
2550 		 * badifp has just gone bad. We want to keep this
2551 		 * rt_entry around so that we tell our rip-neighbors
2552 		 * about the bad route, but we can't do anything
2553 		 * to the kernel itself, so mark it as RS_BADIF
2554 		 */
2555 		trace_misc("rtbad:Setting RS_BADIF (%s)", badifp->int_name);
2556 		rt_state |= RS_BADIF;
2557 		new.rts_ifp = &dummy_ifp;
2558 	}
2559 	rtchange(rt, rt_state, &new, 0);
2560 	rtswitch(rt, 0);
2561 }
2562 
2563 
2564 /*
2565  * Junk a RS_NET_SYN or RS_LOCAL route,
2566  *	unless it is needed by another interface.
2567  */
2568 void
2569 rtbad_sub(struct rt_entry *rt, struct interface *badifp)
2570 {
2571 	struct interface *ifp, *ifp1;
2572 	struct intnet *intnetp;
2573 	uint_t state;
2574 
2575 
2576 	ifp1 = NULL;
2577 	state = 0;
2578 
2579 	if (rt->rt_state & RS_LOCAL) {
2580 		/*
2581 		 * Is this the route through loopback for the interface?
2582 		 * If so, see if it is used by any other interfaces, such
2583 		 * as a point-to-point interface with the same local address.
2584 		 */
2585 		for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2586 			/* Retain it if another interface needs it. */
2587 			if (ifp->int_addr == rt->rt_ifp->int_addr) {
2588 				state |= RS_LOCAL;
2589 				ifp1 = ifp;
2590 				break;
2591 			}
2592 		}
2593 
2594 	}
2595 
2596 	if (!(state & RS_LOCAL)) {
2597 		/*
2598 		 * Retain RIPv1 logical network route if there is another
2599 		 * interface that justifies it.
2600 		 */
2601 		if (rt->rt_state & RS_NET_SYN) {
2602 			for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2603 				if ((ifp->int_state & IS_NEED_NET_SYN) &&
2604 				    rt->rt_mask == ifp->int_std_mask &&
2605 				    rt->rt_dst == ifp->int_std_addr) {
2606 					state |= RS_NET_SYN;
2607 					ifp1 = ifp;
2608 					break;
2609 				}
2610 			}
2611 		}
2612 
2613 		/* or if there is an authority route that needs it. */
2614 		for (intnetp = intnets; intnetp != NULL;
2615 		    intnetp = intnetp->intnet_next) {
2616 			if (intnetp->intnet_addr == rt->rt_dst &&
2617 			    intnetp->intnet_mask == rt->rt_mask) {
2618 				state |= (RS_NET_SYN | RS_NET_INT);
2619 				break;
2620 			}
2621 		}
2622 	}
2623 
2624 	if (ifp1 != NULL || (state & RS_NET_SYN)) {
2625 		struct rt_spare new = rt->rt_spares[0];
2626 		new.rts_ifp = ifp1;
2627 		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
2628 		    &new, 0);
2629 	} else {
2630 		rtbad(rt, badifp);
2631 	}
2632 }
2633 
2634 /*
2635  * Called while walking the table looking for sick interfaces
2636  * or after a time change.
2637  */
2638 int
2639 walk_bad(struct radix_node *rn,
2640     void *argp)
2641 {
2642 #define	RT ((struct rt_entry *)rn)
2643 	struct rt_spare *rts;
2644 	int i, j = -1;
2645 
2646 	/* fix any spare routes through the interface */
2647 	for (i = 1; i < RT->rt_num_spares; i++) {
2648 		rts = &((struct rt_entry *)rn)->rt_spares[i];
2649 
2650 		if (rts->rts_metric < HOPCNT_INFINITY &&
2651 		    (rts->rts_ifp == NULL ||
2652 		    (rts->rts_ifp->int_state & IS_BROKE)))
2653 			rts_delete(RT, rts);
2654 		else {
2655 			if (rts->rts_origin != RO_NONE)
2656 				j = i;
2657 		}
2658 	}
2659 
2660 	/*
2661 	 * Deal with the main route
2662 	 * finished if it has been handled before or if its interface is ok
2663 	 */
2664 	if (RT->rt_ifp == NULL || !(RT->rt_ifp->int_state & IS_BROKE))
2665 		return (0);
2666 
2667 	/* Bad routes for other than interfaces are easy. */
2668 	if (!(RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
2669 		if (j > 0) {
2670 			RT->rt_spares[0].rts_metric = HOPCNT_INFINITY;
2671 			rtswitch(RT, NULL);
2672 		} else {
2673 			rtbad(RT, (struct interface *)argp);
2674 		}
2675 		return (0);
2676 	}
2677 
2678 	rtbad_sub(RT, (struct interface *)argp);
2679 	return (0);
2680 #undef RT
2681 }
2682 
2683 /*
2684  * Called while walking the table to replace a duplicate interface
2685  * with a backup.
2686  */
2687 int
2688 walk_rewire(struct radix_node *rn, void *argp)
2689 {
2690 	struct rt_entry *RT = (struct rt_entry *)rn;
2691 	struct rewire_data *wire = (struct rewire_data *)argp;
2692 	struct rt_spare *rts;
2693 	int i;
2694 
2695 	/* fix any spare routes through the interface */
2696 	rts = RT->rt_spares;
2697 	for (i = RT->rt_num_spares; i > 0; i--, rts++) {
2698 		if (rts->rts_ifp == wire->if_old) {
2699 			rts->rts_ifp = wire->if_new;
2700 			if ((RT->rt_dst == RIP_DEFAULT) &&
2701 			    (wire->if_old->int_state & IS_SUPPRESS_RDISC))
2702 				rdisc_suppress(rts->rts_ifp);
2703 			if ((rts->rts_metric += wire->metric_delta) >
2704 			    HOPCNT_INFINITY)
2705 				rts->rts_metric = HOPCNT_INFINITY;
2706 
2707 			/*
2708 			 * If the main route is getting a worse metric,
2709 			 * then it may be time to switch to a backup.
2710 			 */
2711 			if (i == RT->rt_num_spares && wire->metric_delta > 0) {
2712 				rtswitch(RT, NULL);
2713 			}
2714 		}
2715 	}
2716 
2717 	return (0);
2718 }
2719 
2720 /* Check the age of an individual route. */
2721 static int
2722 walk_age(struct radix_node *rn, void *argp)
2723 {
2724 #define	RT ((struct rt_entry *)rn)
2725 	struct interface *ifp;
2726 	struct rt_spare *rts;
2727 	int i;
2728 	in_addr_t age_bad_gate = *(in_addr_t *)argp;
2729 
2730 
2731 	/*
2732 	 * age all of the spare routes, including the primary route
2733 	 * currently in use
2734 	 */
2735 	rts = RT->rt_spares;
2736 	for (i = RT->rt_num_spares; i != 0; i--, rts++) {
2737 
2738 		ifp = rts->rts_ifp;
2739 		if (i == RT->rt_num_spares) {
2740 			if (!AGE_RT(RT->rt_state, rts->rts_origin, ifp)) {
2741 				/*
2742 				 * Keep various things from deciding ageless
2743 				 * routes are stale
2744 				 */
2745 				rts->rts_time = now.tv_sec;
2746 				continue;
2747 			}
2748 
2749 			/* forget RIP routes after RIP has been turned off. */
2750 			if (rip_sock < 0) {
2751 				rts->rts_time = now_stale + 1;
2752 			}
2753 		}
2754 
2755 		/* age failing routes */
2756 		if (age_bad_gate == rts->rts_gate &&
2757 		    rts->rts_time >= now_stale) {
2758 			rts->rts_time -= SUPPLY_INTERVAL;
2759 		}
2760 
2761 		/* trash the spare routes when they go bad */
2762 		if (rts->rts_origin == RO_RIP &&
2763 		    ((rip_sock < 0) ||
2764 		    (rts->rts_metric < HOPCNT_INFINITY &&
2765 		    now_garbage > rts->rts_time)) &&
2766 		    i != RT->rt_num_spares) {
2767 			rts_delete(RT, rts);
2768 		}
2769 	}
2770 
2771 
2772 	/* finished if the active route is still fresh */
2773 	if (now_stale <= RT->rt_time)
2774 		return (0);
2775 
2776 	/* try to switch to an alternative */
2777 	rtswitch(RT, NULL);
2778 
2779 	/* Delete a dead route after it has been publically mourned. */
2780 	if (now_garbage > RT->rt_time) {
2781 		rtdelete(RT);
2782 		return (0);
2783 	}
2784 
2785 	/* Start poisoning a bad route before deleting it. */
2786 	if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2787 		struct rt_spare new = RT->rt_spares[0];
2788 
2789 		new.rts_metric = HOPCNT_INFINITY;
2790 		rtchange(RT, RT->rt_state, &new, 0);
2791 	}
2792 	return (0);
2793 }
2794 
2795 
2796 /* Watch for dead routes and interfaces. */
2797 void
2798 age(in_addr_t bad_gate)
2799 {
2800 	struct interface *ifp;
2801 	int need_query = 0;
2802 
2803 	/*
2804 	 * If not listening to RIP, there is no need to age the routes in
2805 	 * the table.
2806 	 */
2807 	age_timer.tv_sec = (now.tv_sec
2808 	    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2809 
2810 	/*
2811 	 * Check for dead IS_REMOTE interfaces by timing their
2812 	 * transmissions.
2813 	 */
2814 	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2815 		if (!(ifp->int_state & IS_REMOTE))
2816 			continue;
2817 
2818 		/* ignore unreachable remote interfaces */
2819 		if (!check_remote(ifp))
2820 			continue;
2821 
2822 		/* Restore remote interface that has become reachable */
2823 		if (ifp->int_state & IS_BROKE)
2824 			if_ok(ifp, "remote ", _B_FALSE);
2825 
2826 		if (ifp->int_act_time != NEVER &&
2827 		    now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2828 			writelog(LOG_NOTICE,
2829 			    "remote interface %s to %s timed out after"
2830 			    " %ld:%ld",
2831 			    ifp->int_name,
2832 			    naddr_ntoa(ifp->int_dstaddr),
2833 			    (now.tv_sec - ifp->int_act_time)/60,
2834 			    (now.tv_sec - ifp->int_act_time)%60);
2835 			if_sick(ifp, _B_FALSE);
2836 		}
2837 
2838 		/*
2839 		 * If we have not heard from the other router
2840 		 * recently, ask it.
2841 		 */
2842 		if (now.tv_sec >= ifp->int_query_time) {
2843 			ifp->int_query_time = NEVER;
2844 			need_query = 1;
2845 		}
2846 	}
2847 
2848 	/* Age routes. */
2849 	(void) rn_walktree(rhead, walk_age, &bad_gate);
2850 
2851 	/*
2852 	 * delete old redirected routes to keep the kernel table small
2853 	 * and prevent blackholes
2854 	 */
2855 	del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2856 
2857 	/* Update the kernel routing table. */
2858 	fix_kern();
2859 
2860 	/* poke reticent remote gateways */
2861 	if (need_query)
2862 		rip_query();
2863 }
2864 
2865 void
2866 kern_dump(void)
2867 {
2868 	int i;
2869 	struct khash *k;
2870 
2871 	for (i = 0; i < KHASH_SIZE; i++) {
2872 		for (k = khash_bins[i]; k != NULL; k = k->k_next)
2873 			trace_khash(k);
2874 	}
2875 }
2876 
2877 
2878 static struct interface *
2879 gwkludge_iflookup(in_addr_t dstaddr, in_addr_t addr, in_addr_t mask)
2880 {
2881 	uint32_t int_state;
2882 	struct interface *ifp;
2883 
2884 	for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
2885 		int_state = ifp->int_state;
2886 
2887 		if (!(int_state & IS_REMOTE))
2888 			continue;
2889 
2890 		if (ifp->int_dstaddr == dstaddr && ifp->int_addr == addr &&
2891 		    ifp->int_mask == mask)
2892 			return (ifp);
2893 	}
2894 	return (NULL);
2895 }
2896 
2897 /*
2898  * Lookup logical interface structure given the gateway address.
2899  * Returns null if no interfaces match the given name.
2900  */
2901 static struct interface *
2902 lifp_iflookup(in_addr_t addr, const char *name)
2903 {
2904 	struct physical_interface *phyi;
2905 	struct interface *ifp;
2906 	struct interface *best = NULL;
2907 
2908 	if ((phyi = phys_byname(name)) == NULL)
2909 		return (NULL);
2910 
2911 	for (ifp = phyi->phyi_interface; ifp != NULL;
2912 	    ifp = ifp->int_ilist.hl_next) {
2913 
2914 #ifdef DEBUG_KERNEL_ROUTE_READ
2915 		(void) fprintf(stderr, " checking interface"
2916 		    " %-4s %-4s %-15s-->%-15s \n",
2917 		    phyi->phyi_name, ifp->int_name,
2918 		    naddr_ntoa(ifp->int_addr),
2919 		    addrname(((ifp->int_if_flags & IFF_POINTOPOINT) ?
2920 		    ifp->int_dstaddr : htonl(ifp->int_net)),
2921 		    ifp->int_mask, 1));
2922 #endif
2923 		/* Exact match found */
2924 		if (addr_on_ifp(addr, ifp, &best))
2925 			return (ifp);
2926 	}
2927 	/* No exact match found but return any best match found */
2928 	return (best);
2929 }
2930