xref: /freebsd/sbin/routed/table.c (revision e627b39baccd1ec9129690167cf5e6d860509655)
1 /*
2  * Copyright (c) 1983, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #if !defined(lint) && !defined(sgi) && !defined(__NetBSD__)
35 static char sccsid[] = "@(#)tables.c	8.1 (Berkeley) 6/5/93";
36 #elif defined(__NetBSD__)
37 static char rcsid[] = "$NetBSD$";
38 #endif
39 #ident "$Revision: 1.1.1.1 $"
40 
41 #include "defs.h"
42 
/* Forward declaration; the definition is not in the visible part of
 * this file.
 */
static struct rt_spare *rts_better(struct rt_entry *);

struct radix_node_head *rhead;		/* root of the radix tree */

int	need_flash = 1;			/* flash update needed
					 * start =1 to suppress the 1st
					 */

struct timeval age_timer;		/* next check of old routes */
struct timeval need_kern = {		/* need to update kernel table */
	EPOCH+MIN_WAITTIME-1		/* first member only; the rest of
					 * the struct is zero-initialized
					 */
};

/* NOTE(review): not referenced in the visible part of this file;
 * presumably a "stop requested" indicator -- confirm against its users.
 */
int	stopint;

/* NOTE(review): presumably the number of routes in the daemon table;
 * it is not updated in the visible part of this file -- confirm.
 */
int	total_routes;

/* NOTE(review): presumably the gateway address currently being aged
 * as bad; not referenced in the visible part of this file -- confirm.
 */
naddr	age_bad_gate;
61 
62 
63 /* It is desirable to "aggregate" routes, to combine differing routes of
64  * the same metric and next hop into a common route with a smaller netmask
65  * or to suppress redundant routes, routes that add no information to
66  * routes with smaller netmasks.
67  *
68  * A route is redundant if and only if any and all routes with smaller
69  * but matching netmasks and nets are the same.  Since routes are
70  * kept sorted in the radix tree, redundant routes always come second.
71  *
72  * There are two kinds of aggregations.  First, two routes of the same bit
73  * mask and differing only in the least significant bit of the network
74  * number can be combined into a single route with a coarser mask.
75  *
76  * Second, a route can be suppressed in favor of another route with a more
77  * coarse mask provided no incompatible routes with intermediate masks
78  * are present.  The second kind of aggregation involves suppressing routes.
79  * A route must not be suppressed if an incompatible route exists with
80  * an intermediate mask, since the suppressed route would be covered
81  * by the intermediate.
82  *
83  * This code relies on the radix tree walk encountering routes
84  * sorted first by address, with the smallest address first.
85  */
86 
/* Storage for the aggregation table and the heads of its two lists.
 *	ag_avail is the head of the free list, which is singly linked
 *	through ag_fine (see ag_del() and the end of ag_check()).
 *	ag_corsest and ag_finest are the two ends of the doubly linked
 *	list of slots in use, linked through ag_fine and ag_cors.
 */
struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;

/* When DEBUG_AG is defined, CHECK_AG() counts the slots on the free
 * list and on the in-use list and aborts unless the two lists together
 * hold exactly NUM_AG_SLOTS slots.  Otherwise it expands to nothing.
 */
/* #define DEBUG_AG */
#ifdef DEBUG_AG
#define CHECK_AG() {int acnt = 0; struct ag_info *cag;		\
	for (cag = ag_avail; cag != 0; cag = cag->ag_fine)	\
		acnt++;						\
	for (cag = ag_corsest; cag != 0; cag = cag->ag_fine)	\
		acnt++;						\
	if (acnt != NUM_AG_SLOTS) {				\
		(void)fflush(stderr);				\
		abort();					\
	}							\
}
#else
#define CHECK_AG()
#endif
104 
105 
106 /* Output the contents of an aggregation table slot.
107  *	This function must always be immediately followed with the deletion
108  *	of the target slot.
109  */
110 static void
111 ag_out(struct ag_info *ag,
112 	 void (*out)(struct ag_info *))
113 {
114 	struct ag_info *ag_cors;
115 	naddr bit;
116 
117 
118 	/* If we output both the even and odd twins, then the immediate parent,
119 	 * if it is present, is redundant, unless the parent manages to
120 	 * aggregate into something coarser.
121 	 * On successive calls, this code detects the even and odd twins,
122 	 * and marks the parent.
123 	 *
124 	 * Note that the order in which the radix tree code emits routes
125 	 * ensures that the twins are seen before the parent is emitted.
126 	 */
127 	ag_cors = ag->ag_cors;
128 	if (ag_cors != 0
129 	    && ag_cors->ag_mask == ag->ag_mask<<1
130 	    && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
131 		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
132 				      ? AGS_REDUN0
133 				      : AGS_REDUN1);
134 	}
135 
136 	/* Skip it if this route is itself redundant.
137 	 *
138 	 * It is ok to change the contents of the slot here, since it is
139 	 * always deleted next.
140 	 */
141 	if (ag->ag_state & AGS_REDUN0) {
142 		if (ag->ag_state & AGS_REDUN1)
143 			return;
144 		bit = (-ag->ag_mask) >> 1;
145 		ag->ag_dst_h |= bit;
146 		ag->ag_mask |= bit;
147 
148 	} else if (ag->ag_state & AGS_REDUN1) {
149 		bit = (-ag->ag_mask) >> 1;
150 		ag->ag_mask |= bit;
151 	}
152 	out(ag);
153 }
154 
155 
156 static void
157 ag_del(struct ag_info *ag)
158 {
159 	CHECK_AG();
160 
161 	if (ag->ag_cors == 0)
162 		ag_corsest = ag->ag_fine;
163 	else
164 		ag->ag_cors->ag_fine = ag->ag_fine;
165 
166 	if (ag->ag_fine == 0)
167 		ag_finest = ag->ag_cors;
168 	else
169 		ag->ag_fine->ag_cors = ag->ag_cors;
170 
171 	ag->ag_fine = ag_avail;
172 	ag_avail = ag;
173 
174 	CHECK_AG();
175 }
176 
177 
/* Flush routes waiting for aggregation.
 *	This must not suppress a route unless it is known that among all
 *	routes with coarser masks that match it, the one with the longest
 *	mask is appropriate.  This is ensured by scanning the routes
 *	in lexical order, and with the most restrictive mask first
 *	among routes to the same destination.
 *
 *	The in-use list is walked from the finest slot toward coarser
 *	ones, stopping at the first slot whose mask is smaller than
 *	lim_mask.  Each visited slot whose destination matches
 *	lim_dst_h under lim_mask is either emitted through out() or
 *	silently absorbed by a coarser slot, and is then deleted.
 */
void
ag_flush(naddr lim_dst_h,		/* flush routes to here */
	 naddr lim_mask,		/* matching this mask */
	 void (*out)(struct ag_info *))
{
	struct ag_info *ag, *ag_cors;
	naddr dst_h;


	for (ag = ag_finest;
	     ag != 0 && ag->ag_mask >= lim_mask;
	     ag = ag_cors) {
		/* Remember the next slot now; it is the loop increment
		 * when this slot is skipped by the "continue" below.
		 */
		ag_cors = ag->ag_cors;

		/* work on only the specified routes */
		dst_h = ag->ag_dst_h;
		if ((dst_h & lim_mask) != lim_dst_h)
			continue;

		if (!(ag->ag_state & AGS_SUPPRESS))
			ag_out(ag, out);

		else for ( ; ; ag_cors = ag_cors->ag_cors) {
			/* Look for a route that can suppress the
			 * current route */
			if (ag_cors == 0) {
				/* failed, so output it and look for
				 * another route to work on
				 */
				ag_out(ag, out);
				break;
			}

			if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
				/* We found a route with a coarser mask that
				 * aggregates the current target.
				 *
				 * If it has a different next hop, it
				 * cannot replace the target, so output
				 * the target.  The gateway mismatch is
				 * ignored when AGS_FINE_GATE is set on the
				 * target or AGS_CORS_GATE is set on the
				 * coarser route.
				 */
				if (ag->ag_gate != ag_cors->ag_gate
				    && !(ag->ag_state & AGS_FINE_GATE)
				    && !(ag_cors->ag_state & AGS_CORS_GATE)) {
					ag_out(ag, out);
					break;
				}

				/* If the coarse route has a good enough
				 * metric, it suppresses the target.
				 * The coarse route then inherits the smaller
				 * sequence number, is marked as having one
				 * half covered if the target was redundant
				 * and is its immediate child, and loses its
				 * tag or next hop if they differ from those
				 * of the target.
				 */
				if (ag_cors->ag_pref <= ag->ag_pref) {
				    if (ag_cors->ag_seqno > ag->ag_seqno)
					ag_cors->ag_seqno = ag->ag_seqno;
				    if (AG_IS_REDUN(ag->ag_state)
					&& ag_cors->ag_mask==ag->ag_mask<<1) {
					if (ag_cors->ag_dst_h == dst_h)
					    ag_cors->ag_state |= AGS_REDUN0;
					else
					    ag_cors->ag_state |= AGS_REDUN1;
				    }
				    if (ag->ag_tag != ag_cors->ag_tag)
					    ag_cors->ag_tag = 0;
				    if (ag->ag_nhop != ag_cors->ag_nhop)
					    ag_cors->ag_nhop = 0;
				    break;
				}
			}
		}

		/* That route has either been output or suppressed.
		 * The search above may have moved ag_cors past the
		 * immediate neighbor, so reload it before the slot is
		 * deleted and the loop advances.
		 */
		ag_cors = ag->ag_cors;
		ag_del(ag);
	}

	CHECK_AG();
}
262 
263 
/* Try to aggregate a route with previous routes.
 *
 *	The new route is combined with, promoted over, or inserted among
 *	the slots of the aggregation table.  Slots that can no longer be
 *	aggregated are emitted through out() on the way.  Routes with
 *	non-contiguous masks bypass the table and are emitted at once.
 *
 *	dst	destination; converted with NTOHL() on entry, so it is
 *		presumably passed in network byte order
 *	mask	netmask, compared directly with the ag_mask of the slots
 *	gate, nhop, metric, pref, seqno, tag, state
 *		copied into the slot that ends up describing the route
 *	out	callback used to emit routes
 */
void
ag_check(naddr	dst,
	 naddr	mask,
	 naddr	gate,
	 naddr	nhop,
	 char	metric,
	 char	pref,
	 u_int	seqno,
	 u_short tag,
	 u_short state,
	 void (*out)(struct ag_info *))	/* output using this */
{
	struct ag_info *ag, *nag, *ag_cors;
	naddr xaddr;
	int x;

	NTOHL(dst);

	/* Punt non-contiguous subnet masks.
	 *
	 * (X & -X) isolates the least significant set bit of X.
	 * For a contiguous mask, adding that bit carries through all of
	 * the set bits and out of the top, leaving 0; a mask of 0 also
	 * gives 0.  Any other value means the mask has a hole in it.
	 * NOTE(review): this assumes naddr is an unsigned type exactly as
	 * wide as the mask -- confirm against its declaration.
	 */
	if ((mask & -mask) + mask != 0) {
		struct ag_info nc_ag;

		nc_ag.ag_dst_h = dst;
		nc_ag.ag_mask = mask;
		nc_ag.ag_gate = gate;
		nc_ag.ag_nhop = nhop;
		nc_ag.ag_metric = metric;
		nc_ag.ag_pref = pref;
		nc_ag.ag_tag = tag;
		nc_ag.ag_state = state;
		nc_ag.ag_seqno = seqno;
		out(&nc_ag);
		return;
	}

	/* Search for the right slot in the aggregation table.
	 * On exit from the loop, ag is the first slot whose mask is at
	 * least as fine as the new mask (or 0), and ag_cors is the slot
	 * before it (or 0).
	 */
	ag_cors = 0;
	ag = ag_corsest;
	while (ag != 0) {
		if (ag->ag_mask >= mask)
			break;

		/* Suppress old routes (i.e. combine with compatible routes
		 * with coarser masks) as we look for the right slot in the
		 * aggregation table for the new route.
		 * A route to an address less than the current destination
		 * will not be affected by the current route or any route
		 * seen hereafter.  That means it is safe to suppress it.
		 * This check keeps poor routes (eg. with large hop counts)
		 * from preventing suppression of finer routes.
		 */
		if (ag_cors != 0
		    && ag->ag_dst_h < dst
		    && (ag->ag_state & AGS_SUPPRESS)
		    && ag_cors->ag_pref <= ag->ag_pref
		    && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
		    && (ag_cors->ag_gate == ag->ag_gate
			|| (ag->ag_state & AGS_FINE_GATE)
			|| (ag_cors->ag_state & AGS_CORS_GATE))) {
			if (ag_cors->ag_seqno > ag->ag_seqno)
				ag_cors->ag_seqno = ag->ag_seqno;
			if (AG_IS_REDUN(ag->ag_state)
			    && ag_cors->ag_mask==ag->ag_mask<<1) {
				if (ag_cors->ag_dst_h == dst)
					ag_cors->ag_state |= AGS_REDUN0;
				else
					ag_cors->ag_state |= AGS_REDUN1;
			}
			if (ag->ag_tag != ag_cors->ag_tag)
				ag_cors->ag_tag = 0;
			if (ag->ag_nhop != ag_cors->ag_nhop)
				ag_cors->ag_nhop = 0;
			ag_del(ag);
			CHECK_AG();
		} else {
			ag_cors = ag;
		}
		/* ag_cors is never 0 here: the first pass always takes
		 * the else branch above.
		 */
		ag = ag_cors->ag_fine;
	}

	/* If we find the even/odd twin of the new route, and if the
	 * masks and so forth are equal, we can aggregate them.
	 * We can probably promote one of the pair.
	 *
	 * Since the routes are encountered in lexical order,
	 * the new route must be odd.  However, the second or later
	 * times around this loop, it could be the even twin promoted
	 * from the even/odd pair of twins of the finer route.
	 */
	while (ag != 0
	       && ag->ag_mask == mask
	       && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {

		/* Here we know the target route and the route in the current
		 * slot have the same netmasks and differ by at most the
		 * last bit.  They are either for the same destination, or
		 * for an even/odd pair of destinations.
		 */
		if (ag->ag_dst_h == dst) {
			/* We have two routes to the same destination.
			 * Routes are encountered in lexical order, so a
			 * route is never promoted until the parent route is
			 * already present.  So we know that the new route is
			 * a promoted pair and the route already in the slot
			 * is the explicit route.
			 *
			 * Prefer the best route if their metrics differ,
			 * or the promoted one if not, following a sort
			 * of longest-match rule.
			 */
			if (pref <= ag->ag_pref) {
				ag->ag_gate = gate;
				ag->ag_nhop = nhop;
				ag->ag_tag = tag;
				ag->ag_metric = metric;
				ag->ag_pref = pref;
				/* swap the states so that the bits of the
				 * losing route can be merged in below
				 */
				x = ag->ag_state;
				ag->ag_state = state;
				state = x;
			}

			/* The sequence number controls flash updating,
			 * and should be the smaller of the two.
			 */
			if (ag->ag_seqno > seqno)
				ag->ag_seqno = seqno;

			/* some bits are set if they are set on either route */
			ag->ag_state |= (state & (AGS_PROMOTE_EITHER
						  | AGS_REDUN0 | AGS_REDUN1));
			return;
		}

		/* If one of the routes can be promoted and the other can
		 * be suppressed, it may be possible to combine them or
		 * worthwhile to promote one.
		 *
		 * Note that any route that can be promoted is always
		 * marked to be eligible to be suppressed.
		 */
		if (!((state & AGS_PROMOTE)
		      && (ag->ag_state & AGS_SUPPRESS))
		    && !((ag->ag_state & AGS_PROMOTE)
			 && (state & AGS_SUPPRESS)))
			break;

		/* A pair of even/odd twin routes can be combined
		 * if either is redundant, or if they are via the
		 * same gateway and have the same metric.
		 */
		if (AG_IS_REDUN(ag->ag_state)
		    || AG_IS_REDUN(state)
		    || (ag->ag_gate == gate
			&& ag->ag_pref == pref
			&& (state & ag->ag_state & AGS_PROMOTE) != 0)) {

			/* We have both the even and odd pairs.
			 * Since the routes are encountered in order,
			 * the route in the slot must be the even twin.
			 *
			 * Combine and promote the pair of routes.
			 */
			if (seqno > ag->ag_seqno)
				seqno = ag->ag_seqno;
			if (!AG_IS_REDUN(state))
				state &= ~AGS_REDUN1;
			if (AG_IS_REDUN(ag->ag_state))
				state |= AGS_REDUN0;
			else
				state &= ~AGS_REDUN0;
			state |= (ag->ag_state & AGS_PROMOTE_EITHER);
			if (ag->ag_tag != tag)
				tag = 0;
			if (ag->ag_nhop != nhop)
				nhop = 0;

			/* Get rid of the even twin that was already
			 * in the slot.
			 */
			ag_del(ag);

		} else if (ag->ag_pref >= pref
			   && (ag->ag_state & AGS_PROMOTE)) {
			/* If we cannot combine the pair, maybe the route
			 * with the worse metric can be promoted.
			 *
			 * Promote the old, even twin, by giving its slot
			 * in the table to the new, odd twin.
			 * Every field is exchanged, so that the local
			 * variables now describe the old route.
			 */
			ag->ag_dst_h = dst;

			xaddr = ag->ag_gate;
			ag->ag_gate = gate;
			gate = xaddr;

			xaddr = ag->ag_nhop;
			ag->ag_nhop = nhop;
			nhop = xaddr;

			x = ag->ag_tag;
			ag->ag_tag = tag;
			tag = x;

			x = ag->ag_state;
			ag->ag_state = state;
			state = x;
			if (!AG_IS_REDUN(state))
				state &= ~AGS_REDUN0;

			x = ag->ag_metric;
			ag->ag_metric = metric;
			metric = x;

			x = ag->ag_pref;
			ag->ag_pref = pref;
			pref = x;

			/* both routes end up with the smaller seqno */
			if (seqno >= ag->ag_seqno)
				seqno = ag->ag_seqno;
			else
				ag->ag_seqno = seqno;

		} else {
			if (!(state & AGS_PROMOTE))
				break;	/* cannot promote either twin */

			/* promote the new, odd twin by shaving its
			 * mask and address.
			 */
			if (seqno > ag->ag_seqno)
				seqno = ag->ag_seqno;
			else
				ag->ag_seqno = seqno;
			if (!AG_IS_REDUN(state))
				state &= ~AGS_REDUN1;
		}

		/* The route now in the local variables moves up one
		 * level: shorten the mask by one bit and clear the bit
		 * that was shaved off of the address.
		 */
		mask <<= 1;
		dst &= mask;

		/* step toward the coarser end of the table */
		if (ag_cors == 0) {
			ag = ag_corsest;
			break;
		}
		ag = ag_cors;
		ag_cors = ag->ag_cors;
	}

	/* When we can no longer promote and combine routes,
	 * flush the old route in the target slot.  Also flush
	 * any finer routes that we know will never be aggregated by
	 * the new route.
	 *
	 * In case we moved toward coarser masks,
	 * get back where we belong
	 */
	if (ag != 0
	    && ag->ag_mask < mask) {
		ag_cors = ag;
		ag = ag->ag_fine;
	}

	/* Empty the target slot
	 */
	if (ag != 0 && ag->ag_mask == mask) {
		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
		/* the flush deleted slots, so find the neighbor again */
		ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
	}

#ifdef DEBUG_AG
	/* ag_cors and ag must now be adjacent, with the new route
	 * belonging between them
	 */
	(void)fflush(stderr);
	if (ag == 0 && ag_cors != ag_finest)
		abort();
	if (ag_cors == 0 && ag != ag_corsest)
		abort();
	if (ag != 0 && ag->ag_cors != ag_cors)
		abort();
	if (ag_cors != 0 && ag_cors->ag_fine != ag)
		abort();
	CHECK_AG();
#endif

	/* Save the new route on the end of the table.
	 * NOTE(review): the free list is popped without checking that
	 * ag_avail is not 0; this relies on the table never filling --
	 * confirm that NUM_AG_SLOTS is large enough for every mask.
	 */
	nag = ag_avail;
	ag_avail = nag->ag_fine;

	nag->ag_dst_h = dst;
	nag->ag_mask = mask;
	nag->ag_gate = gate;
	nag->ag_nhop = nhop;
	nag->ag_metric = metric;
	nag->ag_pref = pref;
	nag->ag_tag = tag;
	nag->ag_state = state;
	nag->ag_seqno = seqno;

	/* link the new slot between ag_cors and ag */
	nag->ag_fine = ag;
	if (ag != 0)
		ag->ag_cors = nag;
	else
		ag_finest = nag;
	nag->ag_cors = ag_cors;
	if (ag_cors == 0)
		ag_corsest = nag;
	else
		ag_cors->ag_fine = nag;
	CHECK_AG();
}
580 
581 
/* Convert a routing socket message type to a printable name.
 *	Types 1 through 14 map to their RTM_* names.  Any other value
 *	is formatted into a static buffer, so that result is overwritten
 *	by the next call with an unknown type.
 */
static char *
rtm_type_name(u_char type)
{
	static char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO"
	};
	/* Sized for the longest possible result, "RTM type 0xff", and
	 * its terminating null.  The previous 10 byte buffer was too
	 * small for any unknown type.
	 */
	static char name0[sizeof("RTM type 0xff")];


	/* the table is indexed by type-1, so type 0 has no entry */
	if (type == 0
	    || type > sizeof(rtm_types)/sizeof(rtm_types[0])) {
		(void)snprintf(name0, sizeof(name0), "RTM type %#x", type);
		return name0;
	}
	return rtm_types[type-1];
}
612 
613 
614 /* Trim a mask in a sockaddr
615  *	Produce a length of 0 for an address of 0.
616  *	Otherwise produce the index of the first zero byte.
617  */
618 void
619 #ifdef _HAVE_SIN_LEN
620 masktrim(struct sockaddr_in *ap)
621 #else
622 masktrim(struct sockaddr_in_new *ap)
623 #endif
624 {
625 	register char *cp;
626 
627 	if (ap->sin_addr.s_addr == 0) {
628 		ap->sin_len = 0;
629 		return;
630 	}
631 	cp = (char *)(&ap->sin_addr.s_addr+1);
632 	while (*--cp == 0)
633 		continue;
634 	ap->sin_len = cp - (char*)ap + 1;
635 }
636 
637 
638 /* Tell the kernel to add, delete or change a route
639  */
640 static void
641 rtioctl(int action,			/* RTM_DELETE, etc */
642 	naddr dst,
643 	naddr gate,
644 	naddr mask,
645 	int metric,
646 	int flags)
647 {
648 	struct {
649 		struct rt_msghdr w_rtm;
650 		struct sockaddr_in w_dst;
651 		struct sockaddr_in w_gate;
652 #ifdef _HAVE_SA_LEN
653 		struct sockaddr_in w_mask;
654 #else
655 		struct sockaddr_in_new w_mask;
656 #endif
657 	} w;
658 	long cc;
659 
660 again:
661 	bzero(&w, sizeof(w));
662 	w.w_rtm.rtm_msglen = sizeof(w);
663 	w.w_rtm.rtm_version = RTM_VERSION;
664 	w.w_rtm.rtm_type = action;
665 	w.w_rtm.rtm_flags = flags;
666 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
667 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
668 	if (metric != 0) {
669 		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
670 		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
671 	}
672 	w.w_dst.sin_family = AF_INET;
673 	w.w_dst.sin_addr.s_addr = dst;
674 	w.w_gate.sin_family = AF_INET;
675 	w.w_gate.sin_addr.s_addr = gate;
676 #ifdef _HAVE_SA_LEN
677 	w.w_dst.sin_len = sizeof(w.w_dst);
678 	w.w_gate.sin_len = sizeof(w.w_gate);
679 #endif
680 	if (mask == HOST_MASK) {
681 		w.w_rtm.rtm_flags |= RTF_HOST;
682 		w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
683 	} else {
684 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
685 		w.w_mask.sin_addr.s_addr = htonl(mask);
686 #ifdef _HAVE_SA_LEN
687 		masktrim(&w.w_mask);
688 		if (w.w_mask.sin_len == 0)
689 			w.w_mask.sin_len = sizeof(long);
690 		w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
691 #endif
692 	}
693 
694 	if (TRACEKERNEL)
695 		trace_kernel("write kernel %s %s->%s metric=%d flags=%#x\n",
696 			     rtm_type_name(action),
697 			     addrname(dst, mask, 0), naddr_ntoa(gate),
698 			     metric, flags);
699 
700 #ifndef NO_INSTALL
701 	cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
702 	if (cc == w.w_rtm.rtm_msglen)
703 		return;
704 	if (cc < 0) {
705 		if (errno == ESRCH
706 		    && (action == RTM_CHANGE || action == RTM_DELETE)) {
707 			trace_act("route to %s disappeared before %s\n",
708 				  addrname(dst, mask, 0),
709 				  rtm_type_name(action));
710 			if (action == RTM_CHANGE) {
711 				action = RTM_ADD;
712 				goto again;
713 			}
714 			return;
715 		}
716 		msglog("write(rt_sock) %s %s --> %s: %s",
717 		       rtm_type_name(action),
718 		       addrname(dst, mask, 0), naddr_ntoa(gate),
719 		       strerror(errno));
720 	} else {
721 		msglog("write(rt_sock) wrote %d instead of %d",
722 		       cc, w.w_rtm.rtm_msglen);
723 	}
724 #endif
725 }
726 
727 
/* The daemon's image of the kernel routing table: a hash table of
 * chained entries keyed by destination and mask.
 */
#define KHASH_SIZE 71			/* should be prime */
/* the bin, an lvalue, that holds the chain for address a and mask m */
#define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash {
	struct khash *k_next;		/* next entry in the same bin */
	naddr	k_dst;			/* destination, as taken from the
					 * routing message */
	naddr	k_mask;			/* mask; rtm_add() stores it after
					 * ntohl() */
	naddr	k_gate;			/* gateway, as taken from the
					 * routing message */
	short	k_metric;		/* hop count, limited by rtm_add()
					 * to 0..HOPCNT_INFINITY */
	u_short	k_state;		/* KS_* bits below */
#define	    KS_NEW	0x001		/* just made by kern_add() */
#define	    KS_DELETE	0x002		/* marked to be deleted */
#define	    KS_ADD	0x004		/* add to the kernel */
#define	    KS_CHANGE	0x008		/* tell kernel to change the route */
#define	    KS_DEL_ADD	0x010		/* delete & add to change the kernel */
#define	    KS_STATIC	0x020		/* Static flag in kernel */
#define	    KS_GATEWAY	0x040		/* G flag in kernel */
#define	    KS_DYNAMIC	0x080		/* result of redirect */
#define	    KS_DELETED	0x100		/* already deleted */
	time_t	k_keep;			/* set to the creation time by
					 * kern_add(), or to that plus the
					 * "keep" argument by rtm_add() */
#define	    K_KEEP_LIM	30		/* NOTE(review): presumably a limit
					 * in seconds on k_keep; not used in
					 * the visible code -- confirm */
	time_t	k_redirect_time;	/* when rtm_add() last saw this as
					 * a redirected route */
} *khash_bins[KHASH_SIZE];
750 
751 
752 static struct khash*
753 kern_find(naddr dst, naddr mask, struct khash ***ppk)
754 {
755 	struct khash *k, **pk;
756 
757 	for (pk = &KHASH(dst,mask); (k = *pk) != 0; pk = &k->k_next) {
758 		if (k->k_dst == dst && k->k_mask == mask)
759 			break;
760 	}
761 	if (ppk != 0)
762 		*ppk = pk;
763 	return k;
764 }
765 
766 
767 static struct khash*
768 kern_add(naddr dst, naddr mask)
769 {
770 	struct khash *k, **pk;
771 
772 	k = kern_find(dst, mask, &pk);
773 	if (k != 0)
774 		return k;
775 
776 	k = (struct khash *)malloc(sizeof(*k));
777 
778 	bzero(k, sizeof(*k));
779 	k->k_dst = dst;
780 	k->k_mask = mask;
781 	k->k_state = KS_NEW;
782 	k->k_keep = now.tv_sec;
783 	*pk = k;
784 
785 	return k;
786 }
787 
788 
789 /* If a kernel route has a non-zero metric, check that it is still in the
790  *	daemon table, and not deleted by interfaces coming and going.
791  */
792 static void
793 kern_check_static(struct khash *k,
794 		  struct interface *ifp)
795 {
796 	struct rt_entry *rt;
797 	naddr int_addr;
798 
799 	if (k->k_metric == 0)
800 		return;
801 
802 	int_addr = (ifp != 0) ? ifp->int_addr : loopaddr;
803 
804 	rt = rtget(k->k_dst, k->k_mask);
805 	if (rt != 0) {
806 		if (!(rt->rt_state & RS_STATIC))
807 			rtchange(rt, rt->rt_state | RS_STATIC,
808 				 k->k_gate, int_addr,
809 				 k->k_metric, 0, ifp, now.tv_sec, 0);
810 	} else {
811 		rtadd(k->k_dst, k->k_mask, k->k_gate, int_addr,
812 		      k->k_metric, 0, RS_STATIC, ifp);
813 	}
814 }
815 
816 
817 /* add a route the kernel told us
818  */
819 static void
820 rtm_add(struct rt_msghdr *rtm,
821 	struct rt_addrinfo *info,
822 	time_t keep)
823 {
824 	struct khash *k;
825 	struct interface *ifp;
826 	naddr mask;
827 
828 
829 	if (rtm->rtm_flags & RTF_HOST) {
830 		mask = HOST_MASK;
831 	} else if (INFO_MASK(info) != 0) {
832 		mask = ntohl(S_ADDR(INFO_MASK(info)));
833 	} else {
834 		msglog("punt %s without mask",
835 		       rtm_type_name(rtm->rtm_type));
836 		return;
837 	}
838 
839 	if (INFO_GATE(info) == 0
840 	    || INFO_GATE(info)->sa_family != AF_INET) {
841 		msglog("punt %s without gateway",
842 		       rtm_type_name(rtm->rtm_type));
843 		return;
844 	}
845 
846 	k = kern_add(S_ADDR(INFO_DST(info)), mask);
847 	if (k->k_state & KS_NEW)
848 		k->k_keep = now.tv_sec+keep;
849 	k->k_gate = S_ADDR(INFO_GATE(info));
850 	k->k_metric = rtm->rtm_rmx.rmx_hopcount;
851 	if (k->k_metric < 0)
852 		k->k_metric = 0;
853 	else if (k->k_metric > HOPCNT_INFINITY)
854 		 k->k_metric = HOPCNT_INFINITY;
855 	k->k_state &= ~(KS_DELETED | KS_GATEWAY | KS_STATIC | KS_NEW);
856 	if (rtm->rtm_flags & RTF_GATEWAY)
857 		k->k_state |= KS_GATEWAY;
858 	if (rtm->rtm_flags & RTF_STATIC)
859 		k->k_state |= KS_STATIC;
860 
861 	if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
862 		if (supplier) {
863 			/* Routers are not supposed to listen to redirects,
864 			 * so delete it.
865 			 */
866 			k->k_state &= ~KS_DYNAMIC;
867 			k->k_state |= KS_DELETE;
868 			LIM_SEC(need_kern, 0);
869 			trace_act("mark redirected %s --> %s for deletion"
870 				  " since this is a router\n",
871 				  addrname(k->k_dst, k->k_mask, 0),
872 				  naddr_ntoa(k->k_gate));
873 		} else {
874 			k->k_state |= KS_DYNAMIC;
875 			k->k_redirect_time = now.tv_sec;
876 		}
877 		return;
878 	}
879 
880 	/* If it is not a static route, quit until the next comparison
881 	 * between the kernel and daemon tables, when it will be deleted.
882 	 */
883 	if (!(k->k_state & KS_STATIC)) {
884 		k->k_state |= KS_DELETE;
885 		LIM_SEC(need_kern, k->k_keep);
886 		return;
887 	}
888 
889 	/* Put static routes with real metrics into the daemon table so
890 	 * they can be advertised.
891 	 *
892 	 * Find the interface concerned
893 	 */
894 	ifp = iflookup(k->k_gate);
895 	if (ifp == 0) {
896 		/* if there is no known interface,
897 		 * maybe there is a new interface
898 		 */
899 		ifinit();
900 		ifp = iflookup(k->k_gate);
901 		if (ifp == 0)
902 			msglog("static route %s --> %s impossibly lacks ifp",
903 			       addrname(S_ADDR(INFO_DST(info)), mask, 0),
904 			       naddr_ntoa(k->k_gate));
905 	}
906 
907 	kern_check_static(k, ifp);
908 }
909 
910 
911 /* deal with packet loss
912  */
913 static void
914 rtm_lose(struct rt_msghdr *rtm,
915 	 struct rt_addrinfo *info)
916 {
917 	if (INFO_GATE(info) == 0
918 	    || INFO_GATE(info)->sa_family != AF_INET) {
919 		msglog("punt %s without gateway",
920 		       rtm_type_name(rtm->rtm_type));
921 		return;
922 	}
923 
924 	if (!supplier)
925 		rdisc_age(S_ADDR(INFO_GATE(info)));
926 
927 	age(S_ADDR(INFO_GATE(info)));
928 }
929 
930 
931 /* Clean the kernel table by copying it to the daemon image.
932  * Eventually the daemon will delete any extra routes.
933  */
934 void
935 flush_kern(void)
936 {
937 	size_t needed;
938 	int mib[6];
939 	char *buf, *next, *lim;
940 	struct rt_msghdr *rtm;
941 	struct interface *ifp;
942 	static struct sockaddr_in gate_sa;
943 	struct rt_addrinfo info;
944 
945 
946 	mib[0] = CTL_NET;
947 	mib[1] = PF_ROUTE;
948 	mib[2] = 0;		/* protocol */
949 	mib[3] = 0;		/* wildcard address family */
950 	mib[4] = NET_RT_DUMP;
951 	mib[5] = 0;		/* no flags */
952 	if (sysctl(mib, 6, 0, &needed, 0, 0) < 0) {
953 		DBGERR(1,"RT_DUMP-sysctl-estimate");
954 		return;
955 	}
956 	buf = malloc(needed);
957 	if (sysctl(mib, 6, buf, &needed, 0, 0) < 0)
958 		BADERR(1,"RT_DUMP");
959 	lim = buf + needed;
960 	for (next = buf; next < lim; next += rtm->rtm_msglen) {
961 		rtm = (struct rt_msghdr *)next;
962 
963 		rt_xaddrs(&info,
964 			  (struct sockaddr *)(rtm+1),
965 			  (struct sockaddr *)(next + rtm->rtm_msglen),
966 			  rtm->rtm_addrs);
967 
968 		if (INFO_DST(&info) == 0
969 		    || INFO_DST(&info)->sa_family != AF_INET)
970 			continue;
971 
972 		/* ignore ARP table entries on systems with a merged route
973 		 * and ARP table.
974 		 */
975 		if (rtm->rtm_flags & RTF_LLINFO)
976 			continue;
977 
978 		if (INFO_GATE(&info) == 0)
979 			continue;
980 		if (INFO_GATE(&info)->sa_family != AF_INET) {
981 			if (INFO_GATE(&info)->sa_family != AF_LINK)
982 				continue;
983 			ifp = ifwithindex(((struct sockaddr_dl *)
984 					   INFO_GATE(&info))->sdl_index);
985 			if (ifp == 0)
986 				continue;
987 			if ((ifp->int_if_flags & IFF_POINTOPOINT)
988 			    || S_ADDR(INFO_DST(&info)) == ifp->int_addr)
989 				gate_sa.sin_addr.s_addr = ifp->int_addr;
990 			else
991 				gate_sa.sin_addr.s_addr = htonl(ifp->int_net);
992 #ifdef _HAVE_SA_LEN
993 			gate_sa.sin_len = sizeof(gate_sa);
994 #endif
995 			gate_sa.sin_family = AF_INET;
996 			INFO_GATE(&info) = (struct sockaddr *)&gate_sa;
997 		}
998 
999 		/* ignore multicast addresses
1000 		 */
1001 		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1002 			continue;
1003 
1004 		/* Note static routes and interface routes, and also
1005 		 * preload the image of the kernel table so that
1006 		 * we can later clean it, as well as avoid making
1007 		 * unneeded changes.  Keep the old kernel routes for a
1008 		 * few seconds to allow a RIP or router-discovery
1009 		 * response to be heard.
1010 		 */
1011 		rtm_add(rtm,&info,MIN_WAITTIME);
1012 	}
1013 	free(buf);
1014 }
1015 
1016 
1017 /* Listen to announcements from the kernel
1018  */
1019 void
1020 read_rt(void)
1021 {
1022 	long cc;
1023 	struct interface *ifp;
1024 	naddr mask;
1025 	union {
1026 		struct {
1027 			struct rt_msghdr rtm;
1028 			struct sockaddr addrs[RTAX_MAX];
1029 		} r;
1030 		struct if_msghdr ifm;
1031 	} m;
1032 	char str[100], *strp;
1033 	struct rt_addrinfo info;
1034 
1035 
1036 	for (;;) {
1037 		cc = read(rt_sock, &m, sizeof(m));
1038 		if (cc <= 0) {
1039 			if (cc < 0 && errno != EWOULDBLOCK)
1040 				LOGERR("read(rt_sock)");
1041 			return;
1042 		}
1043 
1044 		if (m.r.rtm.rtm_version != RTM_VERSION) {
1045 			msglog("bogus routing message version %d",
1046 			       m.r.rtm.rtm_version);
1047 			continue;
1048 		}
1049 
1050 		/* Ignore our own results.
1051 		 */
1052 		if (m.r.rtm.rtm_type <= RTM_CHANGE
1053 		    && m.r.rtm.rtm_pid == mypid) {
1054 			static int complained = 0;
1055 			if (!complained) {
1056 				msglog("receiving our own change messages");
1057 				complained = 1;
1058 			}
1059 			continue;
1060 		}
1061 
1062 		if (m.r.rtm.rtm_type == RTM_IFINFO
1063 		    || m.r.rtm.rtm_type == RTM_NEWADDR
1064 		    || m.r.rtm.rtm_type == RTM_DELADDR) {
1065 			ifp = ifwithindex(m.ifm.ifm_index);
1066 			if (ifp == 0)
1067 				trace_act("note %s with flags %#x"
1068 					  " for index #%d\n",
1069 					  rtm_type_name(m.r.rtm.rtm_type),
1070 					  m.ifm.ifm_flags,
1071 					  m.ifm.ifm_index);
1072 			else
1073 				trace_act("note %s with flags %#x for %s\n",
1074 					  rtm_type_name(m.r.rtm.rtm_type),
1075 					  m.ifm.ifm_flags,
1076 					  ifp->int_name);
1077 
1078 			/* After being informed of a change to an interface,
1079 			 * check them all now if the check would otherwise
1080 			 * be a long time from now, if the interface is
1081 			 * not known, or if the interface has been turned
1082 			 * off or on.
1083 			 */
1084 			if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1085 			    || ifp == 0
1086 			    || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1087 				& IFF_UP_RUNNING) != 0)
1088 				ifinit_timer.tv_sec = now.tv_sec;
1089 			continue;
1090 		}
1091 
1092 		strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1093 		strp = &str[strlen(str)];
1094 		if (m.r.rtm.rtm_type <= RTM_CHANGE)
1095 			strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1096 
1097 		rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1098 			  m.r.rtm.rtm_addrs);
1099 
1100 		if (INFO_DST(&info) == 0) {
1101 			trace_act("ignore %s without dst\n", str);
1102 			continue;
1103 		}
1104 
1105 		if (INFO_DST(&info)->sa_family != AF_INET) {
1106 			trace_act("ignore %s for AF %d\n", str,
1107 				  INFO_DST(&info)->sa_family);
1108 			continue;
1109 		}
1110 
1111 		mask = ((INFO_MASK(&info) != 0)
1112 			? ntohl(S_ADDR(INFO_MASK(&info)))
1113 			: (m.r.rtm.rtm_flags & RTF_HOST)
1114 			? HOST_MASK
1115 			: std_mask(S_ADDR(INFO_DST(&info))));
1116 
1117 		strp += sprintf(strp, ": %s",
1118 				addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1119 
1120 		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1121 			trace_act("ignore multicast %s\n", str);
1122 			continue;
1123 		}
1124 
1125 		if (INFO_GATE(&info) != 0
1126 		    && INFO_GATE(&info)->sa_family == AF_INET)
1127 			strp += sprintf(strp, " --> %s",
1128 					saddr_ntoa(INFO_GATE(&info)));
1129 
1130 		if (INFO_AUTHOR(&info) != 0)
1131 			strp += sprintf(strp, " by authority of %s",
1132 					saddr_ntoa(INFO_AUTHOR(&info)));
1133 
1134 		switch (m.r.rtm.rtm_type) {
1135 		case RTM_ADD:
1136 		case RTM_CHANGE:
1137 		case RTM_REDIRECT:
1138 			if (m.r.rtm.rtm_errno != 0) {
1139 				trace_act("ignore %s with \"%s\" error\n",
1140 					  str, strerror(m.r.rtm.rtm_errno));
1141 			} else {
1142 				trace_act("%s\n", str);
1143 				rtm_add(&m.r.rtm,&info,0);
1144 			}
1145 			break;
1146 
1147 		case RTM_DELETE:
1148 			if (m.r.rtm.rtm_errno != 0) {
1149 				trace_act("ignore %s with \"%s\" error\n",
1150 					  str, strerror(m.r.rtm.rtm_errno));
1151 			} else {
1152 				trace_act("%s\n", str);
1153 				del_static(S_ADDR(INFO_DST(&info)), mask, 1);
1154 			}
1155 			break;
1156 
1157 		case RTM_LOSING:
1158 			trace_act("%s\n", str);
1159 			rtm_lose(&m.r.rtm,&info);
1160 			break;
1161 
1162 		default:
1163 			trace_act("ignore %s\n", str);
1164 			break;
1165 		}
1166 	}
1167 }
1168 
1169 
1170 /* after aggregating, note routes that belong in the kernel
1171  */
1172 static void
1173 kern_out(struct ag_info *ag)
1174 {
1175 	struct khash *k;
1176 
1177 
1178 	/* Do not install bad routes if they are not already present.
1179 	 * This includes routes that had RS_NET_SYN for interfaces that
1180 	 * recently died.
1181 	 */
1182 	if (ag->ag_metric == HOPCNT_INFINITY) {
1183 		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1184 		if (k == 0)
1185 			return;
1186 	} else {
1187 		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1188 	}
1189 
1190 	if (k->k_state & KS_NEW) {
1191 		/* will need to add new entry to the kernel table */
1192 		k->k_state = KS_ADD;
1193 		if (ag->ag_state & AGS_GATEWAY)
1194 			k->k_state |= KS_GATEWAY;
1195 		k->k_gate = ag->ag_gate;
1196 		k->k_metric = ag->ag_metric;
1197 		return;
1198 	}
1199 
1200 	if (k->k_state & KS_STATIC)
1201 		return;
1202 
1203 	/* modify existing kernel entry if necessary */
1204 	if (k->k_gate != ag->ag_gate
1205 	    || k->k_metric != ag->ag_metric) {
1206 		k->k_gate = ag->ag_gate;
1207 		k->k_metric = ag->ag_metric;
1208 		k->k_state |= KS_CHANGE;
1209 	}
1210 
1211 	if (k->k_state & KS_DYNAMIC) {
1212 		k->k_state &= ~KS_DYNAMIC;
1213 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1214 	}
1215 
1216 	if ((k->k_state & KS_GATEWAY)
1217 	    && !(ag->ag_state & AGS_GATEWAY)) {
1218 		k->k_state &= ~KS_GATEWAY;
1219 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1220 	} else if (!(k->k_state & KS_GATEWAY)
1221 		   && (ag->ag_state & AGS_GATEWAY)) {
1222 		k->k_state |= KS_GATEWAY;
1223 		k->k_state |= (KS_ADD | KS_DEL_ADD);
1224 	}
1225 
1226 	/* Deleting-and-adding is necessary to change aspects of a route.
1227 	 * Just delete instead of deleting and then adding a bad route.
1228 	 * Otherwise, we want to keep the route in the kernel.
1229 	 */
1230 	if (k->k_metric == HOPCNT_INFINITY
1231 	    && (k->k_state & KS_DEL_ADD))
1232 		k->k_state |= KS_DELETE;
1233 	else
1234 		k->k_state &= ~KS_DELETE;
1235 #undef RT
1236 }
1237 
1238 
1239 /* ARGSUSED */
1240 static int
1241 walk_kern(struct radix_node *rn,
1242 	  struct walkarg *w)
1243 {
1244 #define RT ((struct rt_entry *)rn)
1245 	char metric, pref;
1246 	u_int ags = 0;
1247 
1248 
1249 	/* Do not install synthetic routes */
1250 	if (RT->rt_state & RS_NET_SYN)
1251 		return 0;
1252 
1253 	if (!(RT->rt_state & RS_IF)) {
1254 		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_PROMOTE);
1255 
1256 	} else {
1257 		/* Do not install routes for "external" remote interfaces.
1258 		 */
1259 		if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1260 			return 0;
1261 
1262 		ags |= AGS_IF;
1263 
1264 		/* If it is not an interface, or an alias for an interface,
1265 		 * it must be a "gateway."
1266 		 *
1267 		 * If it is a "remote" interface, it is also a "gateway" to
1268 		 * the kernel if is not a alias.
1269 		 */
1270 		if (RT->rt_ifp == 0
1271 		    || ((RT->rt_ifp->int_state & IS_REMOTE)
1272 			&& RT->rt_ifp->int_metric == 0))
1273 			ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_PROMOTE);
1274 	}
1275 
1276 	if (RT->rt_state & RS_RDISC)
1277 		ags |= AGS_CORS_GATE;
1278 
1279 	/* aggregate good routes without regard to their metric */
1280 	pref = 1;
1281 	metric = RT->rt_metric;
1282 	if (metric == HOPCNT_INFINITY) {
1283 		/* if the route is dead, so try hard to aggregate. */
1284 		pref = HOPCNT_INFINITY;
1285 		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1286 	}
1287 
1288 	ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1289 		 metric,pref, 0, 0, ags, kern_out);
1290 	return 0;
1291 #undef RT
1292 }
1293 
1294 
1295 /* Update the kernel table to match the daemon table.
1296  */
1297 static void
1298 fix_kern(void)
1299 {
1300 	int i, flags;
1301 	struct khash *k, **pk;
1302 
1303 
1304 	need_kern = age_timer;
1305 
1306 	/* Walk daemon table, updating the copy of the kernel table.
1307 	 */
1308 	(void)rn_walktree(rhead, walk_kern, 0);
1309 	ag_flush(0,0,kern_out);
1310 
1311 	for (i = 0; i < KHASH_SIZE; i++) {
1312 		for (pk = &khash_bins[i]; (k = *pk) != 0; ) {
1313 			/* Do not touch static routes */
1314 			if (k->k_state & KS_STATIC) {
1315 				kern_check_static(k,0);
1316 				pk = &k->k_next;
1317 				continue;
1318 			}
1319 
1320 			/* check hold on routes deleted by the operator */
1321 			if (k->k_keep > now.tv_sec) {
1322 				LIM_SEC(need_kern, k->k_keep);
1323 				k->k_state |= KS_DELETE;
1324 				pk = &k->k_next;
1325 				continue;
1326 			}
1327 
1328 			if ((k->k_state & (KS_DELETE | KS_DYNAMIC))
1329 			    == KS_DELETE) {
1330 				if (!(k->k_state & KS_DELETED))
1331 					rtioctl(RTM_DELETE,
1332 						k->k_dst, k->k_gate, k->k_mask,
1333 						0, 0);
1334 				*pk = k->k_next;
1335 				free(k);
1336 				continue;
1337 			}
1338 
1339 			if (0 != (k->k_state&(KS_ADD|KS_CHANGE|KS_DEL_ADD))) {
1340 				if (k->k_state & KS_DEL_ADD) {
1341 					rtioctl(RTM_DELETE,
1342 						k->k_dst,k->k_gate,k->k_mask,
1343 						0, 0);
1344 					k->k_state &= ~KS_DYNAMIC;
1345 				}
1346 
1347 				flags = 0;
1348 				if (0 != (k->k_state&(KS_GATEWAY|KS_DYNAMIC)))
1349 					flags |= RTF_GATEWAY;
1350 
1351 				if (k->k_state & KS_ADD) {
1352 					rtioctl(RTM_ADD,
1353 						k->k_dst, k->k_gate, k->k_mask,
1354 						k->k_metric, flags);
1355 				} else if (k->k_state & KS_CHANGE) {
1356 					rtioctl(RTM_CHANGE,
1357 						k->k_dst,k->k_gate,k->k_mask,
1358 						k->k_metric, flags);
1359 				}
1360 				k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1361 			}
1362 
1363 			/* Mark this route to be deleted in the next cycle.
1364 			 * This deletes routes that disappear from the
1365 			 * daemon table, since the normal aging code
1366 			 * will clear the bit for routes that have not
1367 			 * disappeared from the daemon table.
1368 			 */
1369 			k->k_state |= KS_DELETE;
1370 			pk = &k->k_next;
1371 		}
1372 	}
1373 }
1374 
1375 
1376 /* Delete a static route in the image of the kernel table.
1377  */
1378 void
1379 del_static(naddr dst,
1380 	   naddr mask,
1381 	   int gone)
1382 {
1383 	struct khash *k;
1384 	struct rt_entry *rt;
1385 
1386 	/* Just mark it in the table to be deleted next time the kernel
1387 	 * table is updated.
1388 	 * If it has already been deleted, mark it as such, and set its
1389 	 * keep-timer so that it will not be deleted again for a while.
1390 	 * This lets the operator delete a route added by the daemon
1391 	 * and add a replacement.
1392 	 */
1393 	k = kern_find(dst, mask, 0);
1394 	if (k != 0) {
1395 		k->k_state &= ~(KS_STATIC | KS_DYNAMIC);
1396 		k->k_state |= KS_DELETE;
1397 		if (gone) {
1398 			k->k_state |= KS_DELETED;
1399 			k->k_keep = now.tv_sec + K_KEEP_LIM;
1400 		}
1401 	}
1402 
1403 	rt = rtget(dst, mask);
1404 	if (rt != 0 && (rt->rt_state & RS_STATIC))
1405 		rtbad(rt);
1406 }
1407 
1408 
1409 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1410  * as well as old redirected routes.
1411  */
1412 void
1413 del_redirects(naddr bad_gate,
1414 	      time_t old)
1415 {
1416 	int i;
1417 	struct khash *k;
1418 
1419 
1420 	for (i = 0; i < KHASH_SIZE; i++) {
1421 		for (k = khash_bins[i]; k != 0; k = k->k_next) {
1422 			if (!(k->k_state & KS_DYNAMIC)
1423 			    || (k->k_state & KS_STATIC))
1424 				continue;
1425 
1426 			if (k->k_gate != bad_gate
1427 			    && k->k_redirect_time > old
1428 			    && !supplier)
1429 				continue;
1430 
1431 			k->k_state |= KS_DELETE;
1432 			k->k_state &= ~KS_DYNAMIC;
1433 			need_kern.tv_sec = now.tv_sec;
1434 			trace_act("mark redirected %s --> %s for deletion\n",
1435 				  addrname(k->k_dst, k->k_mask, 0),
1436 				  naddr_ntoa(k->k_gate));
1437 		}
1438 	}
1439 }
1440 
1441 
1442 /* Start the daemon tables.
1443  */
1444 void
1445 rtinit(void)
1446 {
1447 	extern int max_keylen;
1448 	int i;
1449 	struct ag_info *ag;
1450 
1451 	/* Initialize the radix trees */
1452 	max_keylen = sizeof(struct sockaddr_in);
1453 	rn_init();
1454 	rn_inithead((void**)&rhead, 32);
1455 
1456 	/* mark all of the slots in the table free */
1457 	ag_avail = ag_slots;
1458 	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1459 		ag->ag_fine = ag+1;
1460 		ag++;
1461 	}
1462 }
1463 
1464 
1465 #ifdef _HAVE_SIN_LEN
1466 static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET};
1467 static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET};
1468 #else
1469 static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1470 static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1471 #endif
1472 
1473 
1474 void
1475 set_need_flash(void)
1476 {
1477 	if (!need_flash) {
1478 		need_flash = 1;
1479 		/* Do not send the flash update immediately.  Wait a little
1480 		 * while to hear from other routers.
1481 		 */
1482 		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1483 	}
1484 }
1485 
1486 
1487 /* Get a particular routing table entry
1488  */
1489 struct rt_entry *
1490 rtget(naddr dst, naddr mask)
1491 {
1492 	struct rt_entry *rt;
1493 
1494 	dst_sock.sin_addr.s_addr = dst;
1495 	mask_sock.sin_addr.s_addr = mask;
1496 	masktrim(&mask_sock);
1497 	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1498 	if (!rt
1499 	    || rt->rt_dst != dst
1500 	    || rt->rt_mask != mask)
1501 		return 0;
1502 
1503 	return rt;
1504 }
1505 
1506 
1507 /* Find a route to dst as the kernel would.
1508  */
1509 struct rt_entry *
1510 rtfind(naddr dst)
1511 {
1512 	dst_sock.sin_addr.s_addr = dst;
1513 	return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1514 }
1515 
1516 
1517 /* add a route to the table
1518  */
1519 void
1520 rtadd(naddr	dst,
1521       naddr	mask,
1522       naddr	gate,			/* forward packets here */
1523       naddr	router,			/* on the authority of this router */
1524       int	metric,
1525       u_short	tag,
1526       u_int	state,			/* rs_state for the entry */
1527       struct interface *ifp)
1528 {
1529 	struct rt_entry *rt;
1530 	naddr smask;
1531 	int i;
1532 	struct rt_spare *rts;
1533 
1534 	rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1535 	bzero(rt, sizeof(*rt));
1536 	for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1537 		rts->rts_metric = HOPCNT_INFINITY;
1538 
1539 	rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1540 	rt->rt_dst = dst;
1541 	rt->rt_dst_sock.sin_family = AF_INET;
1542 #ifdef _HAVE_SIN_LEN
1543 	rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1544 #endif
1545 	if (mask != HOST_MASK) {
1546 		smask = std_mask(dst);
1547 		if ((smask & ~mask) == 0 && mask > smask)
1548 			state |= RS_SUBNET;
1549 	}
1550 	mask_sock.sin_addr.s_addr = mask;
1551 	masktrim(&mask_sock);
1552 	rt->rt_mask = mask;
1553 	rt->rt_state = state;
1554 	rt->rt_gate = gate;
1555 	rt->rt_router = router;
1556 	rt->rt_time = now.tv_sec;
1557 	rt->rt_metric = metric;
1558 	rt->rt_poison_metric = HOPCNT_INFINITY;
1559 	rt->rt_tag = tag;
1560 	rt->rt_ifp = ifp;
1561 	rt->rt_seqno = update_seqno;
1562 
1563 	if (++total_routes == MAX_ROUTES)
1564 		msglog("have maximum (%d) routes", total_routes);
1565 	if (TRACEACTIONS)
1566 		trace_add_del("Add", rt);
1567 
1568 	need_kern.tv_sec = now.tv_sec;
1569 	set_need_flash();
1570 
1571 	if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1572 				    rhead, rt->rt_nodes)) {
1573 /*
1574  * This will happen if RIP1 and RIP2 routeds talk to one another and
1575  * there are variable subnets.  This is only good for filling up your
1576  * syslog. -jkh
1577  */
1578 #if 0
1579 		msglog("rnh_addaddr() failed for %s mask=%#x",
1580 		       naddr_ntoa(dst), mask);
1581 #endif
1582 	}
1583 }
1584 
1585 
1586 /* notice a changed route
1587  */
1588 void
1589 rtchange(struct rt_entry *rt,
1590 	 u_int	state,			/* new state bits */
1591 	 naddr	gate,			/* now forward packets here */
1592 	 naddr	router,			/* on the authority of this router */
1593 	 int	metric,			/* new metric */
1594 	 u_short tag,
1595 	 struct interface *ifp,
1596 	 time_t	new_time,
1597 	 char	*label)
1598 {
1599 	if (rt->rt_metric != metric) {
1600 		/* Fix the kernel immediately if it seems the route
1601 		 * has gone bad, since there may be a working route that
1602 		 * aggregates this route.
1603 		 */
1604 		if (metric == HOPCNT_INFINITY) {
1605 			need_kern.tv_sec = now.tv_sec;
1606 			if (new_time >= now.tv_sec - EXPIRE_TIME)
1607 				new_time = now.tv_sec - EXPIRE_TIME;
1608 		}
1609 		rt->rt_seqno = update_seqno;
1610 		set_need_flash();
1611 	}
1612 
1613 	if (rt->rt_gate != gate) {
1614 		need_kern.tv_sec = now.tv_sec;
1615 		rt->rt_seqno = update_seqno;
1616 		set_need_flash();
1617 	}
1618 
1619 	state |= (rt->rt_state & RS_SUBNET);
1620 
1621 	/* Keep various things from deciding ageless routes are stale.
1622 	 */
1623 	if (!AGE_RT(state, ifp))
1624 		new_time = now.tv_sec;
1625 
1626 	if (TRACEACTIONS)
1627 		trace_change(rt, state, gate, router, metric, tag, ifp,
1628 			     new_time,
1629 			     label ? label : "Chg   ");
1630 
1631 	rt->rt_state = state;
1632 	rt->rt_gate = gate;
1633 	rt->rt_router = router;
1634 	rt->rt_metric = metric;
1635 	rt->rt_tag = tag;
1636 	rt->rt_ifp = ifp;
1637 	rt->rt_time = new_time;
1638 }
1639 
1640 
1641 /* check for a better route among the spares
1642  */
1643 static struct rt_spare *
1644 rts_better(struct rt_entry *rt)
1645 {
1646 	struct rt_spare *rts, *rts1;
1647 	int i;
1648 
1649 	/* find the best alternative among the spares */
1650 	rts = rt->rt_spares+1;
1651 	for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1652 		if (BETTER_LINK(rt,rts1,rts))
1653 			rts = rts1;
1654 	}
1655 
1656 	return rts;
1657 }
1658 
1659 
1660 /* switch to a backup route
1661  */
1662 void
1663 rtswitch(struct rt_entry *rt,
1664 	 struct rt_spare *rts)
1665 {
1666 	struct rt_spare swap;
1667 	char label[10];
1668 
1669 
1670 	/* Do not change permanent routes */
1671 	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1672 				  | RS_NET_SYN | RS_IF)))
1673 		return;
1674 
1675 	/* find the best alternative among the spares */
1676 	if (rts == 0)
1677 		rts = rts_better(rt);
1678 
1679 	/* Do not bother if it is not worthwhile.
1680 	 */
1681 	if (!BETTER_LINK(rt, rts, rt->rt_spares))
1682 		return;
1683 
1684 	swap = rt->rt_spares[0];
1685 	(void)sprintf(label, "Use #%d", rts - rt->rt_spares);
1686 	rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC),
1687 		 rts->rts_gate, rts->rts_router, rts->rts_metric,
1688 		 rts->rts_tag, rts->rts_ifp, rts->rts_time, label);
1689 	*rts = swap;
1690 }
1691 
1692 
1693 void
1694 rtdelete(struct rt_entry *rt)
1695 {
1696 	struct khash *k;
1697 
1698 
1699 	if (TRACEACTIONS)
1700 		trace_add_del("Del", rt);
1701 
1702 	k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1703 	if (k != 0) {
1704 		k->k_state |= KS_DELETE;
1705 		need_kern.tv_sec = now.tv_sec;
1706 	}
1707 
1708 	dst_sock.sin_addr.s_addr = rt->rt_dst;
1709 	mask_sock.sin_addr.s_addr = rt->rt_mask;
1710 	masktrim(&mask_sock);
1711 	if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1712 							rhead)) {
1713 		msglog("rnh_deladdr() failed");
1714 	} else {
1715 		free(rt);
1716 		total_routes--;
1717 	}
1718 }
1719 
1720 
1721 /* Get rid of a bad route, and try to switch to a replacement.
1722  */
1723 void
1724 rtbad(struct rt_entry *rt)
1725 {
1726 	/* Poison the route */
1727 	rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC),
1728 		 rt->rt_gate, rt->rt_router, HOPCNT_INFINITY, rt->rt_tag,
1729 		 0, rt->rt_time, 0);
1730 
1731 	rtswitch(rt, 0);
1732 }
1733 
1734 
1735 /* Junk a RS_NET_SYN or RS_LOCAL route,
1736  *	unless it is needed by another interface.
1737  */
1738 void
1739 rtbad_sub(struct rt_entry *rt)
1740 {
1741 	struct interface *ifp, *ifp1;
1742 	struct intnet *intnetp;
1743 	u_int state;
1744 
1745 
1746 	ifp1 = 0;
1747 	state = 0;
1748 
1749 	if (rt->rt_state & RS_LOCAL) {
1750 		/* Is this the route through loopback for the interface?
1751 		 * If so, see if it is used by any other interfaces, such
1752 		 * as a point-to-point interface with the same local address.
1753 		 */
1754 		for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1755 			/* Retain it if another interface needs it.
1756 			 */
1757 			if (ifp->int_addr == rt->rt_ifp->int_addr) {
1758 				state |= RS_LOCAL;
1759 				ifp1 = ifp;
1760 				break;
1761 			}
1762 		}
1763 
1764 	}
1765 
1766 	if (!(state & RS_LOCAL)) {
1767 		/* Retain RIPv1 logical network route if there is another
1768 		 * interface that justifies it.
1769 		 */
1770 		if (rt->rt_state & RS_NET_SYN) {
1771 			for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1772 				if ((ifp->int_state & IS_NEED_NET_SYN)
1773 				    && rt->rt_mask == ifp->int_std_mask
1774 				    && rt->rt_dst == ifp->int_std_addr) {
1775 					state |= RS_NET_SYN;
1776 					ifp1 = ifp;
1777 					break;
1778 				}
1779 			}
1780 		}
1781 
1782 		/* or if there is an authority route that needs it. */
1783 		for (intnetp = intnets;
1784 		     intnetp != 0;
1785 		     intnetp = intnetp->intnet_next) {
1786 			if (intnetp->intnet_addr == rt->rt_dst
1787 			    && intnetp->intnet_mask == rt->rt_mask) {
1788 				state |= (RS_NET_SYN | RS_NET_INT);
1789 				break;
1790 			}
1791 		}
1792 	}
1793 
1794 	if (ifp1 != 0 || (state & RS_NET_SYN)) {
1795 		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN | RS_LOCAL))
1796 			      | state),
1797 			 rt->rt_gate, rt->rt_router, rt->rt_metric,
1798 			 rt->rt_tag, ifp1, rt->rt_time, 0);
1799 	} else {
1800 		rtbad(rt);
1801 	}
1802 }
1803 
1804 
1805 /* Called while walking the table looking for sick interfaces
1806  * or after a time change.
1807  */
1808 /* ARGSUSED */
1809 int
1810 walk_bad(struct radix_node *rn,
1811 	 struct walkarg *w)
1812 {
1813 #define RT ((struct rt_entry *)rn)
1814 	struct rt_spare *rts;
1815 	int i;
1816 	time_t new_time;
1817 
1818 
1819 	/* fix any spare routes through the interface
1820 	 */
1821 	rts = RT->rt_spares;
1822 	for (i = NUM_SPARES; i != 1; i--) {
1823 		rts++;
1824 
1825 		if (rts->rts_ifp != 0
1826 		    && (rts->rts_ifp->int_state & IS_BROKE)) {
1827 			/* mark the spare route to be deleted immediately */
1828 			new_time = rts->rts_time;
1829 			if (new_time >= now_garbage)
1830 				new_time = now_garbage-1;
1831 			trace_upslot(RT, rts, rts->rts_gate,
1832 				     rts->rts_router, 0,
1833 				     HOPCNT_INFINITY, rts->rts_tag,
1834 				     new_time);
1835 			rts->rts_ifp = 0;
1836 			rts->rts_metric = HOPCNT_INFINITY;
1837 			rts->rts_time = new_time;
1838 		}
1839 	}
1840 
1841 	/* Deal with the main route
1842 	 */
1843 	/* finished if it has been handled before or if its interface is ok
1844 	 */
1845 	if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1846 		return 0;
1847 
1848 	/* Bad routes for other than interfaces are easy.
1849 	 */
1850 	if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1851 		rtbad(RT);
1852 		return 0;
1853 	}
1854 
1855 	rtbad_sub(RT);
1856 	return 0;
1857 #undef RT
1858 }
1859 
1860 
1861 /* Check the age of an individual route.
1862  */
1863 /* ARGSUSED */
1864 static int
1865 walk_age(struct radix_node *rn,
1866 	   struct walkarg *w)
1867 {
1868 #define RT ((struct rt_entry *)rn)
1869 	struct interface *ifp;
1870 	struct rt_spare *rts;
1871 	int i;
1872 
1873 
1874 	/* age all of the spare routes, including the primary route
1875 	 * currently in use
1876 	 */
1877 	rts = RT->rt_spares;
1878 	for (i = NUM_SPARES; i != 0; i--, rts++) {
1879 
1880 		ifp = rts->rts_ifp;
1881 		if (i == NUM_SPARES) {
1882 			if (!AGE_RT(RT->rt_state, ifp)) {
1883 				/* Keep various things from deciding ageless
1884 				 * routes are stale
1885 				 */
1886 				rts->rts_time = now.tv_sec;
1887 				continue;
1888 			}
1889 
1890 			/* forget RIP routes after RIP has been turned off.
1891 			 */
1892 			if (rip_sock < 0) {
1893 				rtdelete(RT);
1894 				return 0;
1895 			}
1896 		}
1897 
1898 		/* age failing routes
1899 		 */
1900 		if (age_bad_gate == rts->rts_gate
1901 		    && rts->rts_time >= now_stale) {
1902 			rts->rts_time -= SUPPLY_INTERVAL;
1903 		}
1904 
1905 		/* trash the spare routes when they go bad */
1906 		if (rts->rts_metric < HOPCNT_INFINITY
1907 		    && now_garbage > rts->rts_time) {
1908 			trace_upslot(RT, rts, rts->rts_gate,
1909 				     rts->rts_router, rts->rts_ifp,
1910 				     HOPCNT_INFINITY, rts->rts_tag,
1911 				     rts->rts_time);
1912 			rts->rts_metric = HOPCNT_INFINITY;
1913 		}
1914 	}
1915 
1916 
1917 	/* finished if the active route is still fresh */
1918 	if (now_stale <= RT->rt_time)
1919 		return 0;
1920 
1921 	/* try to switch to an alternative */
1922 	rtswitch(RT, 0);
1923 
1924 	/* Delete a dead route after it has been publically mourned. */
1925 	if (now_garbage > RT->rt_time) {
1926 		rtdelete(RT);
1927 		return 0;
1928 	}
1929 
1930 	/* Start poisoning a bad route before deleting it. */
1931 	if (now.tv_sec - RT->rt_time > EXPIRE_TIME)
1932 		rtchange(RT, RT->rt_state, RT->rt_gate, RT->rt_router,
1933 			 HOPCNT_INFINITY, RT->rt_tag, RT->rt_ifp,
1934 			 RT->rt_time, 0);
1935 	return 0;
1936 }
1937 
1938 
1939 /* Watch for dead routes and interfaces.
1940  */
1941 void
1942 age(naddr bad_gate)
1943 {
1944 	struct interface *ifp;
1945 
1946 
1947 	age_timer.tv_sec = now.tv_sec + (rip_sock < 0
1948 					 ? NEVER
1949 					 : SUPPLY_INTERVAL);
1950 
1951 	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
1952 		/* Check for dead IS_REMOTE interfaces by timing their
1953 		 * transmissions.
1954 		 */
1955 		if ((ifp->int_state & IS_REMOTE)
1956 		    && !(ifp->int_state & IS_PASSIVE)
1957 		    && (ifp->int_state & IS_ACTIVE)) {
1958 			LIM_SEC(age_timer, now.tv_sec+SUPPLY_INTERVAL);
1959 
1960 			if (now.tv_sec - ifp->int_act_time > EXPIRE_TIME
1961 			    && !(ifp->int_state & IS_BROKE)) {
1962 				msglog("remote interface %s to %s timed out"
1963 				       "--turned off",
1964 				       ifp->int_name,
1965 				       naddr_ntoa(ifp->int_addr));
1966 				if_bad(ifp);
1967 			}
1968 		}
1969 	}
1970 
1971 	/* Age routes. */
1972 	age_bad_gate = bad_gate;
1973 	(void)rn_walktree(rhead, walk_age, 0);
1974 
1975 	/* Update the kernel routing table. */
1976 	fix_kern();
1977 }
1978