1 /* 2 * Copyright 1994, 1995 Massachusetts Institute of Technology 3 * 4 * Permission to use, copy, modify, and distribute this software and 5 * its documentation for any purpose and without fee is hereby 6 * granted, provided that both the above copyright notice and this 7 * permission notice appear in all copies, that both the above 8 * copyright notice and this permission notice appear in all 9 * supporting documentation, and that the name of M.I.T. not be used 10 * in advertising or publicity pertaining to distribution of the 11 * software without specific, written prior permission. M.I.T. makes 12 * no representations about the suitability of this software for any 13 * purpose. It is provided "as is" without express or implied 14 * warranty. 15 * 16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 /* 33 * This code does two things necessary for the enhanced TCP metrics to 34 * function in a useful manner: 35 * 1) It marks all non-host routes as `cloning', thus ensuring that 36 * every actual reference to such a route actually gets turned 37 * into a reference to a host route to the specific destination 38 * requested. 39 * 2) When such routes lose all their references, it arranges for them 40 * to be deleted in some random collection of circumstances, so that 41 * a large quantity of stale routing data is not kept in kernel memory 42 * indefinitely. See in_rtqtimo() below for the exact mechanism. 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/sysctl.h> 49 #include <sys/socket.h> 50 #include <sys/mbuf.h> 51 #include <sys/syslog.h> 52 53 #include <net/if.h> 54 #include <net/route.h> 55 #include <netinet/in.h> 56 #include <netinet/in_var.h> 57 #include <netinet/ip_var.h> 58 59 extern int in_inithead __P((void **head, int off)); 60 61 #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ 62 63 /* 64 * Do what we need to do when inserting a route. 65 */ 66 static struct radix_node * 67 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 68 struct radix_node *treenodes) 69 { 70 struct rtentry *rt = (struct rtentry *)treenodes; 71 struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); 72 struct radix_node *ret; 73 74 /* 75 * For IP, all unicast non-host routes are automatically cloning. 76 */ 77 if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 78 rt->rt_flags |= RTF_MULTICAST; 79 80 if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { 81 rt->rt_flags |= RTF_PRCLONING; 82 } 83 84 /* 85 * A little bit of help for both IP output and input: 86 * For host routes, we make sure that RTF_BROADCAST 87 * is set for anything that looks like a broadcast address. 88 * This way, we can avoid an expensive call to in_broadcast() 89 * in ip_output() most of the time (because the route passed 90 * to ip_output() is almost always a host route). 91 * 92 * We also do the same for local addresses, with the thought 93 * that this might one day be used to speed up ip_input(). 94 * 95 * We also mark routes to multicast addresses as such, because 96 * it's easy to do and might be useful (but this is much more 97 * dubious since it's so easy to inspect the address). (This 98 * is done above.) 99 */ 100 if (rt->rt_flags & RTF_HOST) { 101 if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { 102 rt->rt_flags |= RTF_BROADCAST; 103 } else { 104 #define satosin(sa) ((struct sockaddr_in *)sa) 105 if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr 106 == sin->sin_addr.s_addr) 107 rt->rt_flags |= RTF_LOCAL; 108 #undef satosin 109 } 110 } 111 112 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) 113 && rt->rt_ifp) 114 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; 115 116 ret = rn_addroute(v_arg, n_arg, head, treenodes); 117 if (ret == NULL && rt->rt_flags & RTF_HOST) { 118 struct rtentry *rt2; 119 /* 120 * We are trying to add a host route, but can't. 121 * Find out if it is because of an 122 * ARP entry and delete it if so. 123 */ 124 rt2 = rtalloc1((struct sockaddr *)sin, 0, 125 RTF_CLONING | RTF_PRCLONING); 126 if (rt2) { 127 if (rt2->rt_flags & RTF_LLINFO && 128 rt2->rt_flags & RTF_HOST && 129 rt2->rt_gateway && 130 rt2->rt_gateway->sa_family == AF_LINK) { 131 rtrequest(RTM_DELETE, 132 (struct sockaddr *)rt_key(rt2), 133 rt2->rt_gateway, 134 rt_mask(rt2), rt2->rt_flags, 0); 135 ret = rn_addroute(v_arg, n_arg, head, 136 treenodes); 137 } 138 RTFREE(rt2); 139 } 140 } 141 142 /* 143 * If the new route created successfully, and we are forwarding, 144 * and there is a cached route, free it. Otherwise, we may end 145 * up using the wrong route. 146 */ 147 if (ret != NULL && ipforwarding && ipforward_rt.ro_rt) { 148 RTFREE(ipforward_rt.ro_rt); 149 ipforward_rt.ro_rt = 0; 150 } 151 152 return ret; 153 } 154 155 /* 156 * This code is the inverse of in_clsroute: on first reference, if we 157 * were managing the route, stop doing so and set the expiration timer 158 * back off again. 159 */ 160 static struct radix_node * 161 in_matroute(void *v_arg, struct radix_node_head *head) 162 { 163 struct radix_node *rn = rn_match(v_arg, head); 164 struct rtentry *rt = (struct rtentry *)rn; 165 166 if(rt && rt->rt_refcnt == 0) { /* this is first reference */ 167 if(rt->rt_flags & RTPRF_OURS) { 168 rt->rt_flags &= ~RTPRF_OURS; 169 rt->rt_rmx.rmx_expire = 0; 170 } 171 } 172 return rn; 173 } 174 175 static int rtq_reallyold = 60*60; 176 /* one hour is ``really old'' */ 177 SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, 178 &rtq_reallyold , 0, 179 "Default expiration time on dynamically learned routes"); 180 181 static int rtq_minreallyold = 10; 182 /* never automatically crank down to less */ 183 SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, 184 &rtq_minreallyold , 0, 185 "Minimum time to attempt to hold onto dynamically learned routes"); 186 187 static int rtq_toomany = 128; 188 /* 128 cached routes is ``too many'' */ 189 SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, 190 &rtq_toomany , 0, "Upper limit on dynamically learned routes"); 191 192 /* 193 * On last reference drop, mark the route as belong to us so that it can be 194 * timed out. 195 */ 196 static void 197 in_clsroute(struct radix_node *rn, struct radix_node_head *head) 198 { 199 struct rtentry *rt = (struct rtentry *)rn; 200 201 if(!(rt->rt_flags & RTF_UP)) 202 return; /* prophylactic measures */ 203 204 if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) 205 return; 206 207 if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) 208 != RTF_WASCLONED) 209 return; 210 211 /* 212 * As requested by David Greenman: 213 * If rtq_reallyold is 0, just delete the route without 214 * waiting for a timeout cycle to kill it. 215 */ 216 if(rtq_reallyold != 0) { 217 rt->rt_flags |= RTPRF_OURS; 218 rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; 219 } else { 220 rtrequest(RTM_DELETE, 221 (struct sockaddr *)rt_key(rt), 222 rt->rt_gateway, rt_mask(rt), 223 rt->rt_flags, 0); 224 } 225 } 226 227 struct rtqk_arg { 228 struct radix_node_head *rnh; 229 int draining; 230 int killed; 231 int found; 232 int updating; 233 time_t nextstop; 234 }; 235 236 /* 237 * Get rid of old routes. When draining, this deletes everything, even when 238 * the timeout is not expired yet. When updating, this makes sure that 239 * nothing has a timeout longer than the current value of rtq_reallyold. 240 */ 241 static int 242 in_rtqkill(struct radix_node *rn, void *rock) 243 { 244 struct rtqk_arg *ap = rock; 245 struct rtentry *rt = (struct rtentry *)rn; 246 int err; 247 248 if(rt->rt_flags & RTPRF_OURS) { 249 ap->found++; 250 251 if(ap->draining || rt->rt_rmx.rmx_expire <= time_second) { 252 if(rt->rt_refcnt > 0) 253 panic("rtqkill route really not free"); 254 255 err = rtrequest(RTM_DELETE, 256 (struct sockaddr *)rt_key(rt), 257 rt->rt_gateway, rt_mask(rt), 258 rt->rt_flags, 0); 259 if(err) { 260 log(LOG_WARNING, "in_rtqkill: error %d\n", err); 261 } else { 262 ap->killed++; 263 } 264 } else { 265 if(ap->updating 266 && (rt->rt_rmx.rmx_expire - time_second 267 > rtq_reallyold)) { 268 rt->rt_rmx.rmx_expire = time_second 269 + rtq_reallyold; 270 } 271 ap->nextstop = lmin(ap->nextstop, 272 rt->rt_rmx.rmx_expire); 273 } 274 } 275 276 return 0; 277 } 278 279 #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ 280 static int rtq_timeout = RTQ_TIMEOUT; 281 282 static void 283 in_rtqtimo(void *rock) 284 { 285 struct radix_node_head *rnh = rock; 286 struct rtqk_arg arg; 287 struct timeval atv; 288 static time_t last_adjusted_timeout = 0; 289 int s; 290 291 arg.found = arg.killed = 0; 292 arg.rnh = rnh; 293 arg.nextstop = time_second + rtq_timeout; 294 arg.draining = arg.updating = 0; 295 s = splnet(); 296 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 297 splx(s); 298 299 /* 300 * Attempt to be somewhat dynamic about this: 301 * If there are ``too many'' routes sitting around taking up space, 302 * then crank down the timeout, and see if we can't make some more 303 * go away. However, we make sure that we will never adjust more 304 * than once in rtq_timeout seconds, to keep from cranking down too 305 * hard. 306 */ 307 if((arg.found - arg.killed > rtq_toomany) 308 && (time_second - last_adjusted_timeout >= rtq_timeout) 309 && rtq_reallyold > rtq_minreallyold) { 310 rtq_reallyold = 2*rtq_reallyold / 3; 311 if(rtq_reallyold < rtq_minreallyold) { 312 rtq_reallyold = rtq_minreallyold; 313 } 314 315 last_adjusted_timeout = time_second; 316 #ifdef DIAGNOSTIC 317 log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", 318 rtq_reallyold); 319 #endif 320 arg.found = arg.killed = 0; 321 arg.updating = 1; 322 s = splnet(); 323 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 324 splx(s); 325 } 326 327 atv.tv_usec = 0; 328 atv.tv_sec = arg.nextstop - time_second; 329 timeout(in_rtqtimo, rock, tvtohz(&atv)); 330 } 331 332 void 333 in_rtqdrain(void) 334 { 335 struct radix_node_head *rnh = rt_tables[AF_INET]; 336 struct rtqk_arg arg; 337 int s; 338 arg.found = arg.killed = 0; 339 arg.rnh = rnh; 340 arg.nextstop = 0; 341 arg.draining = 1; 342 arg.updating = 0; 343 s = splnet(); 344 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 345 splx(s); 346 } 347 348 /* 349 * Initialize our routing tree. 350 */ 351 int 352 in_inithead(void **head, int off) 353 { 354 struct radix_node_head *rnh; 355 356 if(!rn_inithead(head, off)) 357 return 0; 358 359 if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ 360 return 1; /* only do this for the real routing table */ 361 362 rnh = *head; 363 rnh->rnh_addaddr = in_addroute; 364 rnh->rnh_matchaddr = in_matroute; 365 rnh->rnh_close = in_clsroute; 366 in_rtqtimo(rnh); /* kick off timeout first time */ 367 return 1; 368 } 369 370 371 /* 372 * This zaps old routes when the interface goes down or interface 373 * address is deleted. In the latter case, it deletes static routes 374 * that point to this address. If we don't do this, we may end up 375 * using the old address in the future. The ones we always want to 376 * get rid of are things like ARP entries, since the user might down 377 * the interface, walk over to a completely different network, and 378 * plug back in. 379 */ 380 struct in_ifadown_arg { 381 struct radix_node_head *rnh; 382 struct ifaddr *ifa; 383 int del; 384 }; 385 386 static int 387 in_ifadownkill(struct radix_node *rn, void *xap) 388 { 389 struct in_ifadown_arg *ap = xap; 390 struct rtentry *rt = (struct rtentry *)rn; 391 int err; 392 393 if (rt->rt_ifa == ap->ifa && 394 (ap->del || !(rt->rt_flags & RTF_STATIC))) { 395 /* 396 * We need to disable the automatic prune that happens 397 * in this case in rtrequest() because it will blow 398 * away the pointers that rn_walktree() needs in order 399 * continue our descent. We will end up deleting all 400 * the routes that rtrequest() would have in any case, 401 * so that behavior is not needed there. 402 */ 403 rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); 404 err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), 405 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); 406 if (err) { 407 log(LOG_WARNING, "in_ifadownkill: error %d\n", err); 408 } 409 } 410 return 0; 411 } 412 413 int 414 in_ifadown(struct ifaddr *ifa, int delete) 415 { 416 struct in_ifadown_arg arg; 417 struct radix_node_head *rnh; 418 419 if (ifa->ifa_addr->sa_family != AF_INET) 420 return 1; 421 422 arg.rnh = rnh = rt_tables[AF_INET]; 423 arg.ifa = ifa; 424 arg.del = delete; 425 rnh->rnh_walktree(rnh, in_ifadownkill, &arg); 426 ifa->ifa_flags &= ~IFA_ROUTE; 427 return 0; 428 } 429