1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 30 * $FreeBSD$ 31 */ 32 33 #include "opt_ipsec.h" 34 #include "opt_mac.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/mac.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/time.h> 43 #include <sys/kernel.h> 44 #include <sys/sysctl.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/route.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_pcb.h> 52 #include <netinet/in_systm.h> 53 #include <netinet/in_var.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_icmp.h> 56 #include <netinet/ip_var.h> 57 #include <netinet/tcp.h> 58 #include <netinet/tcp_var.h> 59 #include <netinet/tcpip.h> 60 #include <netinet/icmp_var.h> 61 62 #ifdef IPSEC 63 #include <netinet6/ipsec.h> 64 #include <netkey/key.h> 65 #endif 66 67 #ifdef FAST_IPSEC 68 #include <netipsec/ipsec.h> 69 #include <netipsec/key.h> 70 #define IPSEC 71 #endif 72 73 #include <machine/in_cksum.h> 74 75 /* 76 * ICMP routines: error generation, receive packet processing, and 77 * routines to turnaround packets back to the originator, and 78 * host table maintenance routines. 79 */ 80 81 struct icmpstat icmpstat; 82 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, 83 &icmpstat, icmpstat, ""); 84 85 static int icmpmaskrepl = 0; 86 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, 87 &icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets."); 88 89 static u_int icmpmaskfake = 0; 90 SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, 91 &icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets."); 92 93 static int drop_redirect = 0; 94 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 95 &drop_redirect, 0, ""); 96 97 static int log_redirect = 0; 98 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 99 &log_redirect, 0, ""); 100 101 static int icmplim = 200; 102 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, 103 &icmplim, 0, ""); 104 105 static int icmplim_output = 1; 106 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, 107 &icmplim_output, 0, ""); 108 109 static char reply_src[IFNAMSIZ]; 110 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW, 111 &reply_src, IFNAMSIZ, "icmp reply source for non-local packets."); 112 113 /* 114 * ICMP broadcast echo sysctl 115 */ 116 117 static int icmpbmcastecho = 0; 118 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, 119 &icmpbmcastecho, 0, ""); 120 121 122 #ifdef ICMPPRINTFS 123 int icmpprintfs = 0; 124 #endif 125 126 static void icmp_reflect(struct mbuf *); 127 static void icmp_send(struct mbuf *, struct mbuf *); 128 static int ip_next_mtu(int, int); 129 130 extern struct protosw inetsw[]; 131 132 /* 133 * Generate an error packet of type error 134 * in response to bad packet ip. 135 */ 136 void 137 icmp_error(n, type, code, dest, destifp) 138 struct mbuf *n; 139 int type, code; 140 n_long dest; 141 struct ifnet *destifp; 142 { 143 register struct ip *oip = mtod(n, struct ip *), *nip; 144 register unsigned oiplen = oip->ip_hl << 2; 145 register struct icmp *icp; 146 register struct mbuf *m; 147 register struct m_tag *mtag; 148 unsigned icmplen; 149 150 #ifdef ICMPPRINTFS 151 if (icmpprintfs) 152 printf("icmp_error(%p, %x, %d)\n", oip, type, code); 153 #endif 154 if (type != ICMP_REDIRECT) 155 icmpstat.icps_error++; 156 /* 157 * Don't send error if not the first fragment of message. 158 * Don't error if the old packet protocol was ICMP 159 * error message, only known informational types. 160 */ 161 if (oip->ip_off &~ (IP_MF|IP_DF)) 162 goto freeit; 163 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 164 n->m_len >= oiplen + ICMP_MINLEN && 165 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { 166 icmpstat.icps_oldicmp++; 167 goto freeit; 168 } 169 /* Don't send error in response to a multicast or broadcast packet */ 170 if (n->m_flags & (M_BCAST|M_MCAST)) 171 goto freeit; 172 /* 173 * First, formulate icmp message 174 */ 175 m = m_gethdr(M_DONTWAIT, MT_HEADER); 176 if (m == NULL) 177 goto freeit; 178 #ifdef MAC 179 mac_create_mbuf_netlayer(n, m); 180 #endif 181 icmplen = min(oiplen + 8, oip->ip_len); 182 if (icmplen < sizeof(struct ip)) 183 panic("icmp_error: bad length"); 184 m->m_len = icmplen + ICMP_MINLEN; 185 MH_ALIGN(m, m->m_len); 186 icp = mtod(m, struct icmp *); 187 if ((u_int)type > ICMP_MAXTYPE) 188 panic("icmp_error"); 189 icmpstat.icps_outhist[type]++; 190 icp->icmp_type = type; 191 if (type == ICMP_REDIRECT) 192 icp->icmp_gwaddr.s_addr = dest; 193 else { 194 icp->icmp_void = 0; 195 /* 196 * The following assignments assume an overlay with the 197 * zeroed icmp_void field. 198 */ 199 if (type == ICMP_PARAMPROB) { 200 icp->icmp_pptr = code; 201 code = 0; 202 } else if (type == ICMP_UNREACH && 203 code == ICMP_UNREACH_NEEDFRAG && destifp) { 204 icp->icmp_nextmtu = htons(destifp->if_mtu); 205 } 206 } 207 208 icp->icmp_code = code; 209 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); 210 nip = &icp->icmp_ip; 211 212 /* 213 * Convert fields to network representation. 214 */ 215 nip->ip_len = htons(nip->ip_len); 216 nip->ip_off = htons(nip->ip_off); 217 218 /* 219 * Now, copy old ip header (without options) 220 * in front of icmp message. 221 */ 222 if (m->m_data - sizeof(struct ip) < m->m_pktdat) 223 panic("icmp len"); 224 m->m_data -= sizeof(struct ip); 225 m->m_len += sizeof(struct ip); 226 m->m_pkthdr.len = m->m_len; 227 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; 228 nip = mtod(m, struct ip *); 229 bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); 230 nip->ip_len = m->m_len; 231 nip->ip_v = IPVERSION; 232 nip->ip_hl = 5; 233 nip->ip_p = IPPROTO_ICMP; 234 nip->ip_tos = 0; 235 /* 236 * XXX: Move PF_GENERATED m_tag to new packet, if it exists. 237 * This should be replaced by unified flags/tags for 238 * pf/ipfw/ipf and future pfil_hook applications. 239 */ 240 mtag = m_tag_find(n, PACKET_TAG_PF_GENERATED, NULL); 241 if (mtag != NULL) { 242 m_tag_unlink(n, mtag); 243 m_tag_prepend(m, mtag); 244 } 245 icmp_reflect(m); 246 247 freeit: 248 m_freem(n); 249 } 250 251 /* 252 * Process a received ICMP message. 253 */ 254 void 255 icmp_input(m, off) 256 struct mbuf *m; 257 int off; 258 { 259 struct icmp *icp; 260 struct in_ifaddr *ia; 261 struct ip *ip = mtod(m, struct ip *); 262 struct sockaddr_in icmpsrc, icmpdst, icmpgw; 263 int hlen = off; 264 int icmplen = ip->ip_len; 265 int i, code; 266 void (*ctlfunc)(int, struct sockaddr *, void *); 267 268 /* 269 * Locate icmp structure in mbuf, and check 270 * that not corrupted and of at least minimum length. 271 */ 272 #ifdef ICMPPRINTFS 273 if (icmpprintfs) { 274 char buf[4 * sizeof "123"]; 275 strcpy(buf, inet_ntoa(ip->ip_src)); 276 printf("icmp_input from %s to %s, len %d\n", 277 buf, inet_ntoa(ip->ip_dst), icmplen); 278 } 279 #endif 280 if (icmplen < ICMP_MINLEN) { 281 icmpstat.icps_tooshort++; 282 goto freeit; 283 } 284 i = hlen + min(icmplen, ICMP_ADVLENMIN); 285 if (m->m_len < i && (m = m_pullup(m, i)) == 0) { 286 icmpstat.icps_tooshort++; 287 return; 288 } 289 ip = mtod(m, struct ip *); 290 m->m_len -= hlen; 291 m->m_data += hlen; 292 icp = mtod(m, struct icmp *); 293 if (in_cksum(m, icmplen)) { 294 icmpstat.icps_checksum++; 295 goto freeit; 296 } 297 m->m_len += hlen; 298 m->m_data -= hlen; 299 300 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 301 /* 302 * Deliver very specific ICMP type only. 303 */ 304 switch (icp->icmp_type) { 305 case ICMP_UNREACH: 306 case ICMP_TIMXCEED: 307 break; 308 default: 309 goto freeit; 310 } 311 } 312 313 #ifdef ICMPPRINTFS 314 if (icmpprintfs) 315 printf("icmp_input, type %d code %d\n", icp->icmp_type, 316 icp->icmp_code); 317 #endif 318 319 /* 320 * Message type specific processing. 321 */ 322 if (icp->icmp_type > ICMP_MAXTYPE) 323 goto raw; 324 325 /* Initialize */ 326 bzero(&icmpsrc, sizeof(icmpsrc)); 327 icmpsrc.sin_len = sizeof(struct sockaddr_in); 328 icmpsrc.sin_family = AF_INET; 329 bzero(&icmpdst, sizeof(icmpdst)); 330 icmpdst.sin_len = sizeof(struct sockaddr_in); 331 icmpdst.sin_family = AF_INET; 332 bzero(&icmpgw, sizeof(icmpgw)); 333 icmpgw.sin_len = sizeof(struct sockaddr_in); 334 icmpgw.sin_family = AF_INET; 335 336 icmpstat.icps_inhist[icp->icmp_type]++; 337 code = icp->icmp_code; 338 switch (icp->icmp_type) { 339 340 case ICMP_UNREACH: 341 switch (code) { 342 case ICMP_UNREACH_NET: 343 case ICMP_UNREACH_HOST: 344 case ICMP_UNREACH_SRCFAIL: 345 case ICMP_UNREACH_NET_UNKNOWN: 346 case ICMP_UNREACH_HOST_UNKNOWN: 347 case ICMP_UNREACH_ISOLATED: 348 case ICMP_UNREACH_TOSNET: 349 case ICMP_UNREACH_TOSHOST: 350 case ICMP_UNREACH_HOST_PRECEDENCE: 351 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 352 code = PRC_UNREACH_NET; 353 break; 354 355 case ICMP_UNREACH_NEEDFRAG: 356 code = PRC_MSGSIZE; 357 break; 358 359 /* 360 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. 361 * Treat subcodes 2,3 as immediate RST 362 */ 363 case ICMP_UNREACH_PROTOCOL: 364 case ICMP_UNREACH_PORT: 365 code = PRC_UNREACH_PORT; 366 break; 367 368 case ICMP_UNREACH_NET_PROHIB: 369 case ICMP_UNREACH_HOST_PROHIB: 370 case ICMP_UNREACH_FILTER_PROHIB: 371 code = PRC_UNREACH_ADMIN_PROHIB; 372 break; 373 374 default: 375 goto badcode; 376 } 377 goto deliver; 378 379 case ICMP_TIMXCEED: 380 if (code > 1) 381 goto badcode; 382 code += PRC_TIMXCEED_INTRANS; 383 goto deliver; 384 385 case ICMP_PARAMPROB: 386 if (code > 1) 387 goto badcode; 388 code = PRC_PARAMPROB; 389 goto deliver; 390 391 case ICMP_SOURCEQUENCH: 392 if (code) 393 goto badcode; 394 code = PRC_QUENCH; 395 deliver: 396 /* 397 * Problem with datagram; advise higher level routines. 398 */ 399 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 400 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 401 icmpstat.icps_badlen++; 402 goto freeit; 403 } 404 icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len); 405 /* Discard ICMP's in response to multicast packets */ 406 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) 407 goto badcode; 408 #ifdef ICMPPRINTFS 409 if (icmpprintfs) 410 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 411 #endif 412 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 413 414 /* 415 * MTU discovery: 416 * If we got a needfrag and there is a host route to the 417 * original destination, and the MTU is not locked, then 418 * set the MTU in the route to the suggested new value 419 * (if given) and then notify as usual. The ULPs will 420 * notice that the MTU has changed and adapt accordingly. 421 * If no new MTU was suggested, then we guess a new one 422 * less than the current value. If the new MTU is 423 * unreasonably small (defined by sysctl tcp_minmss), then 424 * we don't update the MTU value. 425 * 426 * XXX: All this should be done in tcp_mtudisc() because 427 * the way we do it now, everyone can send us bogus ICMP 428 * MSGSIZE packets for any destination. By doing this far 429 * higher in the chain we have a matching tcp connection. 430 * Thus spoofing is much harder. However there is no easy 431 * non-hackish way to pass the new MTU up to tcp_mtudisc(). 432 * Also see next XXX regarding IPv4 AH TCP. 433 */ 434 if (code == PRC_MSGSIZE) { 435 int mtu; 436 struct in_conninfo inc; 437 438 bzero(&inc, sizeof(inc)); 439 inc.inc_flags = 0; /* IPv4 */ 440 inc.inc_faddr = icmpsrc.sin_addr; 441 442 mtu = ntohs(icp->icmp_nextmtu); 443 if (!mtu) 444 mtu = ip_next_mtu(mtu, 1); 445 446 if (mtu >= max(296, (tcp_minmss + 447 sizeof(struct tcpiphdr)))) 448 tcp_hc_updatemtu(&inc, mtu); 449 450 #ifdef DEBUG_MTUDISC 451 printf("MTU for %s reduced to %d\n", 452 inet_ntoa(icmpsrc.sin_addr), mtu); 453 #endif 454 } 455 456 /* 457 * XXX if the packet contains [IPv4 AH TCP], we can't make a 458 * notification to TCP layer. 459 */ 460 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 461 if (ctlfunc) 462 (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, 463 (void *)&icp->icmp_ip); 464 break; 465 466 badcode: 467 icmpstat.icps_badcode++; 468 break; 469 470 case ICMP_ECHO: 471 if (!icmpbmcastecho 472 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 473 icmpstat.icps_bmcastecho++; 474 break; 475 } 476 icp->icmp_type = ICMP_ECHOREPLY; 477 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) 478 goto freeit; 479 else 480 goto reflect; 481 482 case ICMP_TSTAMP: 483 if (!icmpbmcastecho 484 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 485 icmpstat.icps_bmcasttstamp++; 486 break; 487 } 488 if (icmplen < ICMP_TSLEN) { 489 icmpstat.icps_badlen++; 490 break; 491 } 492 icp->icmp_type = ICMP_TSTAMPREPLY; 493 icp->icmp_rtime = iptime(); 494 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 495 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) 496 goto freeit; 497 else 498 goto reflect; 499 500 case ICMP_MASKREQ: 501 if (icmpmaskrepl == 0) 502 break; 503 /* 504 * We are not able to respond with all ones broadcast 505 * unless we receive it over a point-to-point interface. 506 */ 507 if (icmplen < ICMP_MASKLEN) 508 break; 509 switch (ip->ip_dst.s_addr) { 510 511 case INADDR_BROADCAST: 512 case INADDR_ANY: 513 icmpdst.sin_addr = ip->ip_src; 514 break; 515 516 default: 517 icmpdst.sin_addr = ip->ip_dst; 518 } 519 ia = (struct in_ifaddr *)ifaof_ifpforaddr( 520 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); 521 if (ia == 0) 522 break; 523 if (ia->ia_ifp == 0) 524 break; 525 icp->icmp_type = ICMP_MASKREPLY; 526 if (icmpmaskfake == 0) 527 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 528 else 529 icp->icmp_mask = icmpmaskfake; 530 if (ip->ip_src.s_addr == 0) { 531 if (ia->ia_ifp->if_flags & IFF_BROADCAST) 532 ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; 533 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) 534 ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; 535 } 536 reflect: 537 ip->ip_len += hlen; /* since ip_input deducts this */ 538 icmpstat.icps_reflect++; 539 icmpstat.icps_outhist[icp->icmp_type]++; 540 icmp_reflect(m); 541 return; 542 543 case ICMP_REDIRECT: 544 if (log_redirect) { 545 u_long src, dst, gw; 546 547 src = ntohl(ip->ip_src.s_addr); 548 dst = ntohl(icp->icmp_ip.ip_dst.s_addr); 549 gw = ntohl(icp->icmp_gwaddr.s_addr); 550 printf("icmp redirect from %d.%d.%d.%d: " 551 "%d.%d.%d.%d => %d.%d.%d.%d\n", 552 (int)(src >> 24), (int)((src >> 16) & 0xff), 553 (int)((src >> 8) & 0xff), (int)(src & 0xff), 554 (int)(dst >> 24), (int)((dst >> 16) & 0xff), 555 (int)((dst >> 8) & 0xff), (int)(dst & 0xff), 556 (int)(gw >> 24), (int)((gw >> 16) & 0xff), 557 (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); 558 } 559 /* 560 * RFC1812 says we must ignore ICMP redirects if we 561 * are acting as router. 562 */ 563 if (drop_redirect || ipforwarding) 564 break; 565 if (code > 3) 566 goto badcode; 567 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 568 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 569 icmpstat.icps_badlen++; 570 break; 571 } 572 /* 573 * Short circuit routing redirects to force 574 * immediate change in the kernel's routing 575 * tables. The message is also handed to anyone 576 * listening on a raw socket (e.g. the routing 577 * daemon for use in updating its tables). 578 */ 579 icmpgw.sin_addr = ip->ip_src; 580 icmpdst.sin_addr = icp->icmp_gwaddr; 581 #ifdef ICMPPRINTFS 582 if (icmpprintfs) { 583 char buf[4 * sizeof "123"]; 584 strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); 585 586 printf("redirect dst %s to %s\n", 587 buf, inet_ntoa(icp->icmp_gwaddr)); 588 } 589 #endif 590 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 591 rtredirect((struct sockaddr *)&icmpsrc, 592 (struct sockaddr *)&icmpdst, 593 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, 594 (struct sockaddr *)&icmpgw); 595 pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); 596 #ifdef IPSEC 597 key_sa_routechange((struct sockaddr *)&icmpsrc); 598 #endif 599 break; 600 601 /* 602 * No kernel processing for the following; 603 * just fall through to send to raw listener. 604 */ 605 case ICMP_ECHOREPLY: 606 case ICMP_ROUTERADVERT: 607 case ICMP_ROUTERSOLICIT: 608 case ICMP_TSTAMPREPLY: 609 case ICMP_IREQREPLY: 610 case ICMP_MASKREPLY: 611 default: 612 break; 613 } 614 615 raw: 616 rip_input(m, off); 617 return; 618 619 freeit: 620 m_freem(m); 621 } 622 623 /* 624 * Reflect the ip packet back to the source 625 */ 626 static void 627 icmp_reflect(m) 628 struct mbuf *m; 629 { 630 struct ip *ip = mtod(m, struct ip *); 631 struct ifaddr *ifa; 632 struct ifnet *ifn; 633 struct in_ifaddr *ia; 634 struct in_addr t; 635 struct mbuf *opts = 0; 636 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 637 638 if (!in_canforward(ip->ip_src) && 639 ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) != 640 (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 641 m_freem(m); /* Bad return address */ 642 icmpstat.icps_badaddr++; 643 goto done; /* Ip_output() will check for broadcast */ 644 } 645 t = ip->ip_dst; 646 ip->ip_dst = ip->ip_src; 647 648 /* 649 * Source selection for ICMP replies: 650 * 651 * If the incoming packet was addressed directly to one of our 652 * own addresses, use dst as the src for the reply. 653 */ 654 LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) 655 if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) 656 goto match; 657 /* 658 * If the incoming packet was addressed to one of our broadcast 659 * addresses, use the first non-broadcast address which corresponds 660 * to the incoming interface. 661 */ 662 if (m->m_pkthdr.rcvif != NULL && 663 m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 664 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 665 if (ifa->ifa_addr->sa_family != AF_INET) 666 continue; 667 ia = ifatoia(ifa); 668 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 669 t.s_addr) 670 goto match; 671 } 672 } 673 /* 674 * If the incoming packet was not addressed directly to us, use 675 * designated interface for icmp replies specified by sysctl 676 * net.inet.icmp.reply_src (default not set). Otherwise continue 677 * with normal source selection. 678 */ 679 if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) { 680 TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) { 681 if (ifa->ifa_addr->sa_family != AF_INET) 682 continue; 683 ia = ifatoia(ifa); 684 goto match; 685 } 686 } 687 /* 688 * If the packet was transiting through us, use the address of 689 * the interface that is the closest to the packet source. 690 * When we don't have a route back to the packet source, stop here 691 * and drop the packet. 692 */ 693 ia = ip_rtaddr(ip->ip_dst); 694 if (ia == NULL) { 695 m_freem(m); 696 icmpstat.icps_noroute++; 697 goto done; 698 } 699 match: 700 #ifdef MAC 701 mac_reflect_mbuf_icmp(m); 702 #endif 703 t = IA_SIN(ia)->sin_addr; 704 ip->ip_src = t; 705 ip->ip_ttl = ip_defttl; 706 707 if (optlen > 0) { 708 register u_char *cp; 709 int opt, cnt; 710 u_int len; 711 712 /* 713 * Retrieve any source routing from the incoming packet; 714 * add on any record-route or timestamp options. 715 */ 716 cp = (u_char *) (ip + 1); 717 if ((opts = ip_srcroute()) == 0 && 718 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 719 opts->m_len = sizeof(struct in_addr); 720 mtod(opts, struct in_addr *)->s_addr = 0; 721 } 722 if (opts) { 723 #ifdef ICMPPRINTFS 724 if (icmpprintfs) 725 printf("icmp_reflect optlen %d rt %d => ", 726 optlen, opts->m_len); 727 #endif 728 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 729 opt = cp[IPOPT_OPTVAL]; 730 if (opt == IPOPT_EOL) 731 break; 732 if (opt == IPOPT_NOP) 733 len = 1; 734 else { 735 if (cnt < IPOPT_OLEN + sizeof(*cp)) 736 break; 737 len = cp[IPOPT_OLEN]; 738 if (len < IPOPT_OLEN + sizeof(*cp) || 739 len > cnt) 740 break; 741 } 742 /* 743 * Should check for overflow, but it "can't happen" 744 */ 745 if (opt == IPOPT_RR || opt == IPOPT_TS || 746 opt == IPOPT_SECURITY) { 747 bcopy((caddr_t)cp, 748 mtod(opts, caddr_t) + opts->m_len, len); 749 opts->m_len += len; 750 } 751 } 752 /* Terminate & pad, if necessary */ 753 cnt = opts->m_len % 4; 754 if (cnt) { 755 for (; cnt < 4; cnt++) { 756 *(mtod(opts, caddr_t) + opts->m_len) = 757 IPOPT_EOL; 758 opts->m_len++; 759 } 760 } 761 #ifdef ICMPPRINTFS 762 if (icmpprintfs) 763 printf("%d\n", opts->m_len); 764 #endif 765 } 766 /* 767 * Now strip out original options by copying rest of first 768 * mbuf's data back, and adjust the IP length. 769 */ 770 ip->ip_len -= optlen; 771 ip->ip_v = IPVERSION; 772 ip->ip_hl = 5; 773 m->m_len -= optlen; 774 if (m->m_flags & M_PKTHDR) 775 m->m_pkthdr.len -= optlen; 776 optlen += sizeof(struct ip); 777 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), 778 (unsigned)(m->m_len - sizeof(struct ip))); 779 } 780 m_tag_delete_nonpersistent(m); 781 m->m_flags &= ~(M_BCAST|M_MCAST); 782 icmp_send(m, opts); 783 done: 784 if (opts) 785 (void)m_free(opts); 786 } 787 788 /* 789 * Send an icmp packet back to the ip level, 790 * after supplying a checksum. 791 */ 792 static void 793 icmp_send(m, opts) 794 register struct mbuf *m; 795 struct mbuf *opts; 796 { 797 register struct ip *ip = mtod(m, struct ip *); 798 register int hlen; 799 register struct icmp *icp; 800 801 hlen = ip->ip_hl << 2; 802 m->m_data += hlen; 803 m->m_len -= hlen; 804 icp = mtod(m, struct icmp *); 805 icp->icmp_cksum = 0; 806 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); 807 m->m_data -= hlen; 808 m->m_len += hlen; 809 m->m_pkthdr.rcvif = (struct ifnet *)0; 810 #ifdef ICMPPRINTFS 811 if (icmpprintfs) { 812 char buf[4 * sizeof "123"]; 813 strcpy(buf, inet_ntoa(ip->ip_dst)); 814 printf("icmp_send dst %s src %s\n", 815 buf, inet_ntoa(ip->ip_src)); 816 } 817 #endif 818 (void) ip_output(m, opts, NULL, 0, NULL, NULL); 819 } 820 821 n_time 822 iptime() 823 { 824 struct timeval atv; 825 u_long t; 826 827 getmicrotime(&atv); 828 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 829 return (htonl(t)); 830 } 831 832 /* 833 * Return the next larger or smaller MTU plateau (table from RFC 1191) 834 * given current value MTU. If DIR is less than zero, a larger plateau 835 * is returned; otherwise, a smaller value is returned. 836 */ 837 static int 838 ip_next_mtu(mtu, dir) 839 int mtu; 840 int dir; 841 { 842 static int mtutab[] = { 843 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, 844 68, 0 845 }; 846 int i; 847 848 for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) { 849 if (mtu >= mtutab[i]) 850 break; 851 } 852 853 if (dir < 0) { 854 if (i == 0) { 855 return 0; 856 } else { 857 return mtutab[i - 1]; 858 } 859 } else { 860 if (mtutab[i] == 0) { 861 return 0; 862 } else if(mtu > mtutab[i]) { 863 return mtutab[i]; 864 } else { 865 return mtutab[i + 1]; 866 } 867 } 868 } 869 870 871 /* 872 * badport_bandlim() - check for ICMP bandwidth limit 873 * 874 * Return 0 if it is ok to send an ICMP error response, -1 if we have 875 * hit our bandwidth limit and it is not ok. 876 * 877 * If icmplim is <= 0, the feature is disabled and 0 is returned. 878 * 879 * For now we separate the TCP and UDP subsystems w/ different 'which' 880 * values. We may eventually remove this separation (and simplify the 881 * code further). 882 * 883 * Note that the printing of the error message is delayed so we can 884 * properly print the icmp error rate that the system was trying to do 885 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing 886 * the 'final' error, but it doesn't make sense to solve the printing 887 * delay with more complex code. 888 */ 889 890 int 891 badport_bandlim(int which) 892 { 893 #define N(a) (sizeof (a) / sizeof (a[0])) 894 static struct rate { 895 const char *type; 896 struct timeval lasttime; 897 int curpps;; 898 } rates[BANDLIM_MAX+1] = { 899 { "icmp unreach response" }, 900 { "icmp ping response" }, 901 { "icmp tstamp response" }, 902 { "closed port RST response" }, 903 { "open port RST response" } 904 }; 905 906 /* 907 * Return ok status if feature disabled or argument out of range. 908 */ 909 if (icmplim > 0 && (u_int) which < N(rates)) { 910 struct rate *r = &rates[which]; 911 int opps = r->curpps; 912 913 if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim)) 914 return -1; /* discard packet */ 915 /* 916 * If we've dropped below the threshold after having 917 * rate-limited traffic print the message. This preserves 918 * the previous behaviour at the expense of added complexity. 919 */ 920 if (icmplim_output && opps > icmplim) 921 printf("Limiting %s from %d to %d packets/sec\n", 922 r->type, opps, icmplim); 923 } 924 return 0; /* okay to send packet */ 925 #undef N 926 } 927