1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 30 * $FreeBSD$ 31 */ 32 33 #include "opt_ipsec.h" 34 #include "opt_mac.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/mac.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/time.h> 43 #include <sys/kernel.h> 44 #include <sys/sysctl.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/route.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_pcb.h> 52 #include <netinet/in_systm.h> 53 #include <netinet/in_var.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_icmp.h> 56 #include <netinet/ip_var.h> 57 #include <netinet/tcp.h> 58 #include <netinet/tcp_var.h> 59 #include <netinet/tcpip.h> 60 #include <netinet/icmp_var.h> 61 62 #ifdef IPSEC 63 #include <netinet6/ipsec.h> 64 #include <netkey/key.h> 65 #endif 66 67 #ifdef FAST_IPSEC 68 #include <netipsec/ipsec.h> 69 #include <netipsec/key.h> 70 #define IPSEC 71 #endif 72 73 #include <machine/in_cksum.h> 74 75 /* 76 * ICMP routines: error generation, receive packet processing, and 77 * routines to turnaround packets back to the originator, and 78 * host table maintenance routines. 79 */ 80 81 struct icmpstat icmpstat; 82 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, 83 &icmpstat, icmpstat, ""); 84 85 static int icmpmaskrepl = 0; 86 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, 87 &icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets."); 88 89 static u_int icmpmaskfake = 0; 90 SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, 91 &icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets."); 92 93 static int drop_redirect = 0; 94 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 95 &drop_redirect, 0, ""); 96 97 static int log_redirect = 0; 98 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 99 &log_redirect, 0, ""); 100 101 static int icmplim = 200; 102 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, 103 &icmplim, 0, ""); 104 105 static int icmplim_output = 1; 106 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, 107 &icmplim_output, 0, ""); 108 109 static char reply_src[IFNAMSIZ]; 110 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW, 111 &reply_src, IFNAMSIZ, "icmp reply source for non-local packets."); 112 113 /* 114 * ICMP broadcast echo sysctl 115 */ 116 117 static int icmpbmcastecho = 0; 118 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, 119 &icmpbmcastecho, 0, ""); 120 121 122 #ifdef ICMPPRINTFS 123 int icmpprintfs = 0; 124 #endif 125 126 static void icmp_reflect(struct mbuf *); 127 static void icmp_send(struct mbuf *, struct mbuf *); 128 static int ip_next_mtu(int, int); 129 130 extern struct protosw inetsw[]; 131 132 /* 133 * Generate an error packet of type error 134 * in response to bad packet ip. 135 */ 136 void 137 icmp_error(n, type, code, dest, destifp) 138 struct mbuf *n; 139 int type, code; 140 n_long dest; 141 struct ifnet *destifp; 142 { 143 register struct ip *oip = mtod(n, struct ip *), *nip; 144 register unsigned oiplen = oip->ip_hl << 2; 145 register struct icmp *icp; 146 register struct mbuf *m; 147 unsigned icmplen; 148 149 #ifdef ICMPPRINTFS 150 if (icmpprintfs) 151 printf("icmp_error(%p, %x, %d)\n", oip, type, code); 152 #endif 153 if (type != ICMP_REDIRECT) 154 icmpstat.icps_error++; 155 /* 156 * Don't send error if the original packet was encrypted. 157 * Don't send error if not the first fragment of message. 158 * Don't error if the old packet protocol was ICMP 159 * error message, only known informational types. 160 */ 161 if (n->m_flags & M_DECRYPTED) 162 goto freeit; 163 if (oip->ip_off &~ (IP_MF|IP_DF)) 164 goto freeit; 165 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 166 n->m_len >= oiplen + ICMP_MINLEN && 167 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { 168 icmpstat.icps_oldicmp++; 169 goto freeit; 170 } 171 /* Don't send error in response to a multicast or broadcast packet */ 172 if (n->m_flags & (M_BCAST|M_MCAST)) 173 goto freeit; 174 /* 175 * First, formulate icmp message 176 */ 177 m = m_gethdr(M_DONTWAIT, MT_HEADER); 178 if (m == NULL) 179 goto freeit; 180 #ifdef MAC 181 mac_create_mbuf_netlayer(n, m); 182 #endif 183 icmplen = min(oiplen + 8, oip->ip_len); 184 if (icmplen < sizeof(struct ip)) 185 panic("icmp_error: bad length"); 186 m->m_len = icmplen + ICMP_MINLEN; 187 MH_ALIGN(m, m->m_len); 188 icp = mtod(m, struct icmp *); 189 if ((u_int)type > ICMP_MAXTYPE) 190 panic("icmp_error"); 191 icmpstat.icps_outhist[type]++; 192 icp->icmp_type = type; 193 if (type == ICMP_REDIRECT) 194 icp->icmp_gwaddr.s_addr = dest; 195 else { 196 icp->icmp_void = 0; 197 /* 198 * The following assignments assume an overlay with the 199 * zeroed icmp_void field. 200 */ 201 if (type == ICMP_PARAMPROB) { 202 icp->icmp_pptr = code; 203 code = 0; 204 } else if (type == ICMP_UNREACH && 205 code == ICMP_UNREACH_NEEDFRAG && destifp) { 206 icp->icmp_nextmtu = htons(destifp->if_mtu); 207 } 208 } 209 210 icp->icmp_code = code; 211 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); 212 nip = &icp->icmp_ip; 213 214 /* 215 * Convert fields to network representation. 216 */ 217 nip->ip_len = htons(nip->ip_len); 218 nip->ip_off = htons(nip->ip_off); 219 220 /* 221 * Now, copy old ip header (without options) 222 * in front of icmp message. 223 */ 224 if (m->m_data - sizeof(struct ip) < m->m_pktdat) 225 panic("icmp len"); 226 /* 227 * If the original mbuf was meant to bypass the firewall, the error 228 * reply should bypass as well. 229 */ 230 m->m_flags |= n->m_flags & M_SKIP_FIREWALL; 231 m->m_data -= sizeof(struct ip); 232 m->m_len += sizeof(struct ip); 233 m->m_pkthdr.len = m->m_len; 234 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; 235 nip = mtod(m, struct ip *); 236 bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); 237 nip->ip_len = m->m_len; 238 nip->ip_v = IPVERSION; 239 nip->ip_hl = 5; 240 nip->ip_p = IPPROTO_ICMP; 241 nip->ip_tos = 0; 242 icmp_reflect(m); 243 244 freeit: 245 m_freem(n); 246 } 247 248 /* 249 * Process a received ICMP message. 250 */ 251 void 252 icmp_input(m, off) 253 struct mbuf *m; 254 int off; 255 { 256 struct icmp *icp; 257 struct in_ifaddr *ia; 258 struct ip *ip = mtod(m, struct ip *); 259 struct sockaddr_in icmpsrc, icmpdst, icmpgw; 260 int hlen = off; 261 int icmplen = ip->ip_len; 262 int i, code; 263 void (*ctlfunc)(int, struct sockaddr *, void *); 264 265 /* 266 * Locate icmp structure in mbuf, and check 267 * that not corrupted and of at least minimum length. 268 */ 269 #ifdef ICMPPRINTFS 270 if (icmpprintfs) { 271 char buf[4 * sizeof "123"]; 272 strcpy(buf, inet_ntoa(ip->ip_src)); 273 printf("icmp_input from %s to %s, len %d\n", 274 buf, inet_ntoa(ip->ip_dst), icmplen); 275 } 276 #endif 277 if (icmplen < ICMP_MINLEN) { 278 icmpstat.icps_tooshort++; 279 goto freeit; 280 } 281 i = hlen + min(icmplen, ICMP_ADVLENMIN); 282 if (m->m_len < i && (m = m_pullup(m, i)) == 0) { 283 icmpstat.icps_tooshort++; 284 return; 285 } 286 ip = mtod(m, struct ip *); 287 m->m_len -= hlen; 288 m->m_data += hlen; 289 icp = mtod(m, struct icmp *); 290 if (in_cksum(m, icmplen)) { 291 icmpstat.icps_checksum++; 292 goto freeit; 293 } 294 m->m_len += hlen; 295 m->m_data -= hlen; 296 297 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 298 /* 299 * Deliver very specific ICMP type only. 300 */ 301 switch (icp->icmp_type) { 302 case ICMP_UNREACH: 303 case ICMP_TIMXCEED: 304 break; 305 default: 306 goto freeit; 307 } 308 } 309 310 #ifdef ICMPPRINTFS 311 if (icmpprintfs) 312 printf("icmp_input, type %d code %d\n", icp->icmp_type, 313 icp->icmp_code); 314 #endif 315 316 /* 317 * Message type specific processing. 318 */ 319 if (icp->icmp_type > ICMP_MAXTYPE) 320 goto raw; 321 322 /* Initialize */ 323 bzero(&icmpsrc, sizeof(icmpsrc)); 324 icmpsrc.sin_len = sizeof(struct sockaddr_in); 325 icmpsrc.sin_family = AF_INET; 326 bzero(&icmpdst, sizeof(icmpdst)); 327 icmpdst.sin_len = sizeof(struct sockaddr_in); 328 icmpdst.sin_family = AF_INET; 329 bzero(&icmpgw, sizeof(icmpgw)); 330 icmpgw.sin_len = sizeof(struct sockaddr_in); 331 icmpgw.sin_family = AF_INET; 332 333 icmpstat.icps_inhist[icp->icmp_type]++; 334 code = icp->icmp_code; 335 switch (icp->icmp_type) { 336 337 case ICMP_UNREACH: 338 switch (code) { 339 case ICMP_UNREACH_NET: 340 case ICMP_UNREACH_HOST: 341 case ICMP_UNREACH_SRCFAIL: 342 case ICMP_UNREACH_NET_UNKNOWN: 343 case ICMP_UNREACH_HOST_UNKNOWN: 344 case ICMP_UNREACH_ISOLATED: 345 case ICMP_UNREACH_TOSNET: 346 case ICMP_UNREACH_TOSHOST: 347 case ICMP_UNREACH_HOST_PRECEDENCE: 348 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 349 code = PRC_UNREACH_NET; 350 break; 351 352 case ICMP_UNREACH_NEEDFRAG: 353 code = PRC_MSGSIZE; 354 break; 355 356 /* 357 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. 358 * Treat subcodes 2,3 as immediate RST 359 */ 360 case ICMP_UNREACH_PROTOCOL: 361 case ICMP_UNREACH_PORT: 362 code = PRC_UNREACH_PORT; 363 break; 364 365 case ICMP_UNREACH_NET_PROHIB: 366 case ICMP_UNREACH_HOST_PROHIB: 367 case ICMP_UNREACH_FILTER_PROHIB: 368 code = PRC_UNREACH_ADMIN_PROHIB; 369 break; 370 371 default: 372 goto badcode; 373 } 374 goto deliver; 375 376 case ICMP_TIMXCEED: 377 if (code > 1) 378 goto badcode; 379 code += PRC_TIMXCEED_INTRANS; 380 goto deliver; 381 382 case ICMP_PARAMPROB: 383 if (code > 1) 384 goto badcode; 385 code = PRC_PARAMPROB; 386 goto deliver; 387 388 case ICMP_SOURCEQUENCH: 389 if (code) 390 goto badcode; 391 code = PRC_QUENCH; 392 deliver: 393 /* 394 * Problem with datagram; advise higher level routines. 395 */ 396 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 397 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 398 icmpstat.icps_badlen++; 399 goto freeit; 400 } 401 icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len); 402 /* Discard ICMP's in response to multicast packets */ 403 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) 404 goto badcode; 405 #ifdef ICMPPRINTFS 406 if (icmpprintfs) 407 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 408 #endif 409 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 410 411 /* 412 * MTU discovery: 413 * If we got a needfrag and there is a host route to the 414 * original destination, and the MTU is not locked, then 415 * set the MTU in the route to the suggested new value 416 * (if given) and then notify as usual. The ULPs will 417 * notice that the MTU has changed and adapt accordingly. 418 * If no new MTU was suggested, then we guess a new one 419 * less than the current value. If the new MTU is 420 * unreasonably small (defined by sysctl tcp_minmss), then 421 * we don't update the MTU value. 422 * 423 * XXX: All this should be done in tcp_mtudisc() because 424 * the way we do it now, everyone can send us bogus ICMP 425 * MSGSIZE packets for any destination. By doing this far 426 * higher in the chain we have a matching tcp connection. 427 * Thus spoofing is much harder. However there is no easy 428 * non-hackish way to pass the new MTU up to tcp_mtudisc(). 429 * Also see next XXX regarding IPv4 AH TCP. 430 */ 431 if (code == PRC_MSGSIZE) { 432 int mtu; 433 struct in_conninfo inc; 434 435 bzero(&inc, sizeof(inc)); 436 inc.inc_flags = 0; /* IPv4 */ 437 inc.inc_faddr = icmpsrc.sin_addr; 438 439 mtu = ntohs(icp->icmp_nextmtu); 440 if (!mtu) 441 mtu = ip_next_mtu(mtu, 1); 442 443 if (mtu >= max(296, (tcp_minmss + 444 sizeof(struct tcpiphdr)))) 445 tcp_hc_updatemtu(&inc, mtu); 446 447 #ifdef DEBUG_MTUDISC 448 printf("MTU for %s reduced to %d\n", 449 inet_ntoa(icmpsrc.sin_addr), mtu); 450 #endif 451 } 452 453 /* 454 * XXX if the packet contains [IPv4 AH TCP], we can't make a 455 * notification to TCP layer. 456 */ 457 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 458 if (ctlfunc) 459 (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, 460 (void *)&icp->icmp_ip); 461 break; 462 463 badcode: 464 icmpstat.icps_badcode++; 465 break; 466 467 case ICMP_ECHO: 468 if (!icmpbmcastecho 469 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 470 icmpstat.icps_bmcastecho++; 471 break; 472 } 473 icp->icmp_type = ICMP_ECHOREPLY; 474 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) 475 goto freeit; 476 else 477 goto reflect; 478 479 case ICMP_TSTAMP: 480 if (!icmpbmcastecho 481 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 482 icmpstat.icps_bmcasttstamp++; 483 break; 484 } 485 if (icmplen < ICMP_TSLEN) { 486 icmpstat.icps_badlen++; 487 break; 488 } 489 icp->icmp_type = ICMP_TSTAMPREPLY; 490 icp->icmp_rtime = iptime(); 491 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 492 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) 493 goto freeit; 494 else 495 goto reflect; 496 497 case ICMP_MASKREQ: 498 if (icmpmaskrepl == 0) 499 break; 500 /* 501 * We are not able to respond with all ones broadcast 502 * unless we receive it over a point-to-point interface. 503 */ 504 if (icmplen < ICMP_MASKLEN) 505 break; 506 switch (ip->ip_dst.s_addr) { 507 508 case INADDR_BROADCAST: 509 case INADDR_ANY: 510 icmpdst.sin_addr = ip->ip_src; 511 break; 512 513 default: 514 icmpdst.sin_addr = ip->ip_dst; 515 } 516 ia = (struct in_ifaddr *)ifaof_ifpforaddr( 517 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); 518 if (ia == 0) 519 break; 520 if (ia->ia_ifp == 0) 521 break; 522 icp->icmp_type = ICMP_MASKREPLY; 523 if (icmpmaskfake == 0) 524 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 525 else 526 icp->icmp_mask = icmpmaskfake; 527 if (ip->ip_src.s_addr == 0) { 528 if (ia->ia_ifp->if_flags & IFF_BROADCAST) 529 ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; 530 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) 531 ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; 532 } 533 reflect: 534 ip->ip_len += hlen; /* since ip_input deducts this */ 535 icmpstat.icps_reflect++; 536 icmpstat.icps_outhist[icp->icmp_type]++; 537 icmp_reflect(m); 538 return; 539 540 case ICMP_REDIRECT: 541 if (log_redirect) { 542 u_long src, dst, gw; 543 544 src = ntohl(ip->ip_src.s_addr); 545 dst = ntohl(icp->icmp_ip.ip_dst.s_addr); 546 gw = ntohl(icp->icmp_gwaddr.s_addr); 547 printf("icmp redirect from %d.%d.%d.%d: " 548 "%d.%d.%d.%d => %d.%d.%d.%d\n", 549 (int)(src >> 24), (int)((src >> 16) & 0xff), 550 (int)((src >> 8) & 0xff), (int)(src & 0xff), 551 (int)(dst >> 24), (int)((dst >> 16) & 0xff), 552 (int)((dst >> 8) & 0xff), (int)(dst & 0xff), 553 (int)(gw >> 24), (int)((gw >> 16) & 0xff), 554 (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); 555 } 556 /* 557 * RFC1812 says we must ignore ICMP redirects if we 558 * are acting as router. 559 */ 560 if (drop_redirect || ipforwarding) 561 break; 562 if (code > 3) 563 goto badcode; 564 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 565 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 566 icmpstat.icps_badlen++; 567 break; 568 } 569 /* 570 * Short circuit routing redirects to force 571 * immediate change in the kernel's routing 572 * tables. The message is also handed to anyone 573 * listening on a raw socket (e.g. the routing 574 * daemon for use in updating its tables). 575 */ 576 icmpgw.sin_addr = ip->ip_src; 577 icmpdst.sin_addr = icp->icmp_gwaddr; 578 #ifdef ICMPPRINTFS 579 if (icmpprintfs) { 580 char buf[4 * sizeof "123"]; 581 strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); 582 583 printf("redirect dst %s to %s\n", 584 buf, inet_ntoa(icp->icmp_gwaddr)); 585 } 586 #endif 587 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 588 rtredirect((struct sockaddr *)&icmpsrc, 589 (struct sockaddr *)&icmpdst, 590 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, 591 (struct sockaddr *)&icmpgw); 592 pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); 593 #ifdef IPSEC 594 key_sa_routechange((struct sockaddr *)&icmpsrc); 595 #endif 596 break; 597 598 /* 599 * No kernel processing for the following; 600 * just fall through to send to raw listener. 601 */ 602 case ICMP_ECHOREPLY: 603 case ICMP_ROUTERADVERT: 604 case ICMP_ROUTERSOLICIT: 605 case ICMP_TSTAMPREPLY: 606 case ICMP_IREQREPLY: 607 case ICMP_MASKREPLY: 608 default: 609 break; 610 } 611 612 raw: 613 rip_input(m, off); 614 return; 615 616 freeit: 617 m_freem(m); 618 } 619 620 /* 621 * Reflect the ip packet back to the source 622 */ 623 static void 624 icmp_reflect(m) 625 struct mbuf *m; 626 { 627 struct ip *ip = mtod(m, struct ip *); 628 struct ifaddr *ifa; 629 struct ifnet *ifn; 630 struct in_ifaddr *ia; 631 struct in_addr t; 632 struct mbuf *opts = 0; 633 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 634 635 if (!in_canforward(ip->ip_src) && 636 ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) != 637 (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 638 m_freem(m); /* Bad return address */ 639 icmpstat.icps_badaddr++; 640 goto done; /* Ip_output() will check for broadcast */ 641 } 642 t = ip->ip_dst; 643 ip->ip_dst = ip->ip_src; 644 645 /* 646 * Source selection for ICMP replies: 647 * 648 * If the incoming packet was addressed directly to one of our 649 * own addresses, use dst as the src for the reply. 650 */ 651 LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) 652 if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) 653 goto match; 654 /* 655 * If the incoming packet was addressed to one of our broadcast 656 * addresses, use the first non-broadcast address which corresponds 657 * to the incoming interface. 658 */ 659 if (m->m_pkthdr.rcvif != NULL && 660 m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 661 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 662 if (ifa->ifa_addr->sa_family != AF_INET) 663 continue; 664 ia = ifatoia(ifa); 665 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 666 t.s_addr) 667 goto match; 668 } 669 } 670 /* 671 * If the incoming packet was not addressed directly to us, use 672 * designated interface for icmp replies specified by sysctl 673 * net.inet.icmp.reply_src (default not set). Otherwise continue 674 * with normal source selection. 675 */ 676 if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) { 677 TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) { 678 if (ifa->ifa_addr->sa_family != AF_INET) 679 continue; 680 ia = ifatoia(ifa); 681 goto match; 682 } 683 } 684 /* 685 * If the packet was transiting through us, use the address of 686 * the interface that is the closest to the packet source. 687 * When we don't have a route back to the packet source, stop here 688 * and drop the packet. 689 */ 690 ia = ip_rtaddr(ip->ip_dst); 691 if (ia == NULL) { 692 m_freem(m); 693 icmpstat.icps_noroute++; 694 goto done; 695 } 696 match: 697 #ifdef MAC 698 mac_reflect_mbuf_icmp(m); 699 #endif 700 t = IA_SIN(ia)->sin_addr; 701 ip->ip_src = t; 702 ip->ip_ttl = ip_defttl; 703 704 if (optlen > 0) { 705 register u_char *cp; 706 int opt, cnt; 707 u_int len; 708 709 /* 710 * Retrieve any source routing from the incoming packet; 711 * add on any record-route or timestamp options. 712 */ 713 cp = (u_char *) (ip + 1); 714 if ((opts = ip_srcroute(m)) == 0 && 715 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 716 opts->m_len = sizeof(struct in_addr); 717 mtod(opts, struct in_addr *)->s_addr = 0; 718 } 719 if (opts) { 720 #ifdef ICMPPRINTFS 721 if (icmpprintfs) 722 printf("icmp_reflect optlen %d rt %d => ", 723 optlen, opts->m_len); 724 #endif 725 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 726 opt = cp[IPOPT_OPTVAL]; 727 if (opt == IPOPT_EOL) 728 break; 729 if (opt == IPOPT_NOP) 730 len = 1; 731 else { 732 if (cnt < IPOPT_OLEN + sizeof(*cp)) 733 break; 734 len = cp[IPOPT_OLEN]; 735 if (len < IPOPT_OLEN + sizeof(*cp) || 736 len > cnt) 737 break; 738 } 739 /* 740 * Should check for overflow, but it "can't happen" 741 */ 742 if (opt == IPOPT_RR || opt == IPOPT_TS || 743 opt == IPOPT_SECURITY) { 744 bcopy((caddr_t)cp, 745 mtod(opts, caddr_t) + opts->m_len, len); 746 opts->m_len += len; 747 } 748 } 749 /* Terminate & pad, if necessary */ 750 cnt = opts->m_len % 4; 751 if (cnt) { 752 for (; cnt < 4; cnt++) { 753 *(mtod(opts, caddr_t) + opts->m_len) = 754 IPOPT_EOL; 755 opts->m_len++; 756 } 757 } 758 #ifdef ICMPPRINTFS 759 if (icmpprintfs) 760 printf("%d\n", opts->m_len); 761 #endif 762 } 763 /* 764 * Now strip out original options by copying rest of first 765 * mbuf's data back, and adjust the IP length. 766 */ 767 ip->ip_len -= optlen; 768 ip->ip_v = IPVERSION; 769 ip->ip_hl = 5; 770 m->m_len -= optlen; 771 if (m->m_flags & M_PKTHDR) 772 m->m_pkthdr.len -= optlen; 773 optlen += sizeof(struct ip); 774 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), 775 (unsigned)(m->m_len - sizeof(struct ip))); 776 } 777 m_tag_delete_nonpersistent(m); 778 m->m_flags &= ~(M_BCAST|M_MCAST); 779 icmp_send(m, opts); 780 done: 781 if (opts) 782 (void)m_free(opts); 783 } 784 785 /* 786 * Send an icmp packet back to the ip level, 787 * after supplying a checksum. 788 */ 789 static void 790 icmp_send(m, opts) 791 register struct mbuf *m; 792 struct mbuf *opts; 793 { 794 register struct ip *ip = mtod(m, struct ip *); 795 register int hlen; 796 register struct icmp *icp; 797 798 hlen = ip->ip_hl << 2; 799 m->m_data += hlen; 800 m->m_len -= hlen; 801 icp = mtod(m, struct icmp *); 802 icp->icmp_cksum = 0; 803 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); 804 m->m_data -= hlen; 805 m->m_len += hlen; 806 m->m_pkthdr.rcvif = (struct ifnet *)0; 807 #ifdef ICMPPRINTFS 808 if (icmpprintfs) { 809 char buf[4 * sizeof "123"]; 810 strcpy(buf, inet_ntoa(ip->ip_dst)); 811 printf("icmp_send dst %s src %s\n", 812 buf, inet_ntoa(ip->ip_src)); 813 } 814 #endif 815 (void) ip_output(m, opts, NULL, 0, NULL, NULL); 816 } 817 818 n_time 819 iptime() 820 { 821 struct timeval atv; 822 u_long t; 823 824 getmicrotime(&atv); 825 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 826 return (htonl(t)); 827 } 828 829 /* 830 * Return the next larger or smaller MTU plateau (table from RFC 1191) 831 * given current value MTU. If DIR is less than zero, a larger plateau 832 * is returned; otherwise, a smaller value is returned. 833 */ 834 static int 835 ip_next_mtu(mtu, dir) 836 int mtu; 837 int dir; 838 { 839 static int mtutab[] = { 840 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, 841 68, 0 842 }; 843 int i; 844 845 for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) { 846 if (mtu >= mtutab[i]) 847 break; 848 } 849 850 if (dir < 0) { 851 if (i == 0) { 852 return 0; 853 } else { 854 return mtutab[i - 1]; 855 } 856 } else { 857 if (mtutab[i] == 0) { 858 return 0; 859 } else if(mtu > mtutab[i]) { 860 return mtutab[i]; 861 } else { 862 return mtutab[i + 1]; 863 } 864 } 865 } 866 867 868 /* 869 * badport_bandlim() - check for ICMP bandwidth limit 870 * 871 * Return 0 if it is ok to send an ICMP error response, -1 if we have 872 * hit our bandwidth limit and it is not ok. 873 * 874 * If icmplim is <= 0, the feature is disabled and 0 is returned. 875 * 876 * For now we separate the TCP and UDP subsystems w/ different 'which' 877 * values. We may eventually remove this separation (and simplify the 878 * code further). 879 * 880 * Note that the printing of the error message is delayed so we can 881 * properly print the icmp error rate that the system was trying to do 882 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing 883 * the 'final' error, but it doesn't make sense to solve the printing 884 * delay with more complex code. 885 */ 886 887 int 888 badport_bandlim(int which) 889 { 890 #define N(a) (sizeof (a) / sizeof (a[0])) 891 static struct rate { 892 const char *type; 893 struct timeval lasttime; 894 int curpps; 895 } rates[BANDLIM_MAX+1] = { 896 { "icmp unreach response" }, 897 { "icmp ping response" }, 898 { "icmp tstamp response" }, 899 { "closed port RST response" }, 900 { "open port RST response" } 901 }; 902 903 /* 904 * Return ok status if feature disabled or argument out of range. 905 */ 906 if (icmplim > 0 && (u_int) which < N(rates)) { 907 struct rate *r = &rates[which]; 908 int opps = r->curpps; 909 910 if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim)) 911 return -1; /* discard packet */ 912 /* 913 * If we've dropped below the threshold after having 914 * rate-limited traffic print the message. This preserves 915 * the previous behaviour at the expense of added complexity. 916 */ 917 if (icmplim_output && opps > icmplim) 918 printf("Limiting %s from %d to %d packets/sec\n", 919 r->type, opps, icmplim); 920 } 921 return 0; /* okay to send packet */ 922 #undef N 923 } 924