1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30 * $FreeBSD$ 31 */ 32 33 #include "opt_bootp.h" 34 #include "opt_ipfw.h" 35 #include "opt_ipstealth.h" 36 #include "opt_ipsec.h" 37 #include "opt_mac.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/callout.h> 42 #include <sys/mac.h> 43 #include <sys/mbuf.h> 44 #include <sys/malloc.h> 45 #include <sys/domain.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/time.h> 49 #include <sys/kernel.h> 50 #include <sys/syslog.h> 51 #include <sys/sysctl.h> 52 53 #include <net/pfil.h> 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/if_var.h> 57 #include <net/if_dl.h> 58 #include <net/route.h> 59 #include <net/netisr.h> 60 61 #include <netinet/in.h> 62 #include <netinet/in_systm.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/ip_var.h> 67 #include <netinet/ip_icmp.h> 68 #include <machine/in_cksum.h> 69 70 #include <sys/socketvar.h> 71 72 /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. 
*/ 73 #include <netinet/ip_fw.h> 74 #include <netinet/ip_dummynet.h> 75 76 #ifdef IPSEC 77 #include <netinet6/ipsec.h> 78 #include <netkey/key.h> 79 #endif 80 81 #ifdef FAST_IPSEC 82 #include <netipsec/ipsec.h> 83 #include <netipsec/key.h> 84 #endif 85 86 int rsvp_on = 0; 87 88 int ipforwarding = 0; 89 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 90 &ipforwarding, 0, "Enable IP forwarding between interfaces"); 91 92 static int ipsendredirects = 1; /* XXX */ 93 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 94 &ipsendredirects, 0, "Enable sending IP redirects"); 95 96 int ip_defttl = IPDEFTTL; 97 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, 98 &ip_defttl, 0, "Maximum TTL on IP packets"); 99 100 static int ip_dosourceroute = 0; 101 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW, 102 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets"); 103 104 static int ip_acceptsourceroute = 0; 105 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 106 CTLFLAG_RW, &ip_acceptsourceroute, 0, 107 "Enable accepting source routed IP packets"); 108 109 int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */ 110 SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW, 111 &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)"); 112 113 static int ip_keepfaith = 0; 114 SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 115 &ip_keepfaith, 0, 116 "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 117 118 static int nipq = 0; /* total # of reass queues */ 119 static int maxnipq; 120 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW, 121 &maxnipq, 0, 122 "Maximum number of IPv4 fragment reassembly queue entries"); 123 124 static int maxfragsperpacket; 125 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 126 &maxfragsperpacket, 0, 127 "Maximum number of IPv4 fragments allowed per packet"); 128 129 static int 
ip_sendsourcequench = 0; 130 SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 131 &ip_sendsourcequench, 0, 132 "Enable the transmission of source quench packets"); 133 134 int ip_do_randomid = 0; 135 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, 136 &ip_do_randomid, 0, 137 "Assign random ip_id values"); 138 139 /* 140 * XXX - Setting ip_checkinterface mostly implements the receive side of 141 * the Strong ES model described in RFC 1122, but since the routing table 142 * and transmit implementation do not implement the Strong ES model, 143 * setting this to 1 results in an odd hybrid. 144 * 145 * XXX - ip_checkinterface currently must be disabled if you use ipnat 146 * to translate the destination address to another local interface. 147 * 148 * XXX - ip_checkinterface must be disabled if you add IP aliases 149 * to the loopback interface instead of the interface where the 150 * packets for those addresses are received. 151 */ 152 static int ip_checkinterface = 0; 153 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 154 &ip_checkinterface, 0, "Verify packet arrives on correct interface"); 155 156 #ifdef DIAGNOSTIC 157 static int ipprintfs = 0; 158 #endif 159 160 struct pfil_head inet_pfil_hook; /* Packet filter hooks */ 161 162 static struct ifqueue ipintrq; 163 static int ipqmaxlen = IFQ_MAXLEN; 164 165 extern struct domain inetdomain; 166 extern struct protosw inetsw[]; 167 u_char ip_protox[IPPROTO_MAX]; 168 struct in_ifaddrhead in_ifaddrhead; /* first inet address */ 169 struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ 170 u_long in_ifaddrhmask; /* mask for hash table */ 171 172 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, 173 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); 174 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, 175 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); 176 177 struct ipstat 
ipstat; 178 SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 179 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); 180 181 /* Packet reassembly stuff */ 182 #define IPREASS_NHASH_LOG2 6 183 #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) 184 #define IPREASS_HMASK (IPREASS_NHASH - 1) 185 #define IPREASS_HASH(x,y) \ 186 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) 187 188 static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; 189 struct mtx ipqlock; 190 struct callout ipport_tick_callout; 191 192 #define IPQ_LOCK() mtx_lock(&ipqlock) 193 #define IPQ_UNLOCK() mtx_unlock(&ipqlock) 194 #define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) 195 #define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED) 196 197 #ifdef IPCTL_DEFMTU 198 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 199 &ip_mtu, 0, "Default MTU"); 200 #endif 201 202 #ifdef IPSTEALTH 203 int ipstealth = 0; 204 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, 205 &ipstealth, 0, ""); 206 #endif 207 208 /* 209 * ipfw_ether and ipfw_bridge hooks. 210 * XXX: Temporary until those are converted to pfil_hooks as well. 211 */ 212 ip_fw_chk_t *ip_fw_chk_ptr = NULL; 213 ip_dn_io_t *ip_dn_io_ptr = NULL; 214 int fw_enable = 1; 215 int fw_one_pass = 1; 216 217 /* 218 * XXX this is ugly. IP options source routing magic. 
219 */ 220 struct ipoptrt { 221 struct in_addr dst; /* final destination */ 222 char nop; /* one NOP to align */ 223 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ 224 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; 225 }; 226 227 struct ipopt_tag { 228 struct m_tag tag; 229 int ip_nhops; 230 struct ipoptrt ip_srcrt; 231 }; 232 233 static void save_rte(struct mbuf *, u_char *, struct in_addr); 234 static int ip_dooptions(struct mbuf *m, int); 235 static void ip_forward(struct mbuf *m, int srcrt); 236 static void ip_freef(struct ipqhead *, struct ipq *); 237 238 /* 239 * IP initialization: fill in IP protocol switch table. 240 * All protocols not implemented in kernel go to raw IP protocol handler. 241 */ 242 void 243 ip_init() 244 { 245 register struct protosw *pr; 246 register int i; 247 248 TAILQ_INIT(&in_ifaddrhead); 249 in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); 250 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 251 if (pr == NULL) 252 panic("ip_init: PF_INET not found"); 253 254 /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 255 for (i = 0; i < IPPROTO_MAX; i++) 256 ip_protox[i] = pr - inetsw; 257 /* 258 * Cycle through IP protocols and put them into the appropriate place 259 * in ip_protox[]. 260 */ 261 for (pr = inetdomain.dom_protosw; 262 pr < inetdomain.dom_protoswNPROTOSW; pr++) 263 if (pr->pr_domain->dom_family == PF_INET && 264 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 265 /* Be careful to only index valid IP protocols. */ 266 if (pr->pr_protocol <= IPPROTO_MAX) 267 ip_protox[pr->pr_protocol] = pr - inetsw; 268 } 269 270 /* Initialize packet filter hooks. */ 271 inet_pfil_hook.ph_type = PFIL_TYPE_AF; 272 inet_pfil_hook.ph_af = AF_INET; 273 if ((i = pfil_head_register(&inet_pfil_hook)) != 0) 274 printf("%s: WARNING: unable to register pfil hook, " 275 "error %d\n", __func__, i); 276 277 /* Initialize IP reassembly queue. 
*/ 278 IPQ_LOCK_INIT(); 279 for (i = 0; i < IPREASS_NHASH; i++) 280 TAILQ_INIT(&ipq[i]); 281 maxnipq = nmbclusters / 32; 282 maxfragsperpacket = 16; 283 284 /* Start ipport_tick. */ 285 callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); 286 ipport_tick(NULL); 287 EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, 288 SHUTDOWN_PRI_DEFAULT); 289 290 /* Initialize various other remaining things. */ 291 ip_id = time_second & 0xffff; 292 ipintrq.ifq_maxlen = ipqmaxlen; 293 mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); 294 netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE); 295 } 296 297 void ip_fini(xtp) 298 void *xtp; 299 { 300 callout_stop(&ipport_tick_callout); 301 } 302 303 /* 304 * Ip input routine. Checksum and byte swap header. If fragmented 305 * try to reassemble. Process options. Pass to next level. 306 */ 307 void 308 ip_input(struct mbuf *m) 309 { 310 struct ip *ip = NULL; 311 struct in_ifaddr *ia = NULL; 312 struct ifaddr *ifa; 313 int checkif, hlen = 0; 314 u_short sum; 315 int dchg = 0; /* dest changed after fw */ 316 struct in_addr odst; /* original dst address */ 317 #ifdef FAST_IPSEC 318 struct m_tag *mtag; 319 struct tdb_ident *tdbi; 320 struct secpolicy *sp; 321 int s, error; 322 #endif /* FAST_IPSEC */ 323 324 M_ASSERTPKTHDR(m); 325 326 if (m->m_flags & M_FASTFWD_OURS) { 327 /* 328 * Firewall or NAT changed destination to local. 329 * We expect ip_len and ip_off to be in host byte order. 330 */ 331 m->m_flags &= ~M_FASTFWD_OURS; 332 /* Set up some basics that will be used later. 
*/ 333 ip = mtod(m, struct ip *); 334 hlen = ip->ip_hl << 2; 335 goto ours; 336 } 337 338 ipstat.ips_total++; 339 340 if (m->m_pkthdr.len < sizeof(struct ip)) 341 goto tooshort; 342 343 if (m->m_len < sizeof (struct ip) && 344 (m = m_pullup(m, sizeof (struct ip))) == NULL) { 345 ipstat.ips_toosmall++; 346 return; 347 } 348 ip = mtod(m, struct ip *); 349 350 if (ip->ip_v != IPVERSION) { 351 ipstat.ips_badvers++; 352 goto bad; 353 } 354 355 hlen = ip->ip_hl << 2; 356 if (hlen < sizeof(struct ip)) { /* minimum header length */ 357 ipstat.ips_badhlen++; 358 goto bad; 359 } 360 if (hlen > m->m_len) { 361 if ((m = m_pullup(m, hlen)) == NULL) { 362 ipstat.ips_badhlen++; 363 return; 364 } 365 ip = mtod(m, struct ip *); 366 } 367 368 /* 127/8 must not appear on wire - RFC1122 */ 369 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 370 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 371 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { 372 ipstat.ips_badaddr++; 373 goto bad; 374 } 375 } 376 377 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 378 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 379 } else { 380 if (hlen == sizeof(struct ip)) { 381 sum = in_cksum_hdr(ip); 382 } else { 383 sum = in_cksum(m, hlen); 384 } 385 } 386 if (sum) { 387 ipstat.ips_badsum++; 388 goto bad; 389 } 390 391 #ifdef ALTQ 392 if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 393 /* packet is dropped by traffic conditioner */ 394 return; 395 #endif 396 397 /* 398 * Convert fields to host representation. 399 */ 400 ip->ip_len = ntohs(ip->ip_len); 401 if (ip->ip_len < hlen) { 402 ipstat.ips_badlen++; 403 goto bad; 404 } 405 ip->ip_off = ntohs(ip->ip_off); 406 407 /* 408 * Check that the amount of data in the buffers 409 * is as at least much as the IP header would have us expect. 410 * Trim mbufs if longer than we expect. 411 * Drop packet if shorter than we expect. 
412 */ 413 if (m->m_pkthdr.len < ip->ip_len) { 414 tooshort: 415 ipstat.ips_tooshort++; 416 goto bad; 417 } 418 if (m->m_pkthdr.len > ip->ip_len) { 419 if (m->m_len == m->m_pkthdr.len) { 420 m->m_len = ip->ip_len; 421 m->m_pkthdr.len = ip->ip_len; 422 } else 423 m_adj(m, ip->ip_len - m->m_pkthdr.len); 424 } 425 #if defined(IPSEC) && !defined(IPSEC_FILTERGIF) 426 /* 427 * Bypass packet filtering for packets from a tunnel (gif). 428 */ 429 if (ipsec_getnhist(m)) 430 goto passin; 431 #endif 432 #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF) 433 /* 434 * Bypass packet filtering for packets from a tunnel (gif). 435 */ 436 if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) 437 goto passin; 438 #endif 439 440 /* 441 * Run through list of hooks for input packets. 442 * 443 * NB: Beware of the destination address changing (e.g. 444 * by NAT rewriting). When this happens, tell 445 * ip_forward to do the right thing. 446 */ 447 448 /* Jump over all PFIL processing if hooks are not active. */ 449 if (inet_pfil_hook.ph_busy_count == -1) 450 goto passin; 451 452 odst = ip->ip_dst; 453 if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, 454 PFIL_IN, NULL) != 0) 455 return; 456 if (m == NULL) /* consumed by filter */ 457 return; 458 459 ip = mtod(m, struct ip *); 460 dchg = (odst.s_addr != ip->ip_dst.s_addr); 461 462 #ifdef IPFIREWALL_FORWARD 463 if (m->m_flags & M_FASTFWD_OURS) { 464 m->m_flags &= ~M_FASTFWD_OURS; 465 goto ours; 466 } 467 dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); 468 #endif /* IPFIREWALL_FORWARD */ 469 470 passin: 471 /* 472 * Process options and, if not destined for us, 473 * ship it on. ip_dooptions returns 1 when an 474 * error was detected (causing an icmp message 475 * to be sent and the original packet to be freed). 
476 */ 477 if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 478 return; 479 480 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 481 * matter if it is destined to another node, or whether it is 482 * a multicast one, RSVP wants it! and prevents it from being forwarded 483 * anywhere else. Also checks if the rsvp daemon is running before 484 * grabbing the packet. 485 */ 486 if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 487 goto ours; 488 489 /* 490 * Check our list of addresses, to see if the packet is for us. 491 * If we don't have any addresses, assume any unicast packet 492 * we receive might be for us (and let the upper layers deal 493 * with it). 494 */ 495 if (TAILQ_EMPTY(&in_ifaddrhead) && 496 (m->m_flags & (M_MCAST|M_BCAST)) == 0) 497 goto ours; 498 499 /* 500 * Enable a consistency check between the destination address 501 * and the arrival interface for a unicast packet (the RFC 1122 502 * strong ES model) if IP forwarding is disabled and the packet 503 * is not locally generated and the packet is not subject to 504 * 'ipfw fwd'. 505 * 506 * XXX - Checking also should be disabled if the destination 507 * address is ipnat'ed to a different interface. 508 * 509 * XXX - Checking is incompatible with IP aliases added 510 * to the loopback interface instead of the interface where 511 * the packets are received. 512 */ 513 checkif = ip_checkinterface && (ipforwarding == 0) && 514 m->m_pkthdr.rcvif != NULL && 515 ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && 516 (dchg == 0); 517 518 /* 519 * Check for exact addresses in the hash bucket. 520 */ 521 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 522 /* 523 * If the address matches, verify that the packet 524 * arrived via the correct interface if checking is 525 * enabled. 526 */ 527 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 528 (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) 529 goto ours; 530 } 531 /* 532 * Check for broadcast addresses. 
533 * 534 * Only accept broadcast packets that arrive via the matching 535 * interface. Reception of forwarded directed broadcasts would 536 * be handled via ip_forward() and ether_output() with the loopback 537 * into the stack for SIMPLEX interfaces handled by ether_output(). 538 */ 539 if (m->m_pkthdr.rcvif != NULL && 540 m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 541 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 542 if (ifa->ifa_addr->sa_family != AF_INET) 543 continue; 544 ia = ifatoia(ifa); 545 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 546 ip->ip_dst.s_addr) 547 goto ours; 548 if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) 549 goto ours; 550 #ifdef BOOTP_COMPAT 551 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) 552 goto ours; 553 #endif 554 } 555 } 556 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 557 struct in_multi *inm; 558 if (ip_mrouter) { 559 /* 560 * If we are acting as a multicast router, all 561 * incoming multicast packets are passed to the 562 * kernel-level multicast forwarding function. 563 * The packet is returned (relatively) intact; if 564 * ip_mforward() returns a non-zero value, the packet 565 * must be discarded, else it may be accepted below. 566 */ 567 if (ip_mforward && 568 ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { 569 ipstat.ips_cantforward++; 570 m_freem(m); 571 return; 572 } 573 574 /* 575 * The process-level routing daemon needs to receive 576 * all multicast IGMP packets, whether or not this 577 * host belongs to their destination groups. 578 */ 579 if (ip->ip_p == IPPROTO_IGMP) 580 goto ours; 581 ipstat.ips_forward++; 582 } 583 /* 584 * See if we belong to the destination multicast group on the 585 * arrival interface. 
586 */ 587 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); 588 if (inm == NULL) { 589 ipstat.ips_notmember++; 590 m_freem(m); 591 return; 592 } 593 goto ours; 594 } 595 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 596 goto ours; 597 if (ip->ip_dst.s_addr == INADDR_ANY) 598 goto ours; 599 600 /* 601 * FAITH(Firewall Aided Internet Translator) 602 */ 603 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 604 if (ip_keepfaith) { 605 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 606 goto ours; 607 } 608 m_freem(m); 609 return; 610 } 611 612 /* 613 * Not for us; forward if possible and desirable. 614 */ 615 if (ipforwarding == 0) { 616 ipstat.ips_cantforward++; 617 m_freem(m); 618 } else { 619 #ifdef IPSEC 620 /* 621 * Enforce inbound IPsec SPD. 622 */ 623 if (ipsec4_in_reject(m, NULL)) { 624 ipsecstat.in_polvio++; 625 goto bad; 626 } 627 #endif /* IPSEC */ 628 #ifdef FAST_IPSEC 629 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 630 s = splnet(); 631 if (mtag != NULL) { 632 tdbi = (struct tdb_ident *)(mtag + 1); 633 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 634 } else { 635 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 636 IP_FORWARDING, &error); 637 } 638 if (sp == NULL) { /* NB: can happen if error */ 639 splx(s); 640 /*XXX error stat???*/ 641 DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ 642 goto bad; 643 } 644 645 /* 646 * Check security policy against packet attributes. 647 */ 648 error = ipsec_in_reject(sp, m); 649 KEY_FREESP(&sp); 650 splx(s); 651 if (error) { 652 ipstat.ips_cantforward++; 653 goto bad; 654 } 655 #endif /* FAST_IPSEC */ 656 ip_forward(m, dchg); 657 } 658 return; 659 660 ours: 661 #ifdef IPSTEALTH 662 /* 663 * IPSTEALTH: Process non-routing options only 664 * if the packet is destined for us. 
665 */ 666 if (ipstealth && hlen > sizeof (struct ip) && 667 ip_dooptions(m, 1)) 668 return; 669 #endif /* IPSTEALTH */ 670 671 /* Count the packet in the ip address stats */ 672 if (ia != NULL) { 673 ia->ia_ifa.if_ipackets++; 674 ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 675 } 676 677 /* 678 * Attempt reassembly; if it succeeds, proceed. 679 * ip_reass() will return a different mbuf. 680 */ 681 if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 682 m = ip_reass(m); 683 if (m == NULL) 684 return; 685 ip = mtod(m, struct ip *); 686 /* Get the header length of the reassembled packet */ 687 hlen = ip->ip_hl << 2; 688 } 689 690 /* 691 * Further protocols expect the packet length to be w/o the 692 * IP header. 693 */ 694 ip->ip_len -= hlen; 695 696 #ifdef IPSEC 697 /* 698 * enforce IPsec policy checking if we are seeing last header. 699 * note that we do not visit this with protocols with pcb layer 700 * code - like udp/tcp/raw ip. 701 */ 702 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && 703 ipsec4_in_reject(m, NULL)) { 704 ipsecstat.in_polvio++; 705 goto bad; 706 } 707 #endif 708 #if FAST_IPSEC 709 /* 710 * enforce IPsec policy checking if we are seeing last header. 711 * note that we do not visit this with protocols with pcb layer 712 * code - like udp/tcp/raw ip. 713 */ 714 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { 715 /* 716 * Check if the packet has already had IPsec processing 717 * done. If so, then just pass it along. This tag gets 718 * set during AH, ESP, etc. input handling, before the 719 * packet is returned to the ip input queue for delivery. 720 */ 721 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 722 s = splnet(); 723 if (mtag != NULL) { 724 tdbi = (struct tdb_ident *)(mtag + 1); 725 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 726 } else { 727 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 728 IP_FORWARDING, &error); 729 } 730 if (sp != NULL) { 731 /* 732 * Check security policy against packet attributes. 
733 */ 734 error = ipsec_in_reject(sp, m); 735 KEY_FREESP(&sp); 736 } else { 737 /* XXX error stat??? */ 738 error = EINVAL; 739 DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ 740 goto bad; 741 } 742 splx(s); 743 if (error) 744 goto bad; 745 } 746 #endif /* FAST_IPSEC */ 747 748 /* 749 * Switch out to protocol's input routine. 750 */ 751 ipstat.ips_delivered++; 752 753 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); 754 return; 755 bad: 756 m_freem(m); 757 } 758 759 /* 760 * Take incoming datagram fragment and try to reassemble it into 761 * whole datagram. If the argument is the first fragment or one 762 * in between the function will return NULL and store the mbuf 763 * in the fragment chain. If the argument is the last fragment 764 * the packet will be reassembled and the pointer to the new 765 * mbuf returned for further processing. Only m_tags attached 766 * to the first packet/fragment are preserved. 767 * The IP header is *NOT* adjusted out of iplen. 768 */ 769 770 struct mbuf * 771 ip_reass(struct mbuf *m) 772 { 773 struct ip *ip; 774 struct mbuf *p, *q, *nq, *t; 775 struct ipq *fp = NULL; 776 struct ipqhead *head; 777 int i, hlen, next; 778 u_int8_t ecn, ecn0; 779 u_short hash; 780 781 /* If maxnipq is 0, never accept fragments. */ 782 if (maxnipq == 0) { 783 ipstat.ips_fragments++; 784 ipstat.ips_fragdropped++; 785 m_freem(m); 786 return (NULL); 787 } 788 789 ip = mtod(m, struct ip *); 790 hlen = ip->ip_hl << 2; 791 792 hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 793 head = &ipq[hash]; 794 IPQ_LOCK(); 795 796 /* 797 * Look for queue of fragments 798 * of this datagram. 
799 */ 800 TAILQ_FOREACH(fp, head, ipq_list) 801 if (ip->ip_id == fp->ipq_id && 802 ip->ip_src.s_addr == fp->ipq_src.s_addr && 803 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 804 #ifdef MAC 805 mac_fragment_match(m, fp) && 806 #endif 807 ip->ip_p == fp->ipq_p) 808 goto found; 809 810 fp = NULL; 811 812 /* 813 * Enforce upper bound on number of fragmented packets 814 * for which we attempt reassembly; 815 * If maxnipq is -1, accept all fragments without limitation. 816 */ 817 if ((nipq > maxnipq) && (maxnipq > 0)) { 818 /* 819 * drop something from the tail of the current queue 820 * before proceeding further 821 */ 822 struct ipq *q = TAILQ_LAST(head, ipqhead); 823 if (q == NULL) { /* gak */ 824 for (i = 0; i < IPREASS_NHASH; i++) { 825 struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead); 826 if (r) { 827 ipstat.ips_fragtimeout += r->ipq_nfrags; 828 ip_freef(&ipq[i], r); 829 break; 830 } 831 } 832 } else { 833 ipstat.ips_fragtimeout += q->ipq_nfrags; 834 ip_freef(head, q); 835 } 836 } 837 838 found: 839 /* 840 * Adjust ip_len to not reflect header, 841 * convert offset of this to bytes. 842 */ 843 ip->ip_len -= hlen; 844 if (ip->ip_off & IP_MF) { 845 /* 846 * Make sure that fragments have a data length 847 * that's a non-zero multiple of 8 bytes. 848 */ 849 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 850 ipstat.ips_toosmall++; /* XXX */ 851 goto dropfrag; 852 } 853 m->m_flags |= M_FRAG; 854 } else 855 m->m_flags &= ~M_FRAG; 856 ip->ip_off <<= 3; 857 858 859 /* 860 * Attempt reassembly; if it succeeds, proceed. 861 * ip_reass() will return a different mbuf. 862 */ 863 ipstat.ips_fragments++; 864 m->m_pkthdr.header = ip; 865 866 /* Previous ip_reass() started here. */ 867 /* 868 * Presence of header sizes in mbufs 869 * would confuse code below. 870 */ 871 m->m_data += hlen; 872 m->m_len -= hlen; 873 874 /* 875 * If first fragment to arrive, create a reassembly queue. 
876 */ 877 if (fp == NULL) { 878 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) 879 goto dropfrag; 880 fp = mtod(t, struct ipq *); 881 #ifdef MAC 882 if (mac_init_ipq(fp, M_NOWAIT) != 0) { 883 m_free(t); 884 goto dropfrag; 885 } 886 mac_create_ipq(m, fp); 887 #endif 888 TAILQ_INSERT_HEAD(head, fp, ipq_list); 889 nipq++; 890 fp->ipq_nfrags = 1; 891 fp->ipq_ttl = IPFRAGTTL; 892 fp->ipq_p = ip->ip_p; 893 fp->ipq_id = ip->ip_id; 894 fp->ipq_src = ip->ip_src; 895 fp->ipq_dst = ip->ip_dst; 896 fp->ipq_frags = m; 897 m->m_nextpkt = NULL; 898 goto inserted; 899 } else { 900 fp->ipq_nfrags++; 901 #ifdef MAC 902 mac_update_ipq(m, fp); 903 #endif 904 } 905 906 #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 907 908 /* 909 * Handle ECN by comparing this segment with the first one; 910 * if CE is set, do not lose CE. 911 * drop if CE and not-ECT are mixed for the same packet. 912 */ 913 ecn = ip->ip_tos & IPTOS_ECN_MASK; 914 ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; 915 if (ecn == IPTOS_ECN_CE) { 916 if (ecn0 == IPTOS_ECN_NOTECT) 917 goto dropfrag; 918 if (ecn0 != IPTOS_ECN_CE) 919 GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; 920 } 921 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) 922 goto dropfrag; 923 924 /* 925 * Find a segment which begins after this one does. 926 */ 927 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 928 if (GETIP(q)->ip_off > ip->ip_off) 929 break; 930 931 /* 932 * If there is a preceding segment, it may provide some of 933 * our data already. If so, drop the data from the incoming 934 * segment. If it provides all of our data, drop us, otherwise 935 * stick new segment in the proper place. 936 * 937 * If some of the data is dropped from the the preceding 938 * segment, then it's checksum is invalidated. 
939 */ 940 if (p) { 941 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 942 if (i > 0) { 943 if (i >= ip->ip_len) 944 goto dropfrag; 945 m_adj(m, i); 946 m->m_pkthdr.csum_flags = 0; 947 ip->ip_off += i; 948 ip->ip_len -= i; 949 } 950 m->m_nextpkt = p->m_nextpkt; 951 p->m_nextpkt = m; 952 } else { 953 m->m_nextpkt = fp->ipq_frags; 954 fp->ipq_frags = m; 955 } 956 957 /* 958 * While we overlap succeeding segments trim them or, 959 * if they are completely covered, dequeue them. 960 */ 961 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 962 q = nq) { 963 i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 964 if (i < GETIP(q)->ip_len) { 965 GETIP(q)->ip_len -= i; 966 GETIP(q)->ip_off += i; 967 m_adj(q, i); 968 q->m_pkthdr.csum_flags = 0; 969 break; 970 } 971 nq = q->m_nextpkt; 972 m->m_nextpkt = nq; 973 ipstat.ips_fragdropped++; 974 fp->ipq_nfrags--; 975 m_freem(q); 976 } 977 978 inserted: 979 980 /* 981 * Check for complete reassembly and perform frag per packet 982 * limiting. 983 * 984 * Frag limiting is performed here so that the nth frag has 985 * a chance to complete the packet before we drop the packet. 986 * As a result, n+1 frags are actually allowed per packet, but 987 * only n will ever be stored. (n = maxfragsperpacket.) 988 * 989 */ 990 next = 0; 991 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 992 if (GETIP(q)->ip_off != next) { 993 if (fp->ipq_nfrags > maxfragsperpacket) { 994 ipstat.ips_fragdropped += fp->ipq_nfrags; 995 ip_freef(head, fp); 996 } 997 goto done; 998 } 999 next += GETIP(q)->ip_len; 1000 } 1001 /* Make sure the last packet didn't have the IP_MF flag */ 1002 if (p->m_flags & M_FRAG) { 1003 if (fp->ipq_nfrags > maxfragsperpacket) { 1004 ipstat.ips_fragdropped += fp->ipq_nfrags; 1005 ip_freef(head, fp); 1006 } 1007 goto done; 1008 } 1009 1010 /* 1011 * Reassembly is complete. Make sure the packet is a sane size. 
1012 */ 1013 q = fp->ipq_frags; 1014 ip = GETIP(q); 1015 if (next + (ip->ip_hl << 2) > IP_MAXPACKET) { 1016 ipstat.ips_toolong++; 1017 ipstat.ips_fragdropped += fp->ipq_nfrags; 1018 ip_freef(head, fp); 1019 goto done; 1020 } 1021 1022 /* 1023 * Concatenate fragments. 1024 */ 1025 m = q; 1026 t = m->m_next; 1027 m->m_next = NULL; 1028 m_cat(m, t); 1029 nq = q->m_nextpkt; 1030 q->m_nextpkt = NULL; 1031 for (q = nq; q != NULL; q = nq) { 1032 nq = q->m_nextpkt; 1033 q->m_nextpkt = NULL; 1034 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1035 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1036 m_cat(m, q); 1037 } 1038 #ifdef MAC 1039 mac_create_datagram_from_ipq(fp, m); 1040 mac_destroy_ipq(fp); 1041 #endif 1042 1043 /* 1044 * Create header for new ip packet by modifying header of first 1045 * packet; dequeue and discard fragment reassembly header. 1046 * Make header visible. 1047 */ 1048 ip->ip_len = (ip->ip_hl << 2) + next; 1049 ip->ip_src = fp->ipq_src; 1050 ip->ip_dst = fp->ipq_dst; 1051 TAILQ_REMOVE(head, fp, ipq_list); 1052 nipq--; 1053 (void) m_free(dtom(fp)); 1054 m->m_len += (ip->ip_hl << 2); 1055 m->m_data -= (ip->ip_hl << 2); 1056 /* some debugging cruft by sklower, below, will go away soon */ 1057 if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ 1058 m_fixhdr(m); 1059 ipstat.ips_reassembled++; 1060 IPQ_UNLOCK(); 1061 return (m); 1062 1063 dropfrag: 1064 ipstat.ips_fragdropped++; 1065 if (fp != NULL) 1066 fp->ipq_nfrags--; 1067 m_freem(m); 1068 done: 1069 IPQ_UNLOCK(); 1070 return (NULL); 1071 1072 #undef GETIP 1073 } 1074 1075 /* 1076 * Free a fragment reassembly header and all 1077 * associated datagrams. 
1078 */ 1079 static void 1080 ip_freef(fhp, fp) 1081 struct ipqhead *fhp; 1082 struct ipq *fp; 1083 { 1084 register struct mbuf *q; 1085 1086 IPQ_LOCK_ASSERT(); 1087 1088 while (fp->ipq_frags) { 1089 q = fp->ipq_frags; 1090 fp->ipq_frags = q->m_nextpkt; 1091 m_freem(q); 1092 } 1093 TAILQ_REMOVE(fhp, fp, ipq_list); 1094 (void) m_free(dtom(fp)); 1095 nipq--; 1096 } 1097 1098 /* 1099 * IP timer processing; 1100 * if a timer expires on a reassembly 1101 * queue, discard it. 1102 */ 1103 void 1104 ip_slowtimo() 1105 { 1106 register struct ipq *fp; 1107 int s = splnet(); 1108 int i; 1109 1110 IPQ_LOCK(); 1111 for (i = 0; i < IPREASS_NHASH; i++) { 1112 for(fp = TAILQ_FIRST(&ipq[i]); fp;) { 1113 struct ipq *fpp; 1114 1115 fpp = fp; 1116 fp = TAILQ_NEXT(fp, ipq_list); 1117 if(--fpp->ipq_ttl == 0) { 1118 ipstat.ips_fragtimeout += fpp->ipq_nfrags; 1119 ip_freef(&ipq[i], fpp); 1120 } 1121 } 1122 } 1123 /* 1124 * If we are over the maximum number of fragments 1125 * (due to the limit being lowered), drain off 1126 * enough to get down to the new limit. 1127 */ 1128 if (maxnipq >= 0 && nipq > maxnipq) { 1129 for (i = 0; i < IPREASS_NHASH; i++) { 1130 while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) { 1131 ipstat.ips_fragdropped += 1132 TAILQ_FIRST(&ipq[i])->ipq_nfrags; 1133 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); 1134 } 1135 } 1136 } 1137 IPQ_UNLOCK(); 1138 splx(s); 1139 } 1140 1141 /* 1142 * Drain off all datagram fragments. 1143 */ 1144 void 1145 ip_drain() 1146 { 1147 int i; 1148 1149 IPQ_LOCK(); 1150 for (i = 0; i < IPREASS_NHASH; i++) { 1151 while(!TAILQ_EMPTY(&ipq[i])) { 1152 ipstat.ips_fragdropped += 1153 TAILQ_FIRST(&ipq[i])->ipq_nfrags; 1154 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); 1155 } 1156 } 1157 IPQ_UNLOCK(); 1158 in_rtqdrain(); 1159 } 1160 1161 /* 1162 * The protocol to be inserted into ip_protox[] must be already registered 1163 * in inetsw[], either statically or through pf_proto_register(). 
1164 */ 1165 int 1166 ipproto_register(u_char ipproto) 1167 { 1168 struct protosw *pr; 1169 1170 /* Sanity checks. */ 1171 if (ipproto == 0) 1172 return (EPROTONOSUPPORT); 1173 1174 /* 1175 * The protocol slot must not be occupied by another protocol 1176 * already. An index pointing to IPPROTO_RAW is unused. 1177 */ 1178 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 1179 if (pr == NULL) 1180 return (EPFNOSUPPORT); 1181 if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 1182 return (EEXIST); 1183 1184 /* Find the protocol position in inetsw[] and set the index. */ 1185 for (pr = inetdomain.dom_protosw; 1186 pr < inetdomain.dom_protoswNPROTOSW; pr++) { 1187 if (pr->pr_domain->dom_family == PF_INET && 1188 pr->pr_protocol && pr->pr_protocol == ipproto) { 1189 /* Be careful to only index valid IP protocols. */ 1190 if (pr->pr_protocol <= IPPROTO_MAX) { 1191 ip_protox[pr->pr_protocol] = pr - inetsw; 1192 return (0); 1193 } else 1194 return (EINVAL); 1195 } 1196 } 1197 return (EPROTONOSUPPORT); 1198 } 1199 1200 int 1201 ipproto_unregister(u_char ipproto) 1202 { 1203 struct protosw *pr; 1204 1205 /* Sanity checks. */ 1206 if (ipproto == 0) 1207 return (EPROTONOSUPPORT); 1208 1209 /* Check if the protocol was indeed registered. */ 1210 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 1211 if (pr == NULL) 1212 return (EPFNOSUPPORT); 1213 if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 1214 return (ENOENT); 1215 1216 /* Reset the protocol slot to IPPROTO_RAW. */ 1217 ip_protox[ipproto] = pr - inetsw; 1218 return (0); 1219 } 1220 1221 1222 /* 1223 * Do option processing on a datagram, 1224 * possibly discarding it if bad options are encountered, 1225 * or forwarding it if source-routed. 1226 * The pass argument is used when operating in the IPSTEALTH 1227 * mode to tell what options to process: 1228 * [LS]SRR (pass 0) or the others (pass 1). 
 * The reason for as many as two passes is that when doing IPSTEALTH,
 * non-routing options should be processed only if the packet is for us.
 * Returns 1 if packet has been forwarded/freed,
 * 0 if the packet should be processed further.
 */
static int
ip_dooptions(struct mbuf *m, int pass)
{
	struct ip *ip = mtod(m, struct ip *);
	u_char *cp;
	struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr *sin, dst;
	n_time ntime;
	/* Positional initializer; presumably sin_len, sin_family -- confirm. */
	struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };

	/* ignore or reject packets with IP options */
	if (ip_doopts == 0)
		return 0;
	else if (ip_doopts == 2) {
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_FILTER_PROHIB;
		goto bad;
	}

	/* Remember the original destination; LSRR/SSRR may rewrite ip_dst. */
	dst = ip->ip_dst;
	/*
	 * Walk the option bytes that follow the fixed header.  cnt is the
	 * number of option bytes left; each iteration consumes optlen.
	 */
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/*
			 * The length byte must exist, and the length it
			 * gives must cover at least type+length and must
			 * not run past the remaining option bytes.
			 * On failure, code is the offset of the offending
			 * byte from the start of the IP header; it is
			 * handed to icmp_error() at "bad".
			 */
			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			optlen = cp[IPOPT_OLEN];
			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
#ifdef IPSTEALTH
			if (ipstealth && pass > 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			ipaddr.sin_addr = ip->ip_dst;
			ia = (struct in_ifaddr *)
				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
			if (ia == NULL) {
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				/*
				 * Jumps into the "if (ipforwarding)" block
				 * further down, bypassing both the
				 * ip_dosourceroute and ipforwarding tests.
				 */
				if (!ip_dosourceroute)
					goto nosourcerouting;
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr)) {
				/*
				 * End of source route.  Should be for us.
				 */
				if (!ip_acceptsourceroute)
					goto nosourcerouting;
				save_rte(m, cp, ip->ip_src);
				break;
			}
#ifdef IPSTEALTH
			if (ipstealth)
				goto dropit;
#endif
			if (!ip_dosourceroute) {
				if (ipforwarding) {
					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
					/*
					 * Acting as a router, so generate ICMP
					 */
nosourcerouting:
					/*
					 * ip_dst is formatted into buf first;
					 * presumably inet_ntoa() returns a
					 * shared static buffer that the second
					 * call below would overwrite -- confirm.
					 */
					strcpy(buf, inet_ntoa(ip->ip_dst));
					log(LOG_WARNING,
					    "attempted source route from %s to %s\n",
					    inet_ntoa(ip->ip_src), buf);
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				} else {
					/*
					 * Not acting as a router, so silently drop.
					 */
#ifdef IPSTEALTH
dropit:
#endif
					ipstat.ips_cantforward++;
					m_freem(m);
					return (1);
				}
			}

			/*
			 * locate outgoing interface
			 */
			(void)memcpy(&ipaddr.sin_addr, cp + off,
			    sizeof(ipaddr.sin_addr));

			if (opt == IPOPT_SSRR) {
/* Cast shorthands; not #undef'd here, and reused by the cases below. */
#define	INA	struct in_ifaddr *
#define	SA	struct sockaddr *
			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
			} else
				ia = ip_rtaddr(ipaddr.sin_addr);
			if (ia == NULL) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			/*
			 * Make the next listed hop the new destination,
			 * record the outgoing interface address in the slot
			 * it occupied, and advance the option pointer.
			 */
			ip->ip_dst = ipaddr.sin_addr;
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
			break;

		case IPOPT_RR:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			/*
			 * If no space remains, ignore.
			 */
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr))
				break;
			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
			    sizeof(ipaddr.sin_addr));
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 */
			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
			    (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				goto bad;
			}
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			code = cp - (u_char *)ip;
			if (optlen < 4 || optlen > 40) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < 5) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if (off > optlen - (int)sizeof(int32_t)) {
				/*
				 * No room for another timestamp: bump the
				 * 4-bit count kept in the high nibble of the
				 * byte after the pointer.  A zero nibble
				 * afterwards means the count wrapped, which
				 * is reported as an error.
				 */
				cp[IPOPT_OFFSET + 1] += (1 << 4);
				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				break;
			}
			off--;				/* 0 origin */
			sin = (struct in_addr *)(cp + off);
			/* The low nibble of the same byte selects the format. */
			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				ipaddr.sin_addr = dst;
				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
							    m->m_pkthdr.rcvif);
				/*
				 * "continue" binds to the enclosing for loop,
				 * not to either switch: move on to the next
				 * option without writing a timestamp.
				 */
				if (ia == NULL)
					continue;
				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
				    sizeof(struct in_addr));
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				(void)memcpy(&ipaddr.sin_addr, sin,
				    sizeof(struct in_addr));
				/*
				 * The address already in the slot is not one
				 * of ours: leave the slot alone and move on
				 * to the next option.
				 */
				if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
					continue;
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			default:
				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
				goto bad;
			}
			/* Store the timestamp and advance the option pointer. */
			ntime = iptime();
			(void)memcpy(cp + off, &ntime, sizeof(n_time));
			cp[IPOPT_OFFSET] += sizeof(n_time);
		}
	}
	/* forward is set only by the LSRR/SSRR case above. */
	if (forward && ipforwarding) {
		ip_forward(m, 1);
		return (1);
	}
	return (0);
bad:
	/*
	 * m is handed to icmp_error() and not referenced again here;
	 * presumably icmp_error() consumes the mbuf -- confirm.
	 */
	icmp_error(m, type, code, 0, 0);
	ipstat.ips_badoptions++;
	return (1);
}

/*
 * Given address of next destination (final or next hop),
 * return internet address info of interface to be used to get there.
 * Returns NULL when the route lookup yields no route.
 */
struct in_ifaddr *
ip_rtaddr(dst)
	struct in_addr dst;
{
	struct route sro;
	struct sockaddr_in *sin;
	struct in_ifaddr *ifa;

	/* Build a throw-away route request keyed on dst. */
	bzero(&sro, sizeof(sro));
	sin = (struct sockaddr_in *)&sro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = dst;
	rtalloc_ign(&sro, RTF_CLONING);

	if (sro.ro_rt == NULL)
		return (NULL);

	/*
	 * NOTE(review): the route is released before ifa is returned and
	 * no reference is taken on ifa in this function -- confirm what
	 * keeps the interface address alive for the caller.
	 */
	ifa = ifatoia(sro.ro_rt->rt_ifa);
	RTFREE(sro.ro_rt);
	return (ifa);
}

/*
 * Save incoming source route for use in replies,
 * to be picked up later by ip_srcroute if the receiver is interested.
1534 */ 1535 static void 1536 save_rte(m, option, dst) 1537 struct mbuf *m; 1538 u_char *option; 1539 struct in_addr dst; 1540 { 1541 unsigned olen; 1542 struct ipopt_tag *opts; 1543 1544 opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS, 1545 sizeof(struct ipopt_tag), M_NOWAIT); 1546 if (opts == NULL) 1547 return; 1548 1549 olen = option[IPOPT_OLEN]; 1550 #ifdef DIAGNOSTIC 1551 if (ipprintfs) 1552 printf("save_rte: olen %d\n", olen); 1553 #endif 1554 if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) 1555 return; 1556 bcopy(option, opts->ip_srcrt.srcopt, olen); 1557 opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); 1558 opts->ip_srcrt.dst = dst; 1559 m_tag_prepend(m, (struct m_tag *)opts); 1560 } 1561 1562 /* 1563 * Retrieve incoming source route for use in replies, 1564 * in the same form used by setsockopt. 1565 * The first hop is placed before the options, will be removed later. 1566 */ 1567 struct mbuf * 1568 ip_srcroute(m0) 1569 struct mbuf *m0; 1570 { 1571 register struct in_addr *p, *q; 1572 register struct mbuf *m; 1573 struct ipopt_tag *opts; 1574 1575 opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL); 1576 if (opts == NULL) 1577 return (NULL); 1578 1579 if (opts->ip_nhops == 0) 1580 return (NULL); 1581 m = m_get(M_DONTWAIT, MT_HEADER); 1582 if (m == NULL) 1583 return (NULL); 1584 1585 #define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt)) 1586 1587 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ 1588 m->m_len = opts->ip_nhops * sizeof(struct in_addr) + 1589 sizeof(struct in_addr) + OPTSIZ; 1590 #ifdef DIAGNOSTIC 1591 if (ipprintfs) 1592 printf("ip_srcroute: nhops %d mlen %d", opts->ip_nhops, m->m_len); 1593 #endif 1594 1595 /* 1596 * First save first hop for return route 1597 */ 1598 p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]); 1599 *(mtod(m, struct in_addr *)) = *p--; 1600 #ifdef DIAGNOSTIC 1601 if (ipprintfs) 1602 printf(" hops %lx", (u_long)ntohl(mtod(m, 
struct in_addr *)->s_addr)); 1603 #endif 1604 1605 /* 1606 * Copy option fields and padding (nop) to mbuf. 1607 */ 1608 opts->ip_srcrt.nop = IPOPT_NOP; 1609 opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; 1610 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr), 1611 &(opts->ip_srcrt.nop), OPTSIZ); 1612 q = (struct in_addr *)(mtod(m, caddr_t) + 1613 sizeof(struct in_addr) + OPTSIZ); 1614 #undef OPTSIZ 1615 /* 1616 * Record return path as an IP source route, 1617 * reversing the path (pointers are now aligned). 1618 */ 1619 while (p >= opts->ip_srcrt.route) { 1620 #ifdef DIAGNOSTIC 1621 if (ipprintfs) 1622 printf(" %lx", (u_long)ntohl(q->s_addr)); 1623 #endif 1624 *q++ = *p--; 1625 } 1626 /* 1627 * Last hop goes to final destination. 1628 */ 1629 *q = opts->ip_srcrt.dst; 1630 #ifdef DIAGNOSTIC 1631 if (ipprintfs) 1632 printf(" %lx\n", (u_long)ntohl(q->s_addr)); 1633 #endif 1634 m_tag_delete(m0, (struct m_tag *)opts); 1635 return (m); 1636 } 1637 1638 /* 1639 * Strip out IP options, at higher 1640 * level protocol in the kernel. 1641 * Second argument is buffer to which options 1642 * will be moved, and return value is their length. 1643 * XXX should be deleted; last arg currently ignored. 
1644 */ 1645 void 1646 ip_stripoptions(m, mopt) 1647 register struct mbuf *m; 1648 struct mbuf *mopt; 1649 { 1650 register int i; 1651 struct ip *ip = mtod(m, struct ip *); 1652 register caddr_t opts; 1653 int olen; 1654 1655 olen = (ip->ip_hl << 2) - sizeof (struct ip); 1656 opts = (caddr_t)(ip + 1); 1657 i = m->m_len - (sizeof (struct ip) + olen); 1658 bcopy(opts + olen, opts, (unsigned)i); 1659 m->m_len -= olen; 1660 if (m->m_flags & M_PKTHDR) 1661 m->m_pkthdr.len -= olen; 1662 ip->ip_v = IPVERSION; 1663 ip->ip_hl = sizeof(struct ip) >> 2; 1664 } 1665 1666 u_char inetctlerrmap[PRC_NCMDS] = { 1667 0, 0, 0, 0, 1668 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 1669 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 1670 EMSGSIZE, EHOSTUNREACH, 0, 0, 1671 0, 0, EHOSTUNREACH, 0, 1672 ENOPROTOOPT, ECONNREFUSED 1673 }; 1674 1675 /* 1676 * Forward a packet. If some error occurs return the sender 1677 * an icmp packet. Note we can't always generate a meaningful 1678 * icmp message because icmp doesn't have a large enough repertoire 1679 * of codes and types. 1680 * 1681 * If not forwarding, just drop the packet. This could be confusing 1682 * if ipforwarding was zero but some routing protocol was advancing 1683 * us as a gateway to somewhere. However, we must let the routing 1684 * protocol deal with that. 1685 * 1686 * The srcrt parameter indicates whether the packet is being forwarded 1687 * via a source route. 
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia = NULL;
	int error, type = 0, code = 0;
	struct mbuf *mcopy;
	struct in_addr dest;
	/* dummyifp is a stack stand-in; only its if_mtu is ever filled in. */
	struct ifnet *destifp, dummyifp;

#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("forward: src %lx dst %lx ttl %x\n",
		    (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
		    ip->ip_ttl);
#endif


	/* Link-level broadcast/multicast or unforwardable destination: drop. */
	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		ipstat.ips_cantforward++;
		m_freem(m);
		return;
	}
#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		if (ip->ip_ttl <= IPTTLDEC) {
			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
			    0, 0);
			return;
		}
#ifdef IPSTEALTH
	}
#endif

	/*
	 * The interface lookup is skipped for source-routed packets, so
	 * ia remains NULL whenever srcrt is non-zero.
	 */
	if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
		return;
	}

	/*
	 * Save the IP header and at most 8 bytes of the payload,
	 * in case we need to generate an ICMP message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary. The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copy() because it might return a reference
	 * to a shared cluster. Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	MGET(mcopy, M_DONTWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
		    (int)ip->ip_len);
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}

	/* The copy above was taken before the TTL is decremented. */
#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	/*
	 * ia is non-NULL whenever !srcrt holds here: the lookup above
	 * returned early on failure.  The !srcrt test must stay first.
	 */
	if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct sockaddr_in *sin;
		struct route ro;
		struct rtentry *rt;

		bzero(&ro, sizeof(ro));
		sin = (struct sockaddr_in *)&ro.ro_dst;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ip->ip_dst;
		rtalloc_ign(&ro, RTF_CLONING);

		rt = ro.ro_rt;

		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
			satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
			u_long src = ntohl(ip->ip_src.s_addr);

			/* Sender is on the subnet of the route's interface. */
			if (RTA(rt) &&
			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
				if (rt->rt_flags & RTF_GATEWAY)
					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
				else
					dest.s_addr = ip->ip_dst.s_addr;
				/* Router requirements says to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
#ifdef DIAGNOSTIC
				if (ipprintfs)
					printf("redirect (%d) to %lx\n", code, (u_long)dest.s_addr);
#endif
			}
		}
		if (rt)
			RTFREE(rt);
	}

	/* m is handed to ip_output() and is not referenced again below. */
	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
	if (error)
		ipstat.ips_cantforward++;
	else {
		ipstat.ips_forward++;
		if (type)
			ipstat.ips_redirectsent++;
		else {
			/* Forwarded and no redirect wanted: copy not needed. */
			if (mcopy)
				m_freem(mcopy);
			return;
		}
	}
	/* An ICMP message is wanted, but there is no copy to build it from. */
	if (mcopy == NULL)
		return;
	destifp = NULL;

	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
#if defined(IPSEC) || defined(FAST_IPSEC)
		/*
		 * If the packet is routed over IPsec tunnel, tell the
		 * originator the tunnel MTU.
		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
		 * XXX quickhack!!!
		 */
		{
			struct secpolicy *sp = NULL;
			int ipsecerror;
			int ipsechdr;
			struct route *ro;

#ifdef IPSEC
			sp = ipsec4_getpolicybyaddr(mcopy,
						    IPSEC_DIR_OUTBOUND,
						    IP_FORWARDING,
						    &ipsecerror);
#else /* FAST_IPSEC */
			sp = ipsec_getpolicybyaddr(mcopy,
						   IPSEC_DIR_OUTBOUND,
						   IP_FORWARDING,
						   &ipsecerror);
#endif
			if (sp != NULL) {
				/* count IPsec header size */
				ipsechdr = ipsec4_hdrsiz(mcopy,
							 IPSEC_DIR_OUTBOUND,
							 NULL);

				/*
				 * find the correct route for outer IPv4
				 * header, compute tunnel MTU.
				 *
				 * XXX BUG ALERT
				 * The "dummyifp" code relies upon the fact
				 * that icmp_error() touches only ifp->if_mtu.
				 */
				/*XXX*/
				destifp = NULL;
				if (sp->req != NULL
				 && sp->req->sav != NULL
				 && sp->req->sav->sah != NULL) {
					ro = &sp->req->sav->sah->sa_route;
					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
						dummyifp.if_mtu =
						    ro->ro_rt->rt_rmx.rmx_mtu ?
						    ro->ro_rt->rt_rmx.rmx_mtu :
						    ro->ro_rt->rt_ifp->if_mtu;
						dummyifp.if_mtu -= ipsechdr;
						destifp = &dummyifp;
					}
				}

#ifdef IPSEC
				key_freesp(sp);
#else /* FAST_IPSEC */
				KEY_FREESP(&sp);
#endif
				ipstat.ips_cantfrag++;
				break;
			} else
			/*
			 * With IPsec compiled in, the "if (ia == NULL)"
			 * statement below is the body of this else, and
			 * the brace opened after the "quickhack" comment
			 * is closed by the #if'd "}" that follows it.
			 */
#endif /*IPSEC || FAST_IPSEC*/
		/*
		 * When doing source routing 'ia' can be NULL.  Fall back
		 * to the minimum guaranteed routeable packet size and use
		 * the same hack as IPSEC to setup a dummyifp for icmp.
		 */
		if (ia == NULL) {
			dummyifp.if_mtu = IP_MSS;
			destifp = &dummyifp;
		} else
			destifp = ia->ia_ifp;
#if defined(IPSEC) || defined(FAST_IPSEC)
		}
#endif /*IPSEC || FAST_IPSEC*/
		ipstat.ips_cantfrag++;
		break;

	case ENOBUFS:
		/*
		 * A router should not generate ICMP_SOURCEQUENCH as
		 * required in RFC1812 Requirements for IP Version 4 Routers.
		 * Source quench could be a big problem under DoS attacks,
		 * or if the underlying interface is rate-limited.
		 * Those who need source quench packets may re-enable them
		 * via the net.inet.ip.sendsourcequench sysctl.
		 */
		if (ip_sendsourcequench == 0) {
			m_freem(mcopy);
			return;
		} else {
			type = ICMP_SOURCEQUENCH;
			code = 0;
		}
		break;

	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		return;
	}
	/* mcopy is handed to icmp_error() and not referenced afterwards. */
	icmp_error(mcopy, type, code, dest.s_addr, destifp);
}

/*
 * Build the control messages selected by the socket options and pcb
 * flags of inp and chain them starting at *mp.
 *
 * Each message is created with sbcreatecontrol() and stored through mp;
 * when that yields a non-NULL mbuf, mp is advanced to the new mbuf's
 * m_next so the next message is linked after it.  A message whose
 * creation fails is skipped.
 *
 * ip supplies the destination address and TTL; m supplies the receive
 * interface used for IP_RECVIF.
 */
void
ip_savecontrol(inp, mp, ip, m)
	register struct inpcb *inp;
	register struct mbuf **mp;
	register struct ip *ip;
	register struct mbuf *m;
{
	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
		struct bintime bt;

		/* Read the clock once; both formats derive from bt. */
		bintime(&bt);
		if (inp->inp_socket->so_options & SO_BINTIME) {
			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
			SCM_BINTIME, SOL_SOCKET);
			if (*mp)
				mp = &(*mp)->m_next;
		}
		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
			struct timeval tv;

			bintime2timeval(&bt, &tv);
			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
				SCM_TIMESTAMP, SOL_SOCKET);
			if (*mp)
				mp = &(*mp)->m_next;
		}
	}
	if (inp->inp_flags & INP_RECVDSTADDR) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	if (inp->inp_flags & INP_RECVTTL) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#ifdef notyet
	/* XXX
	 * Moving these out of udp_input() made them even more broken
	 * than they already were.
	 */
	/* options were tossed already */
	if (inp->inp_flags & INP_RECVOPTS) {
		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	/* ip_srcroute doesn't do what we want here, need to fix */
	if (inp->inp_flags & INP_RECVRETOPTS) {
		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#endif
	if (inp->inp_flags & INP_RECVIF) {
		struct ifnet *ifp;
		/* Local buffer: a sockaddr_dl followed by 32 spare bytes. */
		struct sdlbuf {
			struct sockaddr_dl sdl;
			u_char	pad[32];
		} sdlbuf;
		struct sockaddr_dl *sdp;
		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;

		/* Use the receive interface only if its index is in range. */
		if (((ifp = m->m_pkthdr.rcvif))
		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
			sdp = (struct sockaddr_dl *)
			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
			/*
			 * Change our mind and don't try copy.
			 * (Not a link-level address, or too long for sdlbuf.)
			 */
			if ((sdp->sdl_family != AF_LINK)
			|| (sdp->sdl_len > sizeof(sdlbuf))) {
				goto makedummy;
			}
			bcopy(sdp, sdl2, sdp->sdl_len);
		} else {
makedummy:
			/* Header-only AF_LINK address: index 0, no name/addr. */
			sdl2->sdl_len
				= offsetof(struct sockaddr_dl, sdl_data[0]);
			sdl2->sdl_family = AF_LINK;
			sdl2->sdl_index = 0;
			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
		}
		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
			IP_RECVIF, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
}

/*
 * XXX these routines are called from the upper part of the kernel.
 * They need to be locked when we remove Giant.
 *
 * They could also be moved to ip_mroute.c, since all the RSVP
 * handling is done there already.
2058 */ 2059 static int ip_rsvp_on; 2060 struct socket *ip_rsvpd; 2061 int 2062 ip_rsvp_init(struct socket *so) 2063 { 2064 if (so->so_type != SOCK_RAW || 2065 so->so_proto->pr_protocol != IPPROTO_RSVP) 2066 return EOPNOTSUPP; 2067 2068 if (ip_rsvpd != NULL) 2069 return EADDRINUSE; 2070 2071 ip_rsvpd = so; 2072 /* 2073 * This may seem silly, but we need to be sure we don't over-increment 2074 * the RSVP counter, in case something slips up. 2075 */ 2076 if (!ip_rsvp_on) { 2077 ip_rsvp_on = 1; 2078 rsvp_on++; 2079 } 2080 2081 return 0; 2082 } 2083 2084 int 2085 ip_rsvp_done(void) 2086 { 2087 ip_rsvpd = NULL; 2088 /* 2089 * This may seem silly, but we need to be sure we don't over-decrement 2090 * the RSVP counter, in case something slips up. 2091 */ 2092 if (ip_rsvp_on) { 2093 ip_rsvp_on = 0; 2094 rsvp_on--; 2095 } 2096 return 0; 2097 } 2098 2099 void 2100 rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ 2101 { 2102 if (rsvp_input_p) { /* call the real one if loaded */ 2103 rsvp_input_p(m, off); 2104 return; 2105 } 2106 2107 /* Can still get packets with rsvp_on = 0 if there is a local member 2108 * of the group to which the RSVP packet is addressed. But in this 2109 * case we want to throw the packet away. 2110 */ 2111 2112 if (!rsvp_on) { 2113 m_freem(m); 2114 return; 2115 } 2116 2117 if (ip_rsvpd != NULL) { 2118 rip_input(m, off); 2119 return; 2120 } 2121 /* Drop the packet */ 2122 m_freem(m); 2123 } 2124