1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 32df8bae1dSRodney W. Grimes 330ac40133SBrian Somers #include "opt_bootp.h" 3474a9466cSGary Palmer #include "opt_ipfw.h" 3527108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 366a800098SYoshinobu Inoue #include "opt_ipsec.h" 3736b0360bSRobert Watson #include "opt_mac.h" 38a9771948SGleb Smirnoff #include "opt_carp.h" 3974a9466cSGary Palmer 40df8bae1dSRodney W. Grimes #include <sys/param.h> 41df8bae1dSRodney W. Grimes #include <sys/systm.h> 425f311da2SMike Silbersack #include <sys/callout.h> 4336b0360bSRobert Watson #include <sys/mac.h> 44df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 45b715f178SLuigi Rizzo #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/domain.h> 47df8bae1dSRodney W. Grimes #include <sys/protosw.h> 48df8bae1dSRodney W. Grimes #include <sys/socket.h> 49df8bae1dSRodney W. Grimes #include <sys/time.h> 50df8bae1dSRodney W. Grimes #include <sys/kernel.h> 511025071fSGarrett Wollman #include <sys/syslog.h> 52b5e8ce9fSBruce Evans #include <sys/sysctl.h> 53df8bae1dSRodney W. Grimes 54c85540ddSAndrey A. Chernov #include <net/pfil.h> 55df8bae1dSRodney W. Grimes #include <net/if.h> 569494d596SBrooks Davis #include <net/if_types.h> 57d314ad7bSJulian Elischer #include <net/if_var.h> 5882c23ebaSBill Fenner #include <net/if_dl.h> 59df8bae1dSRodney W. Grimes #include <net/route.h> 60748e0b0aSGarrett Wollman #include <net/netisr.h> 61df8bae1dSRodney W. Grimes 62df8bae1dSRodney W. Grimes #include <netinet/in.h> 63df8bae1dSRodney W. 
Grimes #include <netinet/in_systm.h> 64b5e8ce9fSBruce Evans #include <netinet/in_var.h> 65df8bae1dSRodney W. Grimes #include <netinet/ip.h> 66df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 67df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 68df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 69ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 7058938916SGarrett Wollman #include <machine/in_cksum.h> 71a9771948SGleb Smirnoff #ifdef DEV_CARP 72a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 73a9771948SGleb Smirnoff #endif 74df8bae1dSRodney W. Grimes 75f0068c4aSGarrett Wollman #include <sys/socketvar.h> 766ddbf1e2SGary Palmer 779b932e9eSAndre Oppermann /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */ 786ddbf1e2SGary Palmer #include <netinet/ip_fw.h> 79db69a05dSPaul Saab #include <netinet/ip_dummynet.h> 80db69a05dSPaul Saab 816a800098SYoshinobu Inoue #ifdef IPSEC 826a800098SYoshinobu Inoue #include <netinet6/ipsec.h> 836a800098SYoshinobu Inoue #include <netkey/key.h> 846a800098SYoshinobu Inoue #endif 856a800098SYoshinobu Inoue 86b9234fafSSam Leffler #ifdef FAST_IPSEC 87b9234fafSSam Leffler #include <netipsec/ipsec.h> 88b9234fafSSam Leffler #include <netipsec/key.h> 89b9234fafSSam Leffler #endif 90b9234fafSSam Leffler 911c5de19aSGarrett Wollman int rsvp_on = 0; 92f0068c4aSGarrett Wollman 931f91d8c5SDavid Greenman int ipforwarding = 0; 940312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 953d177f46SBill Fumerola &ipforwarding, 0, "Enable IP forwarding between interfaces"); 960312fbe9SPoul-Henning Kamp 97d4fb926cSGarrett Wollman static int ipsendredirects = 1; /* XXX */ 980312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 993d177f46SBill Fumerola &ipsendredirects, 0, "Enable sending IP redirects"); 1000312fbe9SPoul-Henning Kamp 101df8bae1dSRodney W. 
Grimes int ip_defttl = IPDEFTTL; 1020312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, 1033d177f46SBill Fumerola &ip_defttl, 0, "Maximum TTL on IP packets"); 1040312fbe9SPoul-Henning Kamp 1056a800098SYoshinobu Inoue static int ip_keepfaith = 0; 1066a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 1076a800098SYoshinobu Inoue &ip_keepfaith, 0, 1086a800098SYoshinobu Inoue "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 1096a800098SYoshinobu Inoue 110df285b3dSMike Silbersack static int ip_sendsourcequench = 0; 111df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 112df285b3dSMike Silbersack &ip_sendsourcequench, 0, 113df285b3dSMike Silbersack "Enable the transmission of source quench packets"); 114df285b3dSMike Silbersack 1151f44b0a1SDavid Malone int ip_do_randomid = 0; 1161f44b0a1SDavid Malone SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, 1171f44b0a1SDavid Malone &ip_do_randomid, 0, 1181f44b0a1SDavid Malone "Assign random ip_id values"); 1191f44b0a1SDavid Malone 120823db0e9SDon Lewis /* 121823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 122823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 123a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 124823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1253f67c834SDon Lewis * 126a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 127a8f12100SDon Lewis * to translate the destination address to another local interface. 1283f67c834SDon Lewis * 1293f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1303f67c834SDon Lewis * to the loopback interface instead of the interface where the 1313f67c834SDon Lewis * packets for those addresses are received. 
132823db0e9SDon Lewis */ 1334bc37f98SMaxim Konovalov static int ip_checkinterface = 0; 134b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 135b3e95d4eSJonathan Lemon &ip_checkinterface, 0, "Verify packet arrives on correct interface"); 136b3e95d4eSJonathan Lemon 137c21fd232SAndre Oppermann struct pfil_head inet_pfil_hook; /* Packet filter hooks */ 138df8bae1dSRodney W. Grimes 1391cafed39SJonathan Lemon static struct ifqueue ipintrq; 140ca925d9cSJonathan Lemon static int ipqmaxlen = IFQ_MAXLEN; 141ca925d9cSJonathan Lemon 142df8bae1dSRodney W. Grimes extern struct domain inetdomain; 143f0ffb944SJulian Elischer extern struct protosw inetsw[]; 144df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 14559562606SGarrett Wollman struct in_ifaddrhead in_ifaddrhead; /* first inet address */ 146ca925d9cSJonathan Lemon struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ 147ca925d9cSJonathan Lemon u_long in_ifaddrhmask; /* mask for hash table */ 148ca925d9cSJonathan Lemon 149afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, 1503d177f46SBill Fumerola &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); 1510312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, 1523d177f46SBill Fumerola &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); 153df8bae1dSRodney W. Grimes 154f23b4c91SGarrett Wollman struct ipstat ipstat; 155c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 1563d177f46SBill Fumerola &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); 157194a213eSAndrey A. Chernov 158d248c7d7SRobert Watson /* 159d248c7d7SRobert Watson * IP datagram reassembly. 160d248c7d7SRobert Watson */ 161194a213eSAndrey A. Chernov #define IPREASS_NHASH_LOG2 6 162194a213eSAndrey A. Chernov #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) 163194a213eSAndrey A. 
Chernov #define IPREASS_HMASK (IPREASS_NHASH - 1) 164194a213eSAndrey A. Chernov #define IPREASS_HASH(x,y) \ 165831a80b0SMatthew Dillon (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) 166194a213eSAndrey A. Chernov 167d248c7d7SRobert Watson static uma_zone_t ipq_zone; 168462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; 169dfa60d93SRobert Watson static struct mtx ipqlock; 1702fad1e93SSam Leffler 1712fad1e93SSam Leffler #define IPQ_LOCK() mtx_lock(&ipqlock) 1722fad1e93SSam Leffler #define IPQ_UNLOCK() mtx_unlock(&ipqlock) 173888c2a3cSSam Leffler #define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) 174888c2a3cSSam Leffler #define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED) 175f23b4c91SGarrett Wollman 176d248c7d7SRobert Watson static void maxnipq_update(void); 177d248c7d7SRobert Watson 178d248c7d7SRobert Watson static int maxnipq; /* Administrative limit on # reass queues. */ 179d248c7d7SRobert Watson static int nipq = 0; /* Total # of reass queues */ 180d248c7d7SRobert Watson SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0, 181d248c7d7SRobert Watson "Current number of IPv4 fragment reassembly queue entries"); 182d248c7d7SRobert Watson 183d248c7d7SRobert Watson static int maxfragsperpacket; 184d248c7d7SRobert Watson SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 185d248c7d7SRobert Watson &maxfragsperpacket, 0, 186d248c7d7SRobert Watson "Maximum number of IPv4 fragments allowed per packet"); 187d248c7d7SRobert Watson 188d248c7d7SRobert Watson struct callout ipport_tick_callout; 189d248c7d7SRobert Watson 1900312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1910312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1923d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1930312fbe9SPoul-Henning Kamp #endif 1940312fbe9SPoul-Henning Kamp 1951b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 196c76ff708SAndre Oppermann int ipstealth = 0; 
1971b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, 1981b968362SDag-Erling Smørgrav &ipstealth, 0, ""); 1991b968362SDag-Erling Smørgrav #endif 2001b968362SDag-Erling Smørgrav 2019b932e9eSAndre Oppermann /* 2029b932e9eSAndre Oppermann * ipfw_ether and ipfw_bridge hooks. 2039b932e9eSAndre Oppermann * XXX: Temporary until those are converted to pfil_hooks as well. 2049b932e9eSAndre Oppermann */ 2059b932e9eSAndre Oppermann ip_fw_chk_t *ip_fw_chk_ptr = NULL; 2069b932e9eSAndre Oppermann ip_dn_io_t *ip_dn_io_ptr = NULL; 207e4c97effSAndre Oppermann int fw_enable = 1; 20897850a5dSLuigi Rizzo int fw_one_pass = 1; 209e7319babSPoul-Henning Kamp 2104d77a549SAlfred Perlstein static void ip_freef(struct ipqhead *, struct ipq *); 2118948e4baSArchie Cobbs 212df8bae1dSRodney W. Grimes /* 213df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 214df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 215df8bae1dSRodney W. Grimes */ 216df8bae1dSRodney W. Grimes void 217df8bae1dSRodney W. Grimes ip_init() 218df8bae1dSRodney W. Grimes { 219f0ffb944SJulian Elischer register struct protosw *pr; 220df8bae1dSRodney W. Grimes register int i; 221df8bae1dSRodney W. Grimes 22259562606SGarrett Wollman TAILQ_INIT(&in_ifaddrhead); 223ca925d9cSJonathan Lemon in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); 224f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 22502410549SRobert Watson if (pr == NULL) 226db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 227db09bef3SAndre Oppermann 228db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 229df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 230df8bae1dSRodney W. 
Grimes ip_protox[i] = pr - inetsw; 231db09bef3SAndre Oppermann /* 232db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 233db09bef3SAndre Oppermann * in ip_protox[]. 234db09bef3SAndre Oppermann */ 235f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 236f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 237df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 238db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 239db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 240db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 241df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 242db09bef3SAndre Oppermann } 243194a213eSAndrey A. Chernov 244c21fd232SAndre Oppermann /* Initialize packet filter hooks. */ 245134ea224SSam Leffler inet_pfil_hook.ph_type = PFIL_TYPE_AF; 246134ea224SSam Leffler inet_pfil_hook.ph_af = AF_INET; 247134ea224SSam Leffler if ((i = pfil_head_register(&inet_pfil_hook)) != 0) 248134ea224SSam Leffler printf("%s: WARNING: unable to register pfil hook, " 249134ea224SSam Leffler "error %d\n", __func__, i); 250134ea224SSam Leffler 251db09bef3SAndre Oppermann /* Initialize IP reassembly queue. */ 2522fad1e93SSam Leffler IPQ_LOCK_INIT(); 253194a213eSAndrey A. Chernov for (i = 0; i < IPREASS_NHASH; i++) 254462b86feSPoul-Henning Kamp TAILQ_INIT(&ipq[i]); 255375386e2SMike Silbersack maxnipq = nmbclusters / 32; 256375386e2SMike Silbersack maxfragsperpacket = 16; 257d248c7d7SRobert Watson ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL, 258d248c7d7SRobert Watson NULL, UMA_ALIGN_PTR, 0); 259d248c7d7SRobert Watson maxnipq_update(); 260194a213eSAndrey A. Chernov 2615f311da2SMike Silbersack /* Start ipport_tick. 
*/ 2625f311da2SMike Silbersack callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); 2635f311da2SMike Silbersack ipport_tick(NULL); 2645f311da2SMike Silbersack EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, 2655f311da2SMike Silbersack SHUTDOWN_PRI_DEFAULT); 2665f311da2SMike Silbersack 267db09bef3SAndre Oppermann /* Initialize various other remaining things. */ 268227ee8a1SPoul-Henning Kamp ip_id = time_second & 0xffff; 269df8bae1dSRodney W. Grimes ipintrq.ifq_maxlen = ipqmaxlen; 2706008862bSJohn Baldwin mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); 2717902224cSSam Leffler netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE); 272df8bae1dSRodney W. Grimes } 273df8bae1dSRodney W. Grimes 2745f311da2SMike Silbersack void ip_fini(xtp) 2755f311da2SMike Silbersack void *xtp; 2765f311da2SMike Silbersack { 2775f311da2SMike Silbersack callout_stop(&ipport_tick_callout); 2785f311da2SMike Silbersack } 2795f311da2SMike Silbersack 2804d2e3692SLuigi Rizzo /* 281df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 282df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 283df8bae1dSRodney W. Grimes */ 284c67b1d17SGarrett Wollman void 285c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 286df8bae1dSRodney W. 
Grimes { 2879188b4a1SAndre Oppermann struct ip *ip = NULL; 2885da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 289ca925d9cSJonathan Lemon struct ifaddr *ifa; 2909b932e9eSAndre Oppermann int checkif, hlen = 0; 29147c861ecSBrian Somers u_short sum; 29202c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 293f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 294b9234fafSSam Leffler #ifdef FAST_IPSEC 29536e8826fSMax Laier struct m_tag *mtag; 296b9234fafSSam Leffler struct tdb_ident *tdbi; 297b9234fafSSam Leffler struct secpolicy *sp; 298b9234fafSSam Leffler int s, error; 299b9234fafSSam Leffler #endif /* FAST_IPSEC */ 300b715f178SLuigi Rizzo 301fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 302db40007dSAndrew R. Reiter 303ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 3049b932e9eSAndre Oppermann /* 30576ff6dcfSAndre Oppermann * Firewall or NAT changed destination to local. 30676ff6dcfSAndre Oppermann * We expect ip_len and ip_off to be in host byte order. 3079b932e9eSAndre Oppermann */ 30876ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 30976ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 3102b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 31153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 3129b932e9eSAndre Oppermann goto ours; 3132b25acc1SLuigi Rizzo } 3142b25acc1SLuigi Rizzo 315df8bae1dSRodney W. Grimes ipstat.ips_total++; 31658938916SGarrett Wollman 31758938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 31858938916SGarrett Wollman goto tooshort; 31958938916SGarrett Wollman 320df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 3210b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 322df8bae1dSRodney W. Grimes ipstat.ips_toosmall++; 323c67b1d17SGarrett Wollman return; 324df8bae1dSRodney W. Grimes } 325df8bae1dSRodney W. 
Grimes ip = mtod(m, struct ip *); 32658938916SGarrett Wollman 32753be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 328df8bae1dSRodney W. Grimes ipstat.ips_badvers++; 329df8bae1dSRodney W. Grimes goto bad; 330df8bae1dSRodney W. Grimes } 33158938916SGarrett Wollman 33253be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 333df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 334df8bae1dSRodney W. Grimes ipstat.ips_badhlen++; 335df8bae1dSRodney W. Grimes goto bad; 336df8bae1dSRodney W. Grimes } 337df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 3380b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 339df8bae1dSRodney W. Grimes ipstat.ips_badhlen++; 340c67b1d17SGarrett Wollman return; 341df8bae1dSRodney W. Grimes } 342df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 343df8bae1dSRodney W. Grimes } 34433841545SHajimu UMEMOTO 34533841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 34633841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 34733841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 34833841545SHajimu UMEMOTO if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { 34933841545SHajimu UMEMOTO ipstat.ips_badaddr++; 35033841545SHajimu UMEMOTO goto bad; 35133841545SHajimu UMEMOTO } 35233841545SHajimu UMEMOTO } 35333841545SHajimu UMEMOTO 354db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 355db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 356db4f9cc7SJonathan Lemon } else { 35758938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 35847c861ecSBrian Somers sum = in_cksum_hdr(ip); 35958938916SGarrett Wollman } else { 36047c861ecSBrian Somers sum = in_cksum(m, hlen); 36158938916SGarrett Wollman } 362db4f9cc7SJonathan Lemon } 36347c861ecSBrian Somers if (sum) { 364df8bae1dSRodney W. Grimes ipstat.ips_badsum++; 365df8bae1dSRodney W. Grimes goto bad; 366df8bae1dSRodney W. 
Grimes } 367df8bae1dSRodney W. Grimes 36802b199f1SMax Laier #ifdef ALTQ 36902b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 37002b199f1SMax Laier /* packet is dropped by traffic conditioner */ 37102b199f1SMax Laier return; 37202b199f1SMax Laier #endif 37302b199f1SMax Laier 374df8bae1dSRodney W. Grimes /* 375df8bae1dSRodney W. Grimes * Convert fields to host representation. 376df8bae1dSRodney W. Grimes */ 377fd8e4ebcSMike Barcroft ip->ip_len = ntohs(ip->ip_len); 378df8bae1dSRodney W. Grimes if (ip->ip_len < hlen) { 379df8bae1dSRodney W. Grimes ipstat.ips_badlen++; 380df8bae1dSRodney W. Grimes goto bad; 381df8bae1dSRodney W. Grimes } 382fd8e4ebcSMike Barcroft ip->ip_off = ntohs(ip->ip_off); 383df8bae1dSRodney W. Grimes 384df8bae1dSRodney W. Grimes /* 385df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 386df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 387df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 388df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 389df8bae1dSRodney W. Grimes */ 390df8bae1dSRodney W. Grimes if (m->m_pkthdr.len < ip->ip_len) { 39158938916SGarrett Wollman tooshort: 392df8bae1dSRodney W. Grimes ipstat.ips_tooshort++; 393df8bae1dSRodney W. Grimes goto bad; 394df8bae1dSRodney W. Grimes } 395df8bae1dSRodney W. Grimes if (m->m_pkthdr.len > ip->ip_len) { 396df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 397df8bae1dSRodney W. Grimes m->m_len = ip->ip_len; 398df8bae1dSRodney W. Grimes m->m_pkthdr.len = ip->ip_len; 399df8bae1dSRodney W. Grimes } else 400df8bae1dSRodney W. Grimes m_adj(m, ip->ip_len - m->m_pkthdr.len); 401df8bae1dSRodney W. Grimes } 40214dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF) 40314dd6717SSam Leffler /* 40414dd6717SSam Leffler * Bypass packet filtering for packets from a tunnel (gif). 
40514dd6717SSam Leffler */ 4060f9ade71SHajimu UMEMOTO if (ipsec_getnhist(m)) 407c21fd232SAndre Oppermann goto passin; 40814dd6717SSam Leffler #endif 4091f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF) 4101f76a5e2SSam Leffler /* 4111f76a5e2SSam Leffler * Bypass packet filtering for packets from a tunnel (gif). 4121f76a5e2SSam Leffler */ 4131f76a5e2SSam Leffler if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) 414c21fd232SAndre Oppermann goto passin; 4151f76a5e2SSam Leffler #endif 4163f67c834SDon Lewis 417c4ac87eaSDarren Reed /* 418134ea224SSam Leffler * Run through list of hooks for input packets. 419f51f805fSSam Leffler * 420f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 421f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 422f51f805fSSam Leffler * ip_forward to do the right thing. 423c4ac87eaSDarren Reed */ 424c21fd232SAndre Oppermann 425c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. 
*/ 426c21fd232SAndre Oppermann if (inet_pfil_hook.ph_busy_count == -1) 427c21fd232SAndre Oppermann goto passin; 428c21fd232SAndre Oppermann 429f51f805fSSam Leffler odst = ip->ip_dst; 430134ea224SSam Leffler if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, 431d6a8d588SMax Laier PFIL_IN, NULL) != 0) 432beec8214SDarren Reed return; 433134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 434c4ac87eaSDarren Reed return; 4359b932e9eSAndre Oppermann 436c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 43702c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 4389b932e9eSAndre Oppermann 4399b932e9eSAndre Oppermann #ifdef IPFIREWALL_FORWARD 4409b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 4419b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 4429b932e9eSAndre Oppermann goto ours; 4439b932e9eSAndre Oppermann } 444099dd043SAndre Oppermann #ifndef IPFIREWALL_FORWARD_EXTENDED 4459b932e9eSAndre Oppermann dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); 446099dd043SAndre Oppermann #else 447099dd043SAndre Oppermann if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) { 448099dd043SAndre Oppermann /* 449099dd043SAndre Oppermann * Directly ship on the packet. This allows to forward packets 450099dd043SAndre Oppermann * that were destined for us to some other directly connected 451099dd043SAndre Oppermann * host. 452099dd043SAndre Oppermann */ 453099dd043SAndre Oppermann ip_forward(m, dchg); 454099dd043SAndre Oppermann return; 455099dd043SAndre Oppermann } 456099dd043SAndre Oppermann #endif /* IPFIREWALL_FORWARD_EXTENDED */ 4579b932e9eSAndre Oppermann #endif /* IPFIREWALL_FORWARD */ 4589b932e9eSAndre Oppermann 459c21fd232SAndre Oppermann passin: 460df8bae1dSRodney W. Grimes /* 461df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 462df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 463df8bae1dSRodney W. 
Grimes * error was detected (causing an icmp message 464df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 465df8bae1dSRodney W. Grimes */ 4669b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 467c67b1d17SGarrett Wollman return; 468df8bae1dSRodney W. Grimes 469f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 470f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 471f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 472f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 473f0068c4aSGarrett Wollman * grabbing the packet. 474f0068c4aSGarrett Wollman */ 4751c5de19aSGarrett Wollman if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 476f0068c4aSGarrett Wollman goto ours; 477f0068c4aSGarrett Wollman 478df8bae1dSRodney W. Grimes /* 479df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 480cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 481cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 482cc766e04SGarrett Wollman * with it). 483df8bae1dSRodney W. Grimes */ 484cc766e04SGarrett Wollman if (TAILQ_EMPTY(&in_ifaddrhead) && 485cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 486cc766e04SGarrett Wollman goto ours; 487cc766e04SGarrett Wollman 4887538a9a0SJonathan Lemon /* 489823db0e9SDon Lewis * Enable a consistency check between the destination address 490823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 491823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 492e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 493e15ae1b2SDon Lewis * 'ipfw fwd'. 
4943f67c834SDon Lewis * 4953f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 4963f67c834SDon Lewis * address is ipnat'ed to a different interface. 4973f67c834SDon Lewis * 498a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 4993f67c834SDon Lewis * to the loopback interface instead of the interface where 5003f67c834SDon Lewis * the packets are received. 501a9771948SGleb Smirnoff * 502a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 503a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 504a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 505823db0e9SDon Lewis */ 506823db0e9SDon Lewis checkif = ip_checkinterface && (ipforwarding == 0) && 5079494d596SBrooks Davis m->m_pkthdr.rcvif != NULL && 508e15ae1b2SDon Lewis ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && 509a9771948SGleb Smirnoff #ifdef DEV_CARP 510a9771948SGleb Smirnoff !m->m_pkthdr.rcvif->if_carp && 511a9771948SGleb Smirnoff #endif 5129b932e9eSAndre Oppermann (dchg == 0); 513823db0e9SDon Lewis 514ca925d9cSJonathan Lemon /* 515ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 516ca925d9cSJonathan Lemon */ 5179b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 518f9e354dfSJulian Elischer /* 519823db0e9SDon Lewis * If the address matches, verify that the packet 520823db0e9SDon Lewis * arrived via the correct interface if checking is 521823db0e9SDon Lewis * enabled. 522f9e354dfSJulian Elischer */ 5239b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 524823db0e9SDon Lewis (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) 525ed1ff184SJulian Elischer goto ours; 526ca925d9cSJonathan Lemon } 527823db0e9SDon Lewis /* 528ca925d9cSJonathan Lemon * Check for broadcast addresses. 
529ca925d9cSJonathan Lemon * 530ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 531ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 532ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 533ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 534823db0e9SDon Lewis */ 5354f450ff9SBruce M Simpson if (m->m_pkthdr.rcvif != NULL && 5364f450ff9SBruce M Simpson m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 537ca925d9cSJonathan Lemon TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 538ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 539ca925d9cSJonathan Lemon continue; 540ca925d9cSJonathan Lemon ia = ifatoia(ifa); 541df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 5429b932e9eSAndre Oppermann ip->ip_dst.s_addr) 543df8bae1dSRodney W. Grimes goto ours; 5449b932e9eSAndre Oppermann if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) 545df8bae1dSRodney W. Grimes goto ours; 5460ac40133SBrian Somers #ifdef BOOTP_COMPAT 547ca925d9cSJonathan Lemon if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) 548ca925d9cSJonathan Lemon goto ours; 5490ac40133SBrian Somers #endif 550df8bae1dSRodney W. Grimes } 551df8bae1dSRodney W. Grimes } 552df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 553df8bae1dSRodney W. Grimes struct in_multi *inm; 554df8bae1dSRodney W. Grimes if (ip_mrouter) { 555df8bae1dSRodney W. Grimes /* 556df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 557df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 558df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 559df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 560df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 561df8bae1dSRodney W. 
Grimes * must be discarded, else it may be accepted below. 562df8bae1dSRodney W. Grimes */ 563bbb4330bSLuigi Rizzo if (ip_mforward && 564bbb4330bSLuigi Rizzo ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { 565df8bae1dSRodney W. Grimes ipstat.ips_cantforward++; 566df8bae1dSRodney W. Grimes m_freem(m); 567c67b1d17SGarrett Wollman return; 568df8bae1dSRodney W. Grimes } 569df8bae1dSRodney W. Grimes 570df8bae1dSRodney W. Grimes /* 57111612afaSDima Dorfman * The process-level routing daemon needs to receive 572df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 573df8bae1dSRodney W. Grimes * host belongs to their destination groups. 574df8bae1dSRodney W. Grimes */ 575df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 576df8bae1dSRodney W. Grimes goto ours; 577df8bae1dSRodney W. Grimes ipstat.ips_forward++; 578df8bae1dSRodney W. Grimes } 579df8bae1dSRodney W. Grimes /* 580df8bae1dSRodney W. Grimes * See if we belong to the destination multicast group on the 581df8bae1dSRodney W. Grimes * arrival interface. 582df8bae1dSRodney W. Grimes */ 583dd5a318bSRobert Watson IN_MULTI_LOCK(); 584df8bae1dSRodney W. Grimes IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); 585dd5a318bSRobert Watson IN_MULTI_UNLOCK(); 586df8bae1dSRodney W. Grimes if (inm == NULL) { 58782c39223SGarrett Wollman ipstat.ips_notmember++; 588df8bae1dSRodney W. Grimes m_freem(m); 589c67b1d17SGarrett Wollman return; 590df8bae1dSRodney W. Grimes } 591df8bae1dSRodney W. Grimes goto ours; 592df8bae1dSRodney W. Grimes } 593df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 594df8bae1dSRodney W. Grimes goto ours; 595df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 596df8bae1dSRodney W. Grimes goto ours; 597df8bae1dSRodney W. 
Grimes 5986a800098SYoshinobu Inoue /* 5996a800098SYoshinobu Inoue * FAITH(Firewall Aided Internet Translator) 6006a800098SYoshinobu Inoue */ 6016a800098SYoshinobu Inoue if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 6026a800098SYoshinobu Inoue if (ip_keepfaith) { 6036a800098SYoshinobu Inoue if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 6046a800098SYoshinobu Inoue goto ours; 6056a800098SYoshinobu Inoue } 6066a800098SYoshinobu Inoue m_freem(m); 6076a800098SYoshinobu Inoue return; 6086a800098SYoshinobu Inoue } 6099494d596SBrooks Davis 610df8bae1dSRodney W. Grimes /* 611df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 612df8bae1dSRodney W. Grimes */ 613df8bae1dSRodney W. Grimes if (ipforwarding == 0) { 614df8bae1dSRodney W. Grimes ipstat.ips_cantforward++; 615df8bae1dSRodney W. Grimes m_freem(m); 616546f251bSChris D. Faulhaber } else { 617546f251bSChris D. Faulhaber #ifdef IPSEC 618546f251bSChris D. Faulhaber /* 619546f251bSChris D. Faulhaber * Enforce inbound IPsec SPD. 620546f251bSChris D. Faulhaber */ 621546f251bSChris D. Faulhaber if (ipsec4_in_reject(m, NULL)) { 622546f251bSChris D. Faulhaber ipsecstat.in_polvio++; 623546f251bSChris D. Faulhaber goto bad; 624546f251bSChris D. Faulhaber } 625546f251bSChris D. 
Faulhaber #endif /* IPSEC */ 626b9234fafSSam Leffler #ifdef FAST_IPSEC 627b9234fafSSam Leffler mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 628b9234fafSSam Leffler s = splnet(); 629b9234fafSSam Leffler if (mtag != NULL) { 630b9234fafSSam Leffler tdbi = (struct tdb_ident *)(mtag + 1); 631b9234fafSSam Leffler sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 632b9234fafSSam Leffler } else { 633b9234fafSSam Leffler sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 634b9234fafSSam Leffler IP_FORWARDING, &error); 635b9234fafSSam Leffler } 636b9234fafSSam Leffler if (sp == NULL) { /* NB: can happen if error */ 637b9234fafSSam Leffler splx(s); 638b9234fafSSam Leffler /*XXX error stat???*/ 639b9234fafSSam Leffler DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ 640b9234fafSSam Leffler goto bad; 641b9234fafSSam Leffler } 642b9234fafSSam Leffler 643b9234fafSSam Leffler /* 644b9234fafSSam Leffler * Check security policy against packet attributes. 645b9234fafSSam Leffler */ 646b9234fafSSam Leffler error = ipsec_in_reject(sp, m); 647b9234fafSSam Leffler KEY_FREESP(&sp); 648b9234fafSSam Leffler splx(s); 649b9234fafSSam Leffler if (error) { 650b9234fafSSam Leffler ipstat.ips_cantforward++; 651b9234fafSSam Leffler goto bad; 652b9234fafSSam Leffler } 653b9234fafSSam Leffler #endif /* FAST_IPSEC */ 6549b932e9eSAndre Oppermann ip_forward(m, dchg); 655546f251bSChris D. Faulhaber } 656c67b1d17SGarrett Wollman return; 657df8bae1dSRodney W. Grimes 658df8bae1dSRodney W. Grimes ours: 659d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 660d0ebc0d2SYaroslav Tykhiy /* 661d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 662d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 
663d0ebc0d2SYaroslav Tykhiy */ 6642b25acc1SLuigi Rizzo if (ipstealth && hlen > sizeof (struct ip) && 6659b932e9eSAndre Oppermann ip_dooptions(m, 1)) 666d0ebc0d2SYaroslav Tykhiy return; 667d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 668d0ebc0d2SYaroslav Tykhiy 6695da9f8faSJosef Karthauser /* Count the packet in the ip address stats */ 6705da9f8faSJosef Karthauser if (ia != NULL) { 6715da9f8faSJosef Karthauser ia->ia_ifa.if_ipackets++; 6725da9f8faSJosef Karthauser ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 6735da9f8faSJosef Karthauser } 674100ba1a6SJordan K. Hubbard 67563f8d699SJordan K. Hubbard /* 676b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 677ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 678df8bae1dSRodney W. Grimes */ 679f0cada84SAndre Oppermann if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 680f0cada84SAndre Oppermann m = ip_reass(m); 681f0cada84SAndre Oppermann if (m == NULL) 682c67b1d17SGarrett Wollman return; 6836a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 6847e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 68553be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 686f0cada84SAndre Oppermann } 687f0cada84SAndre Oppermann 688f0cada84SAndre Oppermann /* 689f0cada84SAndre Oppermann * Further protocols expect the packet length to be w/o the 690f0cada84SAndre Oppermann * IP header. 691f0cada84SAndre Oppermann */ 692df8bae1dSRodney W. Grimes ip->ip_len -= hlen; 693df8bae1dSRodney W. Grimes 69433841545SHajimu UMEMOTO #ifdef IPSEC 69533841545SHajimu UMEMOTO /* 69633841545SHajimu UMEMOTO * enforce IPsec policy checking if we are seeing last header. 69733841545SHajimu UMEMOTO * note that we do not visit this with protocols with pcb layer 69833841545SHajimu UMEMOTO * code - like udp/tcp/raw ip. 
69933841545SHajimu UMEMOTO */ 70033841545SHajimu UMEMOTO if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && 70133841545SHajimu UMEMOTO ipsec4_in_reject(m, NULL)) { 70233841545SHajimu UMEMOTO ipsecstat.in_polvio++; 70333841545SHajimu UMEMOTO goto bad; 70433841545SHajimu UMEMOTO } 70533841545SHajimu UMEMOTO #endif 706f4e98881SRuslan Ermilov #ifdef FAST_IPSEC 707b9234fafSSam Leffler /* 708b9234fafSSam Leffler * enforce IPsec policy checking if we are seeing last header. 709b9234fafSSam Leffler * note that we do not visit this with protocols with pcb layer 710b9234fafSSam Leffler * code - like udp/tcp/raw ip. 711b9234fafSSam Leffler */ 712b9234fafSSam Leffler if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { 713b9234fafSSam Leffler /* 714b9234fafSSam Leffler * Check if the packet has already had IPsec processing 715b9234fafSSam Leffler * done. If so, then just pass it along. This tag gets 716b9234fafSSam Leffler * set during AH, ESP, etc. input handling, before the 717b9234fafSSam Leffler * packet is returned to the ip input queue for delivery. 718b9234fafSSam Leffler */ 719b9234fafSSam Leffler mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 720b9234fafSSam Leffler s = splnet(); 721b9234fafSSam Leffler if (mtag != NULL) { 722b9234fafSSam Leffler tdbi = (struct tdb_ident *)(mtag + 1); 723b9234fafSSam Leffler sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 724b9234fafSSam Leffler } else { 725b9234fafSSam Leffler sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 726b9234fafSSam Leffler IP_FORWARDING, &error); 727b9234fafSSam Leffler } 728b9234fafSSam Leffler if (sp != NULL) { 729b9234fafSSam Leffler /* 730b9234fafSSam Leffler * Check security policy against packet attributes. 731b9234fafSSam Leffler */ 732b9234fafSSam Leffler error = ipsec_in_reject(sp, m); 733b9234fafSSam Leffler KEY_FREESP(&sp); 734b9234fafSSam Leffler } else { 735b9234fafSSam Leffler /* XXX error stat??? 
*/ 736b9234fafSSam Leffler error = EINVAL; 737b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ 738b9234fafSSam Leffler goto bad; 739b9234fafSSam Leffler } 740b9234fafSSam Leffler splx(s); 741b9234fafSSam Leffler if (error) 742b9234fafSSam Leffler goto bad; 743b9234fafSSam Leffler } 744b9234fafSSam Leffler #endif /* FAST_IPSEC */ 74533841545SHajimu UMEMOTO 746df8bae1dSRodney W. Grimes /* 747df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 748df8bae1dSRodney W. Grimes */ 749df8bae1dSRodney W. Grimes ipstat.ips_delivered++; 7509b932e9eSAndre Oppermann 7512b25acc1SLuigi Rizzo (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); 752c67b1d17SGarrett Wollman return; 753df8bae1dSRodney W. Grimes bad: 754df8bae1dSRodney W. Grimes m_freem(m); 755c67b1d17SGarrett Wollman } 756c67b1d17SGarrett Wollman 757c67b1d17SGarrett Wollman /* 758d248c7d7SRobert Watson * After maxnipq has been updated, propagate the change to UMA. The UMA zone 759d248c7d7SRobert Watson * max has slightly different semantics than the sysctl, for historical 760d248c7d7SRobert Watson * reasons. 761d248c7d7SRobert Watson */ 762d248c7d7SRobert Watson static void 763d248c7d7SRobert Watson maxnipq_update(void) 764d248c7d7SRobert Watson { 765d248c7d7SRobert Watson 766d248c7d7SRobert Watson /* 767d248c7d7SRobert Watson * -1 for unlimited allocation. 768d248c7d7SRobert Watson */ 769d248c7d7SRobert Watson if (maxnipq < 0) 770d248c7d7SRobert Watson uma_zone_set_max(ipq_zone, 0); 771d248c7d7SRobert Watson /* 772d248c7d7SRobert Watson * Positive number for specific bound. 
773d248c7d7SRobert Watson */ 774d248c7d7SRobert Watson if (maxnipq > 0) 775d248c7d7SRobert Watson uma_zone_set_max(ipq_zone, maxnipq); 776d248c7d7SRobert Watson /* 777d248c7d7SRobert Watson * Zero specifies no further fragment queue allocation -- set the 778d248c7d7SRobert Watson * bound very low, but rely on implementation elsewhere to actually 779d248c7d7SRobert Watson * prevent allocation and reclaim current queues. 780d248c7d7SRobert Watson */ 781d248c7d7SRobert Watson if (maxnipq == 0) 782d248c7d7SRobert Watson uma_zone_set_max(ipq_zone, 1); 783d248c7d7SRobert Watson } 784d248c7d7SRobert Watson 785d248c7d7SRobert Watson static int 786d248c7d7SRobert Watson sysctl_maxnipq(SYSCTL_HANDLER_ARGS) 787d248c7d7SRobert Watson { 788d248c7d7SRobert Watson int error, i; 789d248c7d7SRobert Watson 790d248c7d7SRobert Watson i = maxnipq; 791d248c7d7SRobert Watson error = sysctl_handle_int(oidp, &i, 0, req); 792d248c7d7SRobert Watson if (error || !req->newptr) 793d248c7d7SRobert Watson return (error); 794d248c7d7SRobert Watson 795d248c7d7SRobert Watson /* 796d248c7d7SRobert Watson * XXXRW: Might be a good idea to sanity check the argument and place 797d248c7d7SRobert Watson * an extreme upper bound. 798d248c7d7SRobert Watson */ 799d248c7d7SRobert Watson if (i < -1) 800d248c7d7SRobert Watson return (EINVAL); 801d248c7d7SRobert Watson maxnipq = i; 802d248c7d7SRobert Watson maxnipq_update(); 803d248c7d7SRobert Watson return (0); 804d248c7d7SRobert Watson } 805d248c7d7SRobert Watson 806d248c7d7SRobert Watson SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW, 807d248c7d7SRobert Watson NULL, 0, sysctl_maxnipq, "I", 808d248c7d7SRobert Watson "Maximum number of IPv4 fragment reassembly queue entries"); 809d248c7d7SRobert Watson 810d248c7d7SRobert Watson /* 8118948e4baSArchie Cobbs * Take incoming datagram fragment and try to reassemble it into 812f0cada84SAndre Oppermann * whole datagram. 
If the argument is the first fragment or one 813f0cada84SAndre Oppermann * in between the function will return NULL and store the mbuf 814f0cada84SAndre Oppermann * in the fragment chain. If the argument is the last fragment 815f0cada84SAndre Oppermann * the packet will be reassembled and the pointer to the new 816f0cada84SAndre Oppermann * mbuf returned for further processing. Only m_tags attached 817f0cada84SAndre Oppermann * to the first packet/fragment are preserved. 818f0cada84SAndre Oppermann * The IP header is *NOT* adjusted out of iplen. 819df8bae1dSRodney W. Grimes */ 8208948e4baSArchie Cobbs 821f0cada84SAndre Oppermann struct mbuf * 822f0cada84SAndre Oppermann ip_reass(struct mbuf *m) 823df8bae1dSRodney W. Grimes { 824f0cada84SAndre Oppermann struct ip *ip; 825f0cada84SAndre Oppermann struct mbuf *p, *q, *nq, *t; 826f0cada84SAndre Oppermann struct ipq *fp = NULL; 827f0cada84SAndre Oppermann struct ipqhead *head; 828f0cada84SAndre Oppermann int i, hlen, next; 82959dfcba4SHajimu UMEMOTO u_int8_t ecn, ecn0; 830f0cada84SAndre Oppermann u_short hash; 831df8bae1dSRodney W. Grimes 832800af1fbSMaxim Konovalov /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */ 833800af1fbSMaxim Konovalov if (maxnipq == 0 || maxfragsperpacket == 0) { 834f0cada84SAndre Oppermann ipstat.ips_fragments++; 835f0cada84SAndre Oppermann ipstat.ips_fragdropped++; 8369d804f81SAndre Oppermann m_freem(m); 8379d804f81SAndre Oppermann return (NULL); 838f0cada84SAndre Oppermann } 8392fad1e93SSam Leffler 840f0cada84SAndre Oppermann ip = mtod(m, struct ip *); 841f0cada84SAndre Oppermann hlen = ip->ip_hl << 2; 842f0cada84SAndre Oppermann 843f0cada84SAndre Oppermann hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 844f0cada84SAndre Oppermann head = &ipq[hash]; 845f0cada84SAndre Oppermann IPQ_LOCK(); 846f0cada84SAndre Oppermann 847f0cada84SAndre Oppermann /* 848f0cada84SAndre Oppermann * Look for queue of fragments 849f0cada84SAndre Oppermann * of this datagram. 
850f0cada84SAndre Oppermann */ 851f0cada84SAndre Oppermann TAILQ_FOREACH(fp, head, ipq_list) 852f0cada84SAndre Oppermann if (ip->ip_id == fp->ipq_id && 853f0cada84SAndre Oppermann ip->ip_src.s_addr == fp->ipq_src.s_addr && 854f0cada84SAndre Oppermann ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 855f0cada84SAndre Oppermann #ifdef MAC 856f0cada84SAndre Oppermann mac_fragment_match(m, fp) && 857f0cada84SAndre Oppermann #endif 858f0cada84SAndre Oppermann ip->ip_p == fp->ipq_p) 859f0cada84SAndre Oppermann goto found; 860f0cada84SAndre Oppermann 861f0cada84SAndre Oppermann fp = NULL; 862f0cada84SAndre Oppermann 863f0cada84SAndre Oppermann /* 864d248c7d7SRobert Watson * Attempt to trim the number of allocated fragment queues if it 865d248c7d7SRobert Watson * exceeds the administrative limit. 866f0cada84SAndre Oppermann */ 867f0cada84SAndre Oppermann if ((nipq > maxnipq) && (maxnipq > 0)) { 868f0cada84SAndre Oppermann /* 869f0cada84SAndre Oppermann * drop something from the tail of the current queue 870f0cada84SAndre Oppermann * before proceeding further 871f0cada84SAndre Oppermann */ 872f0cada84SAndre Oppermann struct ipq *q = TAILQ_LAST(head, ipqhead); 873f0cada84SAndre Oppermann if (q == NULL) { /* gak */ 874f0cada84SAndre Oppermann for (i = 0; i < IPREASS_NHASH; i++) { 875f0cada84SAndre Oppermann struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead); 876f0cada84SAndre Oppermann if (r) { 877f0cada84SAndre Oppermann ipstat.ips_fragtimeout += r->ipq_nfrags; 878f0cada84SAndre Oppermann ip_freef(&ipq[i], r); 879f0cada84SAndre Oppermann break; 880f0cada84SAndre Oppermann } 881f0cada84SAndre Oppermann } 882f0cada84SAndre Oppermann } else { 883f0cada84SAndre Oppermann ipstat.ips_fragtimeout += q->ipq_nfrags; 884f0cada84SAndre Oppermann ip_freef(head, q); 885f0cada84SAndre Oppermann } 886f0cada84SAndre Oppermann } 887f0cada84SAndre Oppermann 888f0cada84SAndre Oppermann found: 889f0cada84SAndre Oppermann /* 890f0cada84SAndre Oppermann * Adjust ip_len to not reflect header, 
891f0cada84SAndre Oppermann * convert offset of this to bytes. 892f0cada84SAndre Oppermann */ 893f0cada84SAndre Oppermann ip->ip_len -= hlen; 894f0cada84SAndre Oppermann if (ip->ip_off & IP_MF) { 895f0cada84SAndre Oppermann /* 896f0cada84SAndre Oppermann * Make sure that fragments have a data length 897f0cada84SAndre Oppermann * that's a non-zero multiple of 8 bytes. 898f0cada84SAndre Oppermann */ 899f0cada84SAndre Oppermann if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 900f0cada84SAndre Oppermann ipstat.ips_toosmall++; /* XXX */ 901f0cada84SAndre Oppermann goto dropfrag; 902f0cada84SAndre Oppermann } 903f0cada84SAndre Oppermann m->m_flags |= M_FRAG; 904f0cada84SAndre Oppermann } else 905f0cada84SAndre Oppermann m->m_flags &= ~M_FRAG; 906f0cada84SAndre Oppermann ip->ip_off <<= 3; 907f0cada84SAndre Oppermann 908f0cada84SAndre Oppermann 909f0cada84SAndre Oppermann /* 910f0cada84SAndre Oppermann * Attempt reassembly; if it succeeds, proceed. 911f0cada84SAndre Oppermann * ip_reass() will return a different mbuf. 912f0cada84SAndre Oppermann */ 913f0cada84SAndre Oppermann ipstat.ips_fragments++; 914f0cada84SAndre Oppermann m->m_pkthdr.header = ip; 915f0cada84SAndre Oppermann 916f0cada84SAndre Oppermann /* Previous ip_reass() started here. */ 917df8bae1dSRodney W. Grimes /* 918df8bae1dSRodney W. Grimes * Presence of header sizes in mbufs 919df8bae1dSRodney W. Grimes * would confuse code below. 920df8bae1dSRodney W. Grimes */ 921df8bae1dSRodney W. Grimes m->m_data += hlen; 922df8bae1dSRodney W. Grimes m->m_len -= hlen; 923df8bae1dSRodney W. Grimes 924df8bae1dSRodney W. Grimes /* 925df8bae1dSRodney W. Grimes * If first fragment to arrive, create a reassembly queue. 926df8bae1dSRodney W. Grimes */ 927042bbfa3SRobert Watson if (fp == NULL) { 928d248c7d7SRobert Watson fp = uma_zalloc(ipq_zone, M_NOWAIT); 929d248c7d7SRobert Watson if (fp == NULL) 930df8bae1dSRodney W. 
Grimes goto dropfrag; 93136b0360bSRobert Watson #ifdef MAC 9325e7ce478SRobert Watson if (mac_init_ipq(fp, M_NOWAIT) != 0) { 933d248c7d7SRobert Watson uma_zfree(ipq_zone, fp); 9345e7ce478SRobert Watson goto dropfrag; 9355e7ce478SRobert Watson } 93636b0360bSRobert Watson mac_create_ipq(m, fp); 93736b0360bSRobert Watson #endif 938462b86feSPoul-Henning Kamp TAILQ_INSERT_HEAD(head, fp, ipq_list); 939194a213eSAndrey A. Chernov nipq++; 940375386e2SMike Silbersack fp->ipq_nfrags = 1; 941df8bae1dSRodney W. Grimes fp->ipq_ttl = IPFRAGTTL; 942df8bae1dSRodney W. Grimes fp->ipq_p = ip->ip_p; 943df8bae1dSRodney W. Grimes fp->ipq_id = ip->ip_id; 9446effc713SDoug Rabson fp->ipq_src = ip->ip_src; 9456effc713SDoug Rabson fp->ipq_dst = ip->ip_dst; 946af38c68cSLuigi Rizzo fp->ipq_frags = m; 947af38c68cSLuigi Rizzo m->m_nextpkt = NULL; 948800af1fbSMaxim Konovalov goto done; 94936b0360bSRobert Watson } else { 950375386e2SMike Silbersack fp->ipq_nfrags++; 95136b0360bSRobert Watson #ifdef MAC 95236b0360bSRobert Watson mac_update_ipq(m, fp); 95336b0360bSRobert Watson #endif 954df8bae1dSRodney W. Grimes } 955df8bae1dSRodney W. Grimes 9566effc713SDoug Rabson #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 9576effc713SDoug Rabson 958df8bae1dSRodney W. Grimes /* 95959dfcba4SHajimu UMEMOTO * Handle ECN by comparing this segment with the first one; 96059dfcba4SHajimu UMEMOTO * if CE is set, do not lose CE. 96159dfcba4SHajimu UMEMOTO * drop if CE and not-ECT are mixed for the same packet. 
96259dfcba4SHajimu UMEMOTO */ 96359dfcba4SHajimu UMEMOTO ecn = ip->ip_tos & IPTOS_ECN_MASK; 96459dfcba4SHajimu UMEMOTO ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; 96559dfcba4SHajimu UMEMOTO if (ecn == IPTOS_ECN_CE) { 96659dfcba4SHajimu UMEMOTO if (ecn0 == IPTOS_ECN_NOTECT) 96759dfcba4SHajimu UMEMOTO goto dropfrag; 96859dfcba4SHajimu UMEMOTO if (ecn0 != IPTOS_ECN_CE) 96959dfcba4SHajimu UMEMOTO GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; 97059dfcba4SHajimu UMEMOTO } 97159dfcba4SHajimu UMEMOTO if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) 97259dfcba4SHajimu UMEMOTO goto dropfrag; 97359dfcba4SHajimu UMEMOTO 97459dfcba4SHajimu UMEMOTO /* 975df8bae1dSRodney W. Grimes * Find a segment which begins after this one does. 976df8bae1dSRodney W. Grimes */ 9776effc713SDoug Rabson for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 9786effc713SDoug Rabson if (GETIP(q)->ip_off > ip->ip_off) 979df8bae1dSRodney W. Grimes break; 980df8bae1dSRodney W. Grimes 981df8bae1dSRodney W. Grimes /* 982df8bae1dSRodney W. Grimes * If there is a preceding segment, it may provide some of 983df8bae1dSRodney W. Grimes * our data already. If so, drop the data from the incoming 984af38c68cSLuigi Rizzo * segment. If it provides all of our data, drop us, otherwise 985af38c68cSLuigi Rizzo * stick new segment in the proper place. 986db4f9cc7SJonathan Lemon * 987db4f9cc7SJonathan Lemon * If some of the data is dropped from the the preceding 988db4f9cc7SJonathan Lemon * segment, then it's checksum is invalidated. 989df8bae1dSRodney W. Grimes */ 9906effc713SDoug Rabson if (p) { 9916effc713SDoug Rabson i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 992df8bae1dSRodney W. Grimes if (i > 0) { 993df8bae1dSRodney W. Grimes if (i >= ip->ip_len) 994df8bae1dSRodney W. Grimes goto dropfrag; 9956a800098SYoshinobu Inoue m_adj(m, i); 996db4f9cc7SJonathan Lemon m->m_pkthdr.csum_flags = 0; 997df8bae1dSRodney W. Grimes ip->ip_off += i; 998df8bae1dSRodney W. 
Grimes ip->ip_len -= i; 999df8bae1dSRodney W. Grimes } 1000af38c68cSLuigi Rizzo m->m_nextpkt = p->m_nextpkt; 1001af38c68cSLuigi Rizzo p->m_nextpkt = m; 1002af38c68cSLuigi Rizzo } else { 1003af38c68cSLuigi Rizzo m->m_nextpkt = fp->ipq_frags; 1004af38c68cSLuigi Rizzo fp->ipq_frags = m; 1005df8bae1dSRodney W. Grimes } 1006df8bae1dSRodney W. Grimes 1007df8bae1dSRodney W. Grimes /* 1008df8bae1dSRodney W. Grimes * While we overlap succeeding segments trim them or, 1009df8bae1dSRodney W. Grimes * if they are completely covered, dequeue them. 1010df8bae1dSRodney W. Grimes */ 10116effc713SDoug Rabson for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 1012af38c68cSLuigi Rizzo q = nq) { 1013b36f5b37SMaxim Konovalov i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 10146effc713SDoug Rabson if (i < GETIP(q)->ip_len) { 10156effc713SDoug Rabson GETIP(q)->ip_len -= i; 10166effc713SDoug Rabson GETIP(q)->ip_off += i; 10176effc713SDoug Rabson m_adj(q, i); 1018db4f9cc7SJonathan Lemon q->m_pkthdr.csum_flags = 0; 1019df8bae1dSRodney W. Grimes break; 1020df8bae1dSRodney W. Grimes } 10216effc713SDoug Rabson nq = q->m_nextpkt; 1022af38c68cSLuigi Rizzo m->m_nextpkt = nq; 102399e8617dSMaxim Konovalov ipstat.ips_fragdropped++; 1024375386e2SMike Silbersack fp->ipq_nfrags--; 10256effc713SDoug Rabson m_freem(q); 1026df8bae1dSRodney W. Grimes } 1027df8bae1dSRodney W. Grimes 1028df8bae1dSRodney W. Grimes /* 1029375386e2SMike Silbersack * Check for complete reassembly and perform frag per packet 1030375386e2SMike Silbersack * limiting. 1031375386e2SMike Silbersack * 1032375386e2SMike Silbersack * Frag limiting is performed here so that the nth frag has 1033375386e2SMike Silbersack * a chance to complete the packet before we drop the packet. 1034375386e2SMike Silbersack * As a result, n+1 frags are actually allowed per packet, but 1035375386e2SMike Silbersack * only n will ever be stored. (n = maxfragsperpacket.) 1036375386e2SMike Silbersack * 1037df8bae1dSRodney W. 
Grimes */ 10386effc713SDoug Rabson next = 0; 10396effc713SDoug Rabson for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 1040375386e2SMike Silbersack if (GETIP(q)->ip_off != next) { 104199e8617dSMaxim Konovalov if (fp->ipq_nfrags > maxfragsperpacket) { 104299e8617dSMaxim Konovalov ipstat.ips_fragdropped += fp->ipq_nfrags; 1043375386e2SMike Silbersack ip_freef(head, fp); 104499e8617dSMaxim Konovalov } 1045f0cada84SAndre Oppermann goto done; 1046375386e2SMike Silbersack } 10476effc713SDoug Rabson next += GETIP(q)->ip_len; 10486effc713SDoug Rabson } 10496effc713SDoug Rabson /* Make sure the last packet didn't have the IP_MF flag */ 1050375386e2SMike Silbersack if (p->m_flags & M_FRAG) { 105199e8617dSMaxim Konovalov if (fp->ipq_nfrags > maxfragsperpacket) { 105299e8617dSMaxim Konovalov ipstat.ips_fragdropped += fp->ipq_nfrags; 1053375386e2SMike Silbersack ip_freef(head, fp); 105499e8617dSMaxim Konovalov } 1055f0cada84SAndre Oppermann goto done; 1056375386e2SMike Silbersack } 1057df8bae1dSRodney W. Grimes 1058df8bae1dSRodney W. Grimes /* 1059430d30d8SBill Fenner * Reassembly is complete. Make sure the packet is a sane size. 1060430d30d8SBill Fenner */ 10616effc713SDoug Rabson q = fp->ipq_frags; 10626effc713SDoug Rabson ip = GETIP(q); 106353be11f6SPoul-Henning Kamp if (next + (ip->ip_hl << 2) > IP_MAXPACKET) { 1064430d30d8SBill Fenner ipstat.ips_toolong++; 106599e8617dSMaxim Konovalov ipstat.ips_fragdropped += fp->ipq_nfrags; 1066462b86feSPoul-Henning Kamp ip_freef(head, fp); 1067f0cada84SAndre Oppermann goto done; 1068430d30d8SBill Fenner } 1069430d30d8SBill Fenner 1070430d30d8SBill Fenner /* 1071430d30d8SBill Fenner * Concatenate fragments. 1072df8bae1dSRodney W. Grimes */ 10736effc713SDoug Rabson m = q; 1074df8bae1dSRodney W. Grimes t = m->m_next; 107502410549SRobert Watson m->m_next = NULL; 1076df8bae1dSRodney W. 
Grimes m_cat(m, t); 10776effc713SDoug Rabson nq = q->m_nextpkt; 107802410549SRobert Watson q->m_nextpkt = NULL; 10796effc713SDoug Rabson for (q = nq; q != NULL; q = nq) { 10806effc713SDoug Rabson nq = q->m_nextpkt; 1081945aa40dSDoug Rabson q->m_nextpkt = NULL; 1082db4f9cc7SJonathan Lemon m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1083db4f9cc7SJonathan Lemon m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1084a8db1d93SJonathan Lemon m_cat(m, q); 1085df8bae1dSRodney W. Grimes } 108636b0360bSRobert Watson #ifdef MAC 108736b0360bSRobert Watson mac_create_datagram_from_ipq(fp, m); 108836b0360bSRobert Watson mac_destroy_ipq(fp); 108936b0360bSRobert Watson #endif 1090df8bae1dSRodney W. Grimes 1091df8bae1dSRodney W. Grimes /* 1092f0cada84SAndre Oppermann * Create header for new ip packet by modifying header of first 1093f0cada84SAndre Oppermann * packet; dequeue and discard fragment reassembly header. 1094df8bae1dSRodney W. Grimes * Make header visible. 1095df8bae1dSRodney W. Grimes */ 1096f0cada84SAndre Oppermann ip->ip_len = (ip->ip_hl << 2) + next; 10976effc713SDoug Rabson ip->ip_src = fp->ipq_src; 10986effc713SDoug Rabson ip->ip_dst = fp->ipq_dst; 1099462b86feSPoul-Henning Kamp TAILQ_REMOVE(head, fp, ipq_list); 1100194a213eSAndrey A. Chernov nipq--; 1101d248c7d7SRobert Watson uma_zfree(ipq_zone, fp); 110253be11f6SPoul-Henning Kamp m->m_len += (ip->ip_hl << 2); 110353be11f6SPoul-Henning Kamp m->m_data -= (ip->ip_hl << 2); 1104df8bae1dSRodney W. Grimes /* some debugging cruft by sklower, below, will go away soon */ 1105a5554bf0SPoul-Henning Kamp if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ 1106a5554bf0SPoul-Henning Kamp m_fixhdr(m); 1107f0cada84SAndre Oppermann ipstat.ips_reassembled++; 1108f0cada84SAndre Oppermann IPQ_UNLOCK(); 11096a800098SYoshinobu Inoue return (m); 1110df8bae1dSRodney W. Grimes 1111df8bae1dSRodney W. Grimes dropfrag: 1112df8bae1dSRodney W. 
Grimes ipstat.ips_fragdropped++; 1113042bbfa3SRobert Watson if (fp != NULL) 1114375386e2SMike Silbersack fp->ipq_nfrags--; 1115df8bae1dSRodney W. Grimes m_freem(m); 1116f0cada84SAndre Oppermann done: 1117f0cada84SAndre Oppermann IPQ_UNLOCK(); 1118f0cada84SAndre Oppermann return (NULL); 11196effc713SDoug Rabson 11206effc713SDoug Rabson #undef GETIP 1121df8bae1dSRodney W. Grimes } 1122df8bae1dSRodney W. Grimes 1123df8bae1dSRodney W. Grimes /* 1124df8bae1dSRodney W. Grimes * Free a fragment reassembly header and all 1125df8bae1dSRodney W. Grimes * associated datagrams. 1126df8bae1dSRodney W. Grimes */ 11270312fbe9SPoul-Henning Kamp static void 1128462b86feSPoul-Henning Kamp ip_freef(fhp, fp) 1129462b86feSPoul-Henning Kamp struct ipqhead *fhp; 1130df8bae1dSRodney W. Grimes struct ipq *fp; 1131df8bae1dSRodney W. Grimes { 11326effc713SDoug Rabson register struct mbuf *q; 1133df8bae1dSRodney W. Grimes 11342fad1e93SSam Leffler IPQ_LOCK_ASSERT(); 11352fad1e93SSam Leffler 11366effc713SDoug Rabson while (fp->ipq_frags) { 11376effc713SDoug Rabson q = fp->ipq_frags; 11386effc713SDoug Rabson fp->ipq_frags = q->m_nextpkt; 11396effc713SDoug Rabson m_freem(q); 1140df8bae1dSRodney W. Grimes } 1141462b86feSPoul-Henning Kamp TAILQ_REMOVE(fhp, fp, ipq_list); 1142d248c7d7SRobert Watson uma_zfree(ipq_zone, fp); 1143194a213eSAndrey A. Chernov nipq--; 1144df8bae1dSRodney W. Grimes } 1145df8bae1dSRodney W. Grimes 1146df8bae1dSRodney W. Grimes /* 1147df8bae1dSRodney W. Grimes * IP timer processing; 1148df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 1149df8bae1dSRodney W. Grimes * queue, discard it. 1150df8bae1dSRodney W. Grimes */ 1151df8bae1dSRodney W. Grimes void 1152df8bae1dSRodney W. Grimes ip_slowtimo() 1153df8bae1dSRodney W. Grimes { 1154df8bae1dSRodney W. Grimes register struct ipq *fp; 1155194a213eSAndrey A. Chernov int i; 1156df8bae1dSRodney W. Grimes 11572fad1e93SSam Leffler IPQ_LOCK(); 1158194a213eSAndrey A. 
Chernov for (i = 0; i < IPREASS_NHASH; i++) { 1159462b86feSPoul-Henning Kamp for(fp = TAILQ_FIRST(&ipq[i]); fp;) { 1160462b86feSPoul-Henning Kamp struct ipq *fpp; 1161462b86feSPoul-Henning Kamp 1162462b86feSPoul-Henning Kamp fpp = fp; 1163462b86feSPoul-Henning Kamp fp = TAILQ_NEXT(fp, ipq_list); 1164462b86feSPoul-Henning Kamp if(--fpp->ipq_ttl == 0) { 116599e8617dSMaxim Konovalov ipstat.ips_fragtimeout += fpp->ipq_nfrags; 1166462b86feSPoul-Henning Kamp ip_freef(&ipq[i], fpp); 1167df8bae1dSRodney W. Grimes } 1168df8bae1dSRodney W. Grimes } 1169194a213eSAndrey A. Chernov } 1170690a6055SJesper Skriver /* 1171690a6055SJesper Skriver * If we are over the maximum number of fragments 1172690a6055SJesper Skriver * (due to the limit being lowered), drain off 1173690a6055SJesper Skriver * enough to get down to the new limit. 1174690a6055SJesper Skriver */ 1175a75a485dSMike Silbersack if (maxnipq >= 0 && nipq > maxnipq) { 1176690a6055SJesper Skriver for (i = 0; i < IPREASS_NHASH; i++) { 1177b36f5b37SMaxim Konovalov while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) { 117899e8617dSMaxim Konovalov ipstat.ips_fragdropped += 117999e8617dSMaxim Konovalov TAILQ_FIRST(&ipq[i])->ipq_nfrags; 1180690a6055SJesper Skriver ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); 1181690a6055SJesper Skriver } 1182690a6055SJesper Skriver } 1183690a6055SJesper Skriver } 11842fad1e93SSam Leffler IPQ_UNLOCK(); 1185df8bae1dSRodney W. Grimes } 1186df8bae1dSRodney W. Grimes 1187df8bae1dSRodney W. Grimes /* 1188df8bae1dSRodney W. Grimes * Drain off all datagram fragments. 1189df8bae1dSRodney W. Grimes */ 1190df8bae1dSRodney W. Grimes void 1191df8bae1dSRodney W. Grimes ip_drain() 1192df8bae1dSRodney W. Grimes { 1193194a213eSAndrey A. Chernov int i; 1194ce29ab3aSGarrett Wollman 11952fad1e93SSam Leffler IPQ_LOCK(); 1196194a213eSAndrey A. 
/*
 * Free every queue in each reassembly hash bucket, counting its
 * fragments as dropped.
 */
	for (i = 0; i < IPREASS_NHASH; i++) {
		while (!TAILQ_EMPTY(&ipq[i])) {
			ipstat.ips_fragdropped +=
			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
		}
	}
	IPQ_UNLOCK();
	in_rtqdrain();
}

/*
 * Point ip_protox[ipproto] at the inetsw[] entry that handles `ipproto'.
 *
 * The protocol to be inserted into ip_protox[] must be already registered
 * in inetsw[], either statically or through pf_proto_register().
 *
 * Returns 0 on success, or:
 *	EPROTONOSUPPORT	ipproto is 0, or no PF_INET entry in inetsw[]
 *			carries that protocol number;
 *	EPFNOSUPPORT	the raw IP protosw could not be found;
 *	EEXIST		the slot already points at something other than
 *			the raw IP entry;
 *	EINVAL		the matching entry has a protocol number that is
 *			not below IPPROTO_MAX.
 */
int
ipproto_register(u_char ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto == 0)
		return (EPROTONOSUPPORT);

	/*
	 * The protocol slot must not be occupied by another protocol
	 * already.  An index pointing to IPPROTO_RAW is unused.
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/* Find the protocol position in inetsw[] and set the index. */
	for (pr = inetdomain.dom_protosw;
	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol == ipproto) {
			/* Be careful to only index valid IP protocols. */
			if (pr->pr_protocol < IPPROTO_MAX) {
				ip_protox[pr->pr_protocol] = pr - inetsw;
				return (0);
			} else
				return (EINVAL);
		}
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): make ip_protox[ipproto] point back at the
 * raw IP entry of inetsw[].
 *
 * Returns 0 on success, EPROTONOSUPPORT when ipproto is 0, EPFNOSUPPORT
 * when the raw IP protosw could not be found, and ENOENT when the slot
 * already points at the raw IP entry (i.e. nothing was registered).
 */
int
ipproto_unregister(u_char ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto == 0)
		return (EPROTONOSUPPORT);

	/* Check if the protocol was indeed registered. */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] == pr - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

/*
 * Given address of next destination (final or next hop),
 * return internet address info of interface to be used to get there.
 *
 * Returns NULL when no route to `dst' is found.
 *
 * NOTE(review): the route is released before returning, so nothing
 * visible here holds a reference on the returned ifaddr for the caller.
 */
struct in_ifaddr *
ip_rtaddr(struct in_addr dst)
{
	struct route sro;
	struct sockaddr_in *sin;
	struct in_ifaddr *ifa;

	/* Build a throw-away route request for `dst'. */
	bzero(&sro, sizeof(sro));
	sin = (struct sockaddr_in *)&sro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = dst;
	rtalloc_ign(&sro, RTF_CLONING);

	if (sro.ro_rt == NULL)
		return (NULL);

	ifa = ifatoia(sro.ro_rt->rt_ifa);
	RTFREE(sro.ro_rt);
	return (ifa);
}

/*
 * Errno values, one slot per PRC_* command (PRC_NCMDS entries);
 * presumably indexed by the PRC_* code -- the users are outside this
 * part of the file.  A 0 entry means no errno is associated.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advertising
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 *
 * The mbuf chain `m' is always consumed: it is freed here, handed to
 * icmp_error(), or handed to ip_output().
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia = NULL;
	struct mbuf *mcopy;
	struct in_addr dest;
	int error, type = 0, code = 0, mtu = 0;
	int ip_len;

	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		ipstat.ips_cantforward++;
		m_freem(m);
		return;
	}
#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		if (ip->ip_ttl <= IPTTLDEC) {
			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
			    0, 0);
			return;
		}
#ifdef IPSTEALTH
	}
#endif

	/* Source-routed packets skip the lookup; `ia' then stays NULL. */
	if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
		return;
	}

	/*
	 * Save the IP header and as much of the packet as fits in a
	 * single header mbuf, in case we need to generate an ICMP
	 * message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary.  The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copy() because it might return a reference
	 * to a shared cluster.  Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}

#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct sockaddr_in *sin;
		struct route ro;
		struct rtentry *rt;

		bzero(&ro, sizeof(ro));
		sin = (struct sockaddr_in *)&ro.ro_dst;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ip->ip_dst;
		rtalloc_ign(&ro, RTF_CLONING);

		rt = ro.ro_rt;

		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
			u_long src = ntohl(ip->ip_src.s_addr);

			if (RTA(rt) &&
			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
				if (rt->rt_flags & RTF_GATEWAY)
					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
				else
					dest.s_addr = ip->ip_dst.s_addr;
				/* Router requirements says to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
			}
		}
		if (rt)
			RTFREE(rt);
	}

	/*
	 * ip_output() consumes `m', and `ip' points into it.  Remember
	 * the packet length now so the EMSGSIZE handling below does not
	 * have to read the header after the mbuf is gone.
	 */
	ip_len = ip->ip_len;

	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
	if (error)
		ipstat.ips_cantforward++;
	else {
		ipstat.ips_forward++;
		if (type)
			ipstat.ips_redirectsent++;
		else {
			/* Forwarded and no redirect wanted: we are done. */
			if (mcopy)
				m_freem(mcopy);
			return;
		}
	}
	/* Without the saved copy there is nothing to build an ICMP from. */
	if (mcopy == NULL)
		return;

	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
#if defined(IPSEC) || defined(FAST_IPSEC)
		/*
		 * If the packet is routed over IPsec tunnel, tell the
		 * originator the tunnel MTU.
		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
		 * XXX quickhack!!!
		 */
		{
			struct secpolicy *sp = NULL;
			int ipsecerror;
			int ipsechdr;
			struct route *ro;

#ifdef IPSEC
			sp = ipsec4_getpolicybyaddr(mcopy,
						    IPSEC_DIR_OUTBOUND,
						    IP_FORWARDING,
						    &ipsecerror);
#else /* FAST_IPSEC */
			sp = ipsec_getpolicybyaddr(mcopy,
						   IPSEC_DIR_OUTBOUND,
						   IP_FORWARDING,
						   &ipsecerror);
#endif
			if (sp != NULL) {
				/* count IPsec header size */
				ipsechdr = ipsec4_hdrsiz(mcopy,
							 IPSEC_DIR_OUTBOUND,
							 NULL);

				/*
				 * find the correct route for outer IPv4
				 * header, compute tunnel MTU.
				 */
				if (sp->req != NULL
				 && sp->req->sav != NULL
				 && sp->req->sav->sah != NULL) {
					ro = &sp->req->sav->sah->sa_route;
					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
						mtu =
						    ro->ro_rt->rt_rmx.rmx_mtu ?
						    ro->ro_rt->rt_rmx.rmx_mtu :
						    ro->ro_rt->rt_ifp->if_mtu;
						mtu -= ipsechdr;
					}
				}

#ifdef IPSEC
				key_freesp(sp);
#else /* FAST_IPSEC */
				KEY_FREESP(&sp);
#endif
				ipstat.ips_cantfrag++;
				break;
			}
#endif /*IPSEC || FAST_IPSEC*/
		/*
		 * If the MTU wasn't set before use the interface mtu or
		 * fall back to the next smaller mtu step compared to the
		 * current packet size.
		 */
		if (mtu == 0) {
			if (ia != NULL)
				mtu = ia->ia_ifp->if_mtu;
			else
				mtu = ip_next_mtu(ip_len, 0);
		}
#if defined(IPSEC) || defined(FAST_IPSEC)
		}
#endif /*IPSEC || FAST_IPSEC*/
		ipstat.ips_cantfrag++;
		break;

	case ENOBUFS:
		/*
		 * A router should not generate ICMP_SOURCEQUENCH as
		 * required in RFC1812 Requirements for IP Version 4 Routers.
		 * Source quench could be a big problem under DoS attacks,
		 * or if the underlying interface is rate-limited.
		 * Those who need source quench packets may re-enable them
		 * via the net.inet.ip.sendsourcequench sysctl.
		 */
		if (ip_sendsourcequench == 0) {
			m_freem(mcopy);
			return;
		} else {
			type = ICMP_SOURCEQUENCH;
			code = 0;
		}
		break;

	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		return;
	}
	icmp_error(mcopy, type, code, dest.s_addr, mtu);
}

/*
 * Build the control messages requested on `inp' for a received packet
 * and append them to the mbuf chain at *mp.
 *
 *	inp	PCB whose socket options and inp_flags select the messages
 *	mp	where to link the first control mbuf; advanced through
 *		m_next after every successful sbcreatecontrol()
 *	ip	IP header of the packet (source of ip_dst and ip_ttl)
 *	m	the packet itself (source of the receive interface)
 *
 * A failed sbcreatecontrol() leaves *mp NULL and `mp' where it was, so
 * that message is simply left out.
 */
void
ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
    struct mbuf *m)
{
	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
		struct bintime bt;

		/* Sample the clock once so both stamps agree. */
		bintime(&bt);
		if (inp->inp_socket->so_options & SO_BINTIME) {
			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
			    SCM_BINTIME, SOL_SOCKET);
			if (*mp)
				mp = &(*mp)->m_next;
		}
		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
			struct timeval tv;

			bintime2timeval(&bt, &tv);
			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
			    SCM_TIMESTAMP, SOL_SOCKET);
			if (*mp)
				mp = &(*mp)->m_next;
		}
	}
	if (inp->inp_flags & INP_RECVDSTADDR) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	if (inp->inp_flags & INP_RECVTTL) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#ifdef notyet
	/* XXX
	 * Moving these out of udp_input() made them even more broken
	 * than they already were.
	 */
	/* options were tossed already */
	if (inp->inp_flags & INP_RECVOPTS) {
		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	/* ip_srcroute doesn't do what we want here, need to fix */
	if (inp->inp_flags & INP_RECVRETOPTS) {
		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#endif
	if (inp->inp_flags & INP_RECVIF) {
		struct ifnet *ifp;
		struct sdlbuf {
			struct sockaddr_dl sdl;
			u_char	pad[32];
		} sdlbuf;
		struct sockaddr_dl *sdp;
		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;

		if (((ifp = m->m_pkthdr.rcvif))
		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
			/*
			 * Change our mind and don't try copy.
			 */
			if ((sdp->sdl_family != AF_LINK)
			|| (sdp->sdl_len > sizeof(sdlbuf))) {
				goto makedummy;
			}
			bcopy(sdp, sdl2, sdp->sdl_len);
		} else {
makedummy:
			/*
			 * No usable link-level address: hand back an empty
			 * AF_LINK sockaddr.  Every field in front of
			 * sdl_data is set, sdl_type included, so that no
			 * uninitialized stack byte ends up in the control
			 * message.
			 */
			sdl2->sdl_len
				= offsetof(struct sockaddr_dl, sdl_data[0]);
			sdl2->sdl_family = AF_LINK;
			sdl2->sdl_index = 0;
			sdl2->sdl_type = 0;
			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
		}
		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
			IP_RECVIF, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
}

/*
 * XXX these routines are called from the upper part of the kernel.
 * They need to be locked when we remove Giant.
 *
 * They could also be moved to ip_mroute.c, since all the RSVP
 * handling is done there already.
 */
static int ip_rsvp_on;		/* set while our rsvp_on increment is held */
struct socket *ip_rsvpd;	/* socket registered via ip_rsvp_init() */
Grimes int 1668f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1669f0068c4aSGarrett Wollman { 1670f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1671f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1672f0068c4aSGarrett Wollman return EOPNOTSUPP; 1673f0068c4aSGarrett Wollman 1674f0068c4aSGarrett Wollman if (ip_rsvpd != NULL) 1675f0068c4aSGarrett Wollman return EADDRINUSE; 1676f0068c4aSGarrett Wollman 1677f0068c4aSGarrett Wollman ip_rsvpd = so; 16781c5de19aSGarrett Wollman /* 16791c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 16801c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 16811c5de19aSGarrett Wollman */ 16821c5de19aSGarrett Wollman if (!ip_rsvp_on) { 16831c5de19aSGarrett Wollman ip_rsvp_on = 1; 16841c5de19aSGarrett Wollman rsvp_on++; 16851c5de19aSGarrett Wollman } 1686f0068c4aSGarrett Wollman 1687f0068c4aSGarrett Wollman return 0; 1688f0068c4aSGarrett Wollman } 1689f0068c4aSGarrett Wollman 1690f0068c4aSGarrett Wollman int 1691f0068c4aSGarrett Wollman ip_rsvp_done(void) 1692f0068c4aSGarrett Wollman { 1693f0068c4aSGarrett Wollman ip_rsvpd = NULL; 16941c5de19aSGarrett Wollman /* 16951c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 16961c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 
16971c5de19aSGarrett Wollman */ 16981c5de19aSGarrett Wollman if (ip_rsvp_on) { 16991c5de19aSGarrett Wollman ip_rsvp_on = 0; 17001c5de19aSGarrett Wollman rsvp_on--; 17011c5de19aSGarrett Wollman } 1702f0068c4aSGarrett Wollman return 0; 1703f0068c4aSGarrett Wollman } 1704bbb4330bSLuigi Rizzo 1705bbb4330bSLuigi Rizzo void 1706bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ 1707bbb4330bSLuigi Rizzo { 1708bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 1709bbb4330bSLuigi Rizzo rsvp_input_p(m, off); 1710bbb4330bSLuigi Rizzo return; 1711bbb4330bSLuigi Rizzo } 1712bbb4330bSLuigi Rizzo 1713bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1714bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1715bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1716bbb4330bSLuigi Rizzo */ 1717bbb4330bSLuigi Rizzo 1718bbb4330bSLuigi Rizzo if (!rsvp_on) { 1719bbb4330bSLuigi Rizzo m_freem(m); 1720bbb4330bSLuigi Rizzo return; 1721bbb4330bSLuigi Rizzo } 1722bbb4330bSLuigi Rizzo 1723bbb4330bSLuigi Rizzo if (ip_rsvpd != NULL) { 1724bbb4330bSLuigi Rizzo rip_input(m, off); 1725bbb4330bSLuigi Rizzo return; 1726bbb4330bSLuigi Rizzo } 1727bbb4330bSLuigi Rizzo /* Drop the packet */ 1728bbb4330bSLuigi Rizzo m_freem(m); 1729bbb4330bSLuigi Rizzo } 1730