xref: /freebsd/sys/netinet/ip_input.c (revision ab48768b20058a18fd4adfd6836af7a652509d60)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30c3aac50fSPeter Wemm  * $FreeBSD$
31df8bae1dSRodney W. Grimes  */
32df8bae1dSRodney W. Grimes 
330ac40133SBrian Somers #include "opt_bootp.h"
3474a9466cSGary Palmer #include "opt_ipfw.h"
3527108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
366a800098SYoshinobu Inoue #include "opt_ipsec.h"
3736b0360bSRobert Watson #include "opt_mac.h"
38a9771948SGleb Smirnoff #include "opt_carp.h"
3974a9466cSGary Palmer 
40df8bae1dSRodney W. Grimes #include <sys/param.h>
41df8bae1dSRodney W. Grimes #include <sys/systm.h>
425f311da2SMike Silbersack #include <sys/callout.h>
4336b0360bSRobert Watson #include <sys/mac.h>
44df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
45b715f178SLuigi Rizzo #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/domain.h>
47df8bae1dSRodney W. Grimes #include <sys/protosw.h>
48df8bae1dSRodney W. Grimes #include <sys/socket.h>
49df8bae1dSRodney W. Grimes #include <sys/time.h>
50df8bae1dSRodney W. Grimes #include <sys/kernel.h>
511025071fSGarrett Wollman #include <sys/syslog.h>
52b5e8ce9fSBruce Evans #include <sys/sysctl.h>
53df8bae1dSRodney W. Grimes 
54c85540ddSAndrey A. Chernov #include <net/pfil.h>
55df8bae1dSRodney W. Grimes #include <net/if.h>
569494d596SBrooks Davis #include <net/if_types.h>
57d314ad7bSJulian Elischer #include <net/if_var.h>
5882c23ebaSBill Fenner #include <net/if_dl.h>
59df8bae1dSRodney W. Grimes #include <net/route.h>
60748e0b0aSGarrett Wollman #include <net/netisr.h>
61df8bae1dSRodney W. Grimes 
62df8bae1dSRodney W. Grimes #include <netinet/in.h>
63df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
64b5e8ce9fSBruce Evans #include <netinet/in_var.h>
65df8bae1dSRodney W. Grimes #include <netinet/ip.h>
66df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
67df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
68df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
69ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7058938916SGarrett Wollman #include <machine/in_cksum.h>
71a9771948SGleb Smirnoff #ifdef DEV_CARP
72a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
73a9771948SGleb Smirnoff #endif
74df8bae1dSRodney W. Grimes 
75f0068c4aSGarrett Wollman #include <sys/socketvar.h>
766ddbf1e2SGary Palmer 
779b932e9eSAndre Oppermann /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */
786ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
79db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
80db69a05dSPaul Saab 
816a800098SYoshinobu Inoue #ifdef IPSEC
826a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
836a800098SYoshinobu Inoue #include <netkey/key.h>
846a800098SYoshinobu Inoue #endif
856a800098SYoshinobu Inoue 
86b9234fafSSam Leffler #ifdef FAST_IPSEC
87b9234fafSSam Leffler #include <netipsec/ipsec.h>
88b9234fafSSam Leffler #include <netipsec/key.h>
89b9234fafSSam Leffler #endif
90b9234fafSSam Leffler 
911c5de19aSGarrett Wollman int rsvp_on = 0;
92f0068c4aSGarrett Wollman 
931f91d8c5SDavid Greenman int	ipforwarding = 0;
940312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
953d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
960312fbe9SPoul-Henning Kamp 
97d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
980312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
993d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
1000312fbe9SPoul-Henning Kamp 
101df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1020312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1033d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1040312fbe9SPoul-Henning Kamp 
1056a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1066a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1076a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1086a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1096a800098SYoshinobu Inoue 
110df285b3dSMike Silbersack static int	ip_sendsourcequench = 0;
111df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
112df285b3dSMike Silbersack 	&ip_sendsourcequench, 0,
113df285b3dSMike Silbersack 	"Enable the transmission of source quench packets");
114df285b3dSMike Silbersack 
1151f44b0a1SDavid Malone int	ip_do_randomid = 0;
1161f44b0a1SDavid Malone SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
1171f44b0a1SDavid Malone 	&ip_do_randomid, 0,
1181f44b0a1SDavid Malone 	"Assign random ip_id values");
1191f44b0a1SDavid Malone 
120823db0e9SDon Lewis /*
121823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
122823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
123a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
124823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1253f67c834SDon Lewis  *
126a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
127a8f12100SDon Lewis  * to translate the destination address to another local interface.
1283f67c834SDon Lewis  *
1293f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1303f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1313f67c834SDon Lewis  * packets for those addresses are received.
132823db0e9SDon Lewis  */
1334bc37f98SMaxim Konovalov static int	ip_checkinterface = 0;
134b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
135b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
136b3e95d4eSJonathan Lemon 
137c21fd232SAndre Oppermann struct pfil_head inet_pfil_hook;	/* Packet filter hooks */
138df8bae1dSRodney W. Grimes 
1391cafed39SJonathan Lemon static struct	ifqueue ipintrq;
140ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
141ca925d9cSJonathan Lemon 
142df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
143f0ffb944SJulian Elischer extern	struct protosw inetsw[];
144df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
14559562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
146ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
147ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
148ca925d9cSJonathan Lemon 
149afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1503d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1510312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1523d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
153df8bae1dSRodney W. Grimes 
154f23b4c91SGarrett Wollman struct ipstat ipstat;
155c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1563d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
157194a213eSAndrey A. Chernov 
/*
 * IP datagram reassembly: hash table geometry.
 *
 * IPREASS_HASH(x, y) builds an 8-bit value from bits 0-3 and bits 8-11 of x,
 * XORs it with y, and keeps the low IPREASS_NHASH_LOG2 bits as the bucket
 * index.  Both arguments are evaluated more than once (x twice), so they
 * must be free of side effects.
 */
#define	IPREASS_NHASH_LOG2	6
#define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define	IPREASS_HMASK		(IPREASS_NHASH - 1)
#define	IPREASS_HASH(x,y)						\
	(((((x) & 0x0f) | ((((x) >> 8) & 0x0f) << 4)) ^ (y)) & IPREASS_HMASK)
166194a213eSAndrey A. Chernov 
167d248c7d7SRobert Watson static uma_zone_t ipq_zone;
168462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
169dfa60d93SRobert Watson static struct mtx ipqlock;
1702fad1e93SSam Leffler 
1712fad1e93SSam Leffler #define	IPQ_LOCK()	mtx_lock(&ipqlock)
1722fad1e93SSam Leffler #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
173888c2a3cSSam Leffler #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
174888c2a3cSSam Leffler #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
175f23b4c91SGarrett Wollman 
176d248c7d7SRobert Watson static void	maxnipq_update(void);
177d248c7d7SRobert Watson 
178d248c7d7SRobert Watson static int	maxnipq;	/* Administrative limit on # reass queues. */
179d248c7d7SRobert Watson static int	nipq = 0;	/* Total # of reass queues */
180d248c7d7SRobert Watson SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
181d248c7d7SRobert Watson 	"Current number of IPv4 fragment reassembly queue entries");
182d248c7d7SRobert Watson 
183d248c7d7SRobert Watson static int	maxfragsperpacket;
184d248c7d7SRobert Watson SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
185d248c7d7SRobert Watson 	&maxfragsperpacket, 0,
186d248c7d7SRobert Watson 	"Maximum number of IPv4 fragments allowed per packet");
187d248c7d7SRobert Watson 
188d248c7d7SRobert Watson struct callout	ipport_tick_callout;
189d248c7d7SRobert Watson 
1900312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1910312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1923d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1930312fbe9SPoul-Henning Kamp #endif
1940312fbe9SPoul-Henning Kamp 
1951b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
196c76ff708SAndre Oppermann int	ipstealth = 0;
1971b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
1981b968362SDag-Erling Smørgrav     &ipstealth, 0, "");
1991b968362SDag-Erling Smørgrav #endif
2001b968362SDag-Erling Smørgrav 
2019b932e9eSAndre Oppermann /*
2029b932e9eSAndre Oppermann  * ipfw_ether and ipfw_bridge hooks.
2039b932e9eSAndre Oppermann  * XXX: Temporary until those are converted to pfil_hooks as well.
2049b932e9eSAndre Oppermann  */
2059b932e9eSAndre Oppermann ip_fw_chk_t *ip_fw_chk_ptr = NULL;
2069b932e9eSAndre Oppermann ip_dn_io_t *ip_dn_io_ptr = NULL;
207e4c97effSAndre Oppermann int fw_enable = 1;
20897850a5dSLuigi Rizzo int fw_one_pass = 1;
209e7319babSPoul-Henning Kamp 
2104d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2118948e4baSArchie Cobbs 
212df8bae1dSRodney W. Grimes /*
213df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
214df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
215df8bae1dSRodney W. Grimes  */
216df8bae1dSRodney W. Grimes void
217df8bae1dSRodney W. Grimes ip_init()
218df8bae1dSRodney W. Grimes {
219f0ffb944SJulian Elischer 	register struct protosw *pr;
220df8bae1dSRodney W. Grimes 	register int i;
221df8bae1dSRodney W. Grimes 
22259562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
223ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
224f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
22502410549SRobert Watson 	if (pr == NULL)
226db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
227db09bef3SAndre Oppermann 
228db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
229df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
230df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
231db09bef3SAndre Oppermann 	/*
232db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
233db09bef3SAndre Oppermann 	 * in ip_protox[].
234db09bef3SAndre Oppermann 	 */
235f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
236f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
237df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
238db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
239db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
240db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
241df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
242db09bef3SAndre Oppermann 		}
243194a213eSAndrey A. Chernov 
244c21fd232SAndre Oppermann 	/* Initialize packet filter hooks. */
245134ea224SSam Leffler 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
246134ea224SSam Leffler 	inet_pfil_hook.ph_af = AF_INET;
247134ea224SSam Leffler 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
248134ea224SSam Leffler 		printf("%s: WARNING: unable to register pfil hook, "
249134ea224SSam Leffler 			"error %d\n", __func__, i);
250134ea224SSam Leffler 
251db09bef3SAndre Oppermann 	/* Initialize IP reassembly queue. */
2522fad1e93SSam Leffler 	IPQ_LOCK_INIT();
253194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
254462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
255375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
256375386e2SMike Silbersack 	maxfragsperpacket = 16;
257d248c7d7SRobert Watson 	ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
258d248c7d7SRobert Watson 	    NULL, UMA_ALIGN_PTR, 0);
259d248c7d7SRobert Watson 	maxnipq_update();
260194a213eSAndrey A. Chernov 
2615f311da2SMike Silbersack 	/* Start ipport_tick. */
2625f311da2SMike Silbersack 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
2635f311da2SMike Silbersack 	ipport_tick(NULL);
2645f311da2SMike Silbersack 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
2655f311da2SMike Silbersack 		SHUTDOWN_PRI_DEFAULT);
2665f311da2SMike Silbersack 
267db09bef3SAndre Oppermann 	/* Initialize various other remaining things. */
268227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
269df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2706008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2717902224cSSam Leffler 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
272df8bae1dSRodney W. Grimes }
273df8bae1dSRodney W. Grimes 
2745f311da2SMike Silbersack void ip_fini(xtp)
2755f311da2SMike Silbersack 	void *xtp;
2765f311da2SMike Silbersack {
2775f311da2SMike Silbersack 	callout_stop(&ipport_tick_callout);
2785f311da2SMike Silbersack }
2795f311da2SMike Silbersack 
2804d2e3692SLuigi Rizzo /*
281df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
282df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
283df8bae1dSRodney W. Grimes  */
284c67b1d17SGarrett Wollman void
285c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
286df8bae1dSRodney W. Grimes {
2879188b4a1SAndre Oppermann 	struct ip *ip = NULL;
2885da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
289ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
2909b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
29147c861ecSBrian Somers 	u_short sum;
29202c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
293f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
294b9234fafSSam Leffler #ifdef FAST_IPSEC
29536e8826fSMax Laier 	struct m_tag *mtag;
296b9234fafSSam Leffler 	struct tdb_ident *tdbi;
297b9234fafSSam Leffler 	struct secpolicy *sp;
298b9234fafSSam Leffler 	int s, error;
299b9234fafSSam Leffler #endif /* FAST_IPSEC */
300b715f178SLuigi Rizzo 
301fe584538SDag-Erling Smørgrav   	M_ASSERTPKTHDR(m);
302db40007dSAndrew R. Reiter 
303ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
3049b932e9eSAndre Oppermann 		/*
30576ff6dcfSAndre Oppermann 		 * Firewall or NAT changed destination to local.
30676ff6dcfSAndre Oppermann 		 * We expect ip_len and ip_off to be in host byte order.
3079b932e9eSAndre Oppermann 		 */
30876ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
30976ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
3102b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
31153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
3129b932e9eSAndre Oppermann   		goto ours;
3132b25acc1SLuigi Rizzo   	}
3142b25acc1SLuigi Rizzo 
315df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
31658938916SGarrett Wollman 
31758938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
31858938916SGarrett Wollman 		goto tooshort;
31958938916SGarrett Wollman 
320df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
3210b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
322df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
323c67b1d17SGarrett Wollman 		return;
324df8bae1dSRodney W. Grimes 	}
325df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
32658938916SGarrett Wollman 
32753be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
328df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
329df8bae1dSRodney W. Grimes 		goto bad;
330df8bae1dSRodney W. Grimes 	}
33158938916SGarrett Wollman 
33253be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
333df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
334df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
335df8bae1dSRodney W. Grimes 		goto bad;
336df8bae1dSRodney W. Grimes 	}
337df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
3380b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
339df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
340c67b1d17SGarrett Wollman 			return;
341df8bae1dSRodney W. Grimes 		}
342df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
343df8bae1dSRodney W. Grimes 	}
34433841545SHajimu UMEMOTO 
34533841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
34633841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
34733841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
34833841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
34933841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
35033841545SHajimu UMEMOTO 			goto bad;
35133841545SHajimu UMEMOTO 		}
35233841545SHajimu UMEMOTO 	}
35333841545SHajimu UMEMOTO 
354db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
355db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
356db4f9cc7SJonathan Lemon 	} else {
35758938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
35847c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
35958938916SGarrett Wollman 		} else {
36047c861ecSBrian Somers 			sum = in_cksum(m, hlen);
36158938916SGarrett Wollman 		}
362db4f9cc7SJonathan Lemon 	}
36347c861ecSBrian Somers 	if (sum) {
364df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
365df8bae1dSRodney W. Grimes 		goto bad;
366df8bae1dSRodney W. Grimes 	}
367df8bae1dSRodney W. Grimes 
36802b199f1SMax Laier #ifdef ALTQ
36902b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
37002b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
37102b199f1SMax Laier 		return;
37202b199f1SMax Laier #endif
37302b199f1SMax Laier 
374df8bae1dSRodney W. Grimes 	/*
375df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
376df8bae1dSRodney W. Grimes 	 */
377fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
378df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
379df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
380df8bae1dSRodney W. Grimes 		goto bad;
381df8bae1dSRodney W. Grimes 	}
382fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
383df8bae1dSRodney W. Grimes 
384df8bae1dSRodney W. Grimes 	/*
385df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
386df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
387df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
388df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
389df8bae1dSRodney W. Grimes 	 */
390df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
39158938916SGarrett Wollman tooshort:
392df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
393df8bae1dSRodney W. Grimes 		goto bad;
394df8bae1dSRodney W. Grimes 	}
395df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
396df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
397df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
398df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
399df8bae1dSRodney W. Grimes 		} else
400df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
401df8bae1dSRodney W. Grimes 	}
40214dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
40314dd6717SSam Leffler 	/*
40414dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
40514dd6717SSam Leffler 	 */
4060f9ade71SHajimu UMEMOTO 	if (ipsec_getnhist(m))
407c21fd232SAndre Oppermann 		goto passin;
40814dd6717SSam Leffler #endif
4091f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
4101f76a5e2SSam Leffler 	/*
4111f76a5e2SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
4121f76a5e2SSam Leffler 	 */
4131f76a5e2SSam Leffler 	if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
414c21fd232SAndre Oppermann 		goto passin;
4151f76a5e2SSam Leffler #endif
4163f67c834SDon Lewis 
417c4ac87eaSDarren Reed 	/*
418134ea224SSam Leffler 	 * Run through list of hooks for input packets.
419f51f805fSSam Leffler 	 *
420f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
421f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
422f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
423c4ac87eaSDarren Reed 	 */
424c21fd232SAndre Oppermann 
425c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
426c21fd232SAndre Oppermann 	if (inet_pfil_hook.ph_busy_count == -1)
427c21fd232SAndre Oppermann 		goto passin;
428c21fd232SAndre Oppermann 
429f51f805fSSam Leffler 	odst = ip->ip_dst;
430134ea224SSam Leffler 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
431d6a8d588SMax Laier 	    PFIL_IN, NULL) != 0)
432beec8214SDarren Reed 		return;
433134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
434c4ac87eaSDarren Reed 		return;
4359b932e9eSAndre Oppermann 
436c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
43702c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
4389b932e9eSAndre Oppermann 
4399b932e9eSAndre Oppermann #ifdef IPFIREWALL_FORWARD
4409b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
4419b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
4429b932e9eSAndre Oppermann 		goto ours;
4439b932e9eSAndre Oppermann 	}
444099dd043SAndre Oppermann #ifndef IPFIREWALL_FORWARD_EXTENDED
4459b932e9eSAndre Oppermann 	dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
446099dd043SAndre Oppermann #else
447099dd043SAndre Oppermann 	if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
448099dd043SAndre Oppermann 		/*
449099dd043SAndre Oppermann 		 * Directly ship on the packet.  This allows to forward packets
450099dd043SAndre Oppermann 		 * that were destined for us to some other directly connected
451099dd043SAndre Oppermann 		 * host.
452099dd043SAndre Oppermann 		 */
453099dd043SAndre Oppermann 		ip_forward(m, dchg);
454099dd043SAndre Oppermann 		return;
455099dd043SAndre Oppermann 	}
456099dd043SAndre Oppermann #endif /* IPFIREWALL_FORWARD_EXTENDED */
4579b932e9eSAndre Oppermann #endif /* IPFIREWALL_FORWARD */
4589b932e9eSAndre Oppermann 
459c21fd232SAndre Oppermann passin:
460df8bae1dSRodney W. Grimes 	/*
461df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
462df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
463df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
464df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
465df8bae1dSRodney W. Grimes 	 */
4669b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
467c67b1d17SGarrett Wollman 		return;
468df8bae1dSRodney W. Grimes 
469f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
470f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
471f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
472f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
473f0068c4aSGarrett Wollman 	 * grabbing the packet.
474f0068c4aSGarrett Wollman          */
4751c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
476f0068c4aSGarrett Wollman 		goto ours;
477f0068c4aSGarrett Wollman 
478df8bae1dSRodney W. Grimes 	/*
479df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
480cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
481cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
482cc766e04SGarrett Wollman 	 * with it).
483df8bae1dSRodney W. Grimes 	 */
484cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
485cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
486cc766e04SGarrett Wollman 		goto ours;
487cc766e04SGarrett Wollman 
4887538a9a0SJonathan Lemon 	/*
489823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
490823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
491823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
492e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
493e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
4943f67c834SDon Lewis 	 *
4953f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
4963f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
4973f67c834SDon Lewis 	 *
498a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
4993f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5003f67c834SDon Lewis 	 * the packets are received.
501a9771948SGleb Smirnoff 	 *
502a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
503a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
504a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
505823db0e9SDon Lewis 	 */
506823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5079494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
508e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
509a9771948SGleb Smirnoff #ifdef DEV_CARP
510a9771948SGleb Smirnoff 	    !m->m_pkthdr.rcvif->if_carp &&
511a9771948SGleb Smirnoff #endif
5129b932e9eSAndre Oppermann 	    (dchg == 0);
513823db0e9SDon Lewis 
514ca925d9cSJonathan Lemon 	/*
515ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
516ca925d9cSJonathan Lemon 	 */
5179b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
518f9e354dfSJulian Elischer 		/*
519823db0e9SDon Lewis 		 * If the address matches, verify that the packet
520823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
521823db0e9SDon Lewis 		 * enabled.
522f9e354dfSJulian Elischer 		 */
5239b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
524823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
525ed1ff184SJulian Elischer 			goto ours;
526ca925d9cSJonathan Lemon 	}
527823db0e9SDon Lewis 	/*
528ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
529ca925d9cSJonathan Lemon 	 *
530ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
531ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
532ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
533ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
534823db0e9SDon Lewis 	 */
5354f450ff9SBruce M Simpson 	if (m->m_pkthdr.rcvif != NULL &&
5364f450ff9SBruce M Simpson 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
537ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
538ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
539ca925d9cSJonathan Lemon 				continue;
540ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
541df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
5429b932e9eSAndre Oppermann 			    ip->ip_dst.s_addr)
543df8bae1dSRodney W. Grimes 				goto ours;
5449b932e9eSAndre Oppermann 			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
545df8bae1dSRodney W. Grimes 				goto ours;
5460ac40133SBrian Somers #ifdef BOOTP_COMPAT
547ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
548ca925d9cSJonathan Lemon 				goto ours;
5490ac40133SBrian Somers #endif
550df8bae1dSRodney W. Grimes 		}
551df8bae1dSRodney W. Grimes 	}
552df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
553df8bae1dSRodney W. Grimes 		struct in_multi *inm;
554df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
555df8bae1dSRodney W. Grimes 			/*
556df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
557df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
558df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
559df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
560df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
561df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
562df8bae1dSRodney W. Grimes 			 */
563bbb4330bSLuigi Rizzo 			if (ip_mforward &&
564bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
565df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
566df8bae1dSRodney W. Grimes 				m_freem(m);
567c67b1d17SGarrett Wollman 				return;
568df8bae1dSRodney W. Grimes 			}
569df8bae1dSRodney W. Grimes 
570df8bae1dSRodney W. Grimes 			/*
57111612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
572df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
573df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
574df8bae1dSRodney W. Grimes 			 */
575df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
576df8bae1dSRodney W. Grimes 				goto ours;
577df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
578df8bae1dSRodney W. Grimes 		}
579df8bae1dSRodney W. Grimes 		/*
580df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
581df8bae1dSRodney W. Grimes 		 * arrival interface.
582df8bae1dSRodney W. Grimes 		 */
583dd5a318bSRobert Watson 		IN_MULTI_LOCK();
584df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
585dd5a318bSRobert Watson 		IN_MULTI_UNLOCK();
586df8bae1dSRodney W. Grimes 		if (inm == NULL) {
58782c39223SGarrett Wollman 			ipstat.ips_notmember++;
588df8bae1dSRodney W. Grimes 			m_freem(m);
589c67b1d17SGarrett Wollman 			return;
590df8bae1dSRodney W. Grimes 		}
591df8bae1dSRodney W. Grimes 		goto ours;
592df8bae1dSRodney W. Grimes 	}
593df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
594df8bae1dSRodney W. Grimes 		goto ours;
595df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
596df8bae1dSRodney W. Grimes 		goto ours;
597df8bae1dSRodney W. Grimes 
5986a800098SYoshinobu Inoue 	/*
5996a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6006a800098SYoshinobu Inoue 	 */
6016a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6026a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6036a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6046a800098SYoshinobu Inoue 				goto ours;
6056a800098SYoshinobu Inoue 		}
6066a800098SYoshinobu Inoue 		m_freem(m);
6076a800098SYoshinobu Inoue 		return;
6086a800098SYoshinobu Inoue 	}
6099494d596SBrooks Davis 
610df8bae1dSRodney W. Grimes 	/*
611df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
612df8bae1dSRodney W. Grimes 	 */
613df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
614df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
615df8bae1dSRodney W. Grimes 		m_freem(m);
616546f251bSChris D. Faulhaber 	} else {
617546f251bSChris D. Faulhaber #ifdef IPSEC
618546f251bSChris D. Faulhaber 		/*
619546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
620546f251bSChris D. Faulhaber 		 */
621546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
622546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
623546f251bSChris D. Faulhaber 			goto bad;
624546f251bSChris D. Faulhaber 		}
625546f251bSChris D. Faulhaber #endif /* IPSEC */
626b9234fafSSam Leffler #ifdef FAST_IPSEC
627b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
628b9234fafSSam Leffler 		s = splnet();
629b9234fafSSam Leffler 		if (mtag != NULL) {
630b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
631b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
632b9234fafSSam Leffler 		} else {
633b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
634b9234fafSSam Leffler 						   IP_FORWARDING, &error);
635b9234fafSSam Leffler 		}
636b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
637b9234fafSSam Leffler 			splx(s);
638b9234fafSSam Leffler 			/*XXX error stat???*/
639b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
640b9234fafSSam Leffler 			goto bad;
641b9234fafSSam Leffler 		}
642b9234fafSSam Leffler 
643b9234fafSSam Leffler 		/*
644b9234fafSSam Leffler 		 * Check security policy against packet attributes.
645b9234fafSSam Leffler 		 */
646b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
647b9234fafSSam Leffler 		KEY_FREESP(&sp);
648b9234fafSSam Leffler 		splx(s);
649b9234fafSSam Leffler 		if (error) {
650b9234fafSSam Leffler 			ipstat.ips_cantforward++;
651b9234fafSSam Leffler 			goto bad;
652b9234fafSSam Leffler 		}
653b9234fafSSam Leffler #endif /* FAST_IPSEC */
6549b932e9eSAndre Oppermann 		ip_forward(m, dchg);
655546f251bSChris D. Faulhaber 	}
656c67b1d17SGarrett Wollman 	return;
657df8bae1dSRodney W. Grimes 
658df8bae1dSRodney W. Grimes ours:
659d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
660d0ebc0d2SYaroslav Tykhiy 	/*
661d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
662d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
663d0ebc0d2SYaroslav Tykhiy 	 */
6642b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
6659b932e9eSAndre Oppermann 	    ip_dooptions(m, 1))
666d0ebc0d2SYaroslav Tykhiy 		return;
667d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
668d0ebc0d2SYaroslav Tykhiy 
6695da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
6705da9f8faSJosef Karthauser 	if (ia != NULL) {
6715da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
6725da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
6735da9f8faSJosef Karthauser 	}
674100ba1a6SJordan K. Hubbard 
67563f8d699SJordan K. Hubbard 	/*
676b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
677ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
678df8bae1dSRodney W. Grimes 	 */
679f0cada84SAndre Oppermann 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
680f0cada84SAndre Oppermann 		m = ip_reass(m);
681f0cada84SAndre Oppermann 		if (m == NULL)
682c67b1d17SGarrett Wollman 			return;
6836a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
6847e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
68553be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
686f0cada84SAndre Oppermann 	}
687f0cada84SAndre Oppermann 
688f0cada84SAndre Oppermann 	/*
689f0cada84SAndre Oppermann 	 * Further protocols expect the packet length to be w/o the
690f0cada84SAndre Oppermann 	 * IP header.
691f0cada84SAndre Oppermann 	 */
692df8bae1dSRodney W. Grimes 	ip->ip_len -= hlen;
693df8bae1dSRodney W. Grimes 
69433841545SHajimu UMEMOTO #ifdef IPSEC
69533841545SHajimu UMEMOTO 	/*
69633841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
69733841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
69833841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
69933841545SHajimu UMEMOTO 	 */
70033841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
70133841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
70233841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
70333841545SHajimu UMEMOTO 		goto bad;
70433841545SHajimu UMEMOTO 	}
70533841545SHajimu UMEMOTO #endif
706f4e98881SRuslan Ermilov #ifdef FAST_IPSEC
707b9234fafSSam Leffler 	/*
708b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
709b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
710b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
711b9234fafSSam Leffler 	 */
712b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
713b9234fafSSam Leffler 		/*
714b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
715b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
716b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
717b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
718b9234fafSSam Leffler 		 */
719b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
720b9234fafSSam Leffler 		s = splnet();
721b9234fafSSam Leffler 		if (mtag != NULL) {
722b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
723b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
724b9234fafSSam Leffler 		} else {
725b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
726b9234fafSSam Leffler 						   IP_FORWARDING, &error);
727b9234fafSSam Leffler 		}
728b9234fafSSam Leffler 		if (sp != NULL) {
729b9234fafSSam Leffler 			/*
730b9234fafSSam Leffler 			 * Check security policy against packet attributes.
731b9234fafSSam Leffler 			 */
732b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
733b9234fafSSam Leffler 			KEY_FREESP(&sp);
734b9234fafSSam Leffler 		} else {
735b9234fafSSam Leffler 			/* XXX error stat??? */
736b9234fafSSam Leffler 			error = EINVAL;
737b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
738b9234fafSSam Leffler 			goto bad;
739b9234fafSSam Leffler 		}
740b9234fafSSam Leffler 		splx(s);
741b9234fafSSam Leffler 		if (error)
742b9234fafSSam Leffler 			goto bad;
743b9234fafSSam Leffler 	}
744b9234fafSSam Leffler #endif /* FAST_IPSEC */
74533841545SHajimu UMEMOTO 
746df8bae1dSRodney W. Grimes 	/*
747df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
748df8bae1dSRodney W. Grimes 	 */
749df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
7509b932e9eSAndre Oppermann 
7512b25acc1SLuigi Rizzo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
752c67b1d17SGarrett Wollman 	return;
753df8bae1dSRodney W. Grimes bad:
754df8bae1dSRodney W. Grimes 	m_freem(m);
755c67b1d17SGarrett Wollman }
756c67b1d17SGarrett Wollman 
757c67b1d17SGarrett Wollman /*
758d248c7d7SRobert Watson  * After maxnipq has been updated, propagate the change to UMA.  The UMA zone
759d248c7d7SRobert Watson  * max has slightly different semantics than the sysctl, for historical
760d248c7d7SRobert Watson  * reasons.
761d248c7d7SRobert Watson  */
762d248c7d7SRobert Watson static void
763d248c7d7SRobert Watson maxnipq_update(void)
764d248c7d7SRobert Watson {
765d248c7d7SRobert Watson 
766d248c7d7SRobert Watson 	/*
767d248c7d7SRobert Watson 	 * -1 for unlimited allocation.
768d248c7d7SRobert Watson 	 */
769d248c7d7SRobert Watson 	if (maxnipq < 0)
770d248c7d7SRobert Watson 		uma_zone_set_max(ipq_zone, 0);
771d248c7d7SRobert Watson 	/*
772d248c7d7SRobert Watson 	 * Positive number for specific bound.
773d248c7d7SRobert Watson 	 */
774d248c7d7SRobert Watson 	if (maxnipq > 0)
775d248c7d7SRobert Watson 		uma_zone_set_max(ipq_zone, maxnipq);
776d248c7d7SRobert Watson 	/*
777d248c7d7SRobert Watson 	 * Zero specifies no further fragment queue allocation -- set the
778d248c7d7SRobert Watson 	 * bound very low, but rely on implementation elsewhere to actually
779d248c7d7SRobert Watson 	 * prevent allocation and reclaim current queues.
780d248c7d7SRobert Watson 	 */
781d248c7d7SRobert Watson 	if (maxnipq == 0)
782d248c7d7SRobert Watson 		uma_zone_set_max(ipq_zone, 1);
783d248c7d7SRobert Watson }
784d248c7d7SRobert Watson 
785d248c7d7SRobert Watson static int
786d248c7d7SRobert Watson sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
787d248c7d7SRobert Watson {
788d248c7d7SRobert Watson 	int error, i;
789d248c7d7SRobert Watson 
790d248c7d7SRobert Watson 	i = maxnipq;
791d248c7d7SRobert Watson 	error = sysctl_handle_int(oidp, &i, 0, req);
792d248c7d7SRobert Watson 	if (error || !req->newptr)
793d248c7d7SRobert Watson 		return (error);
794d248c7d7SRobert Watson 
795d248c7d7SRobert Watson 	/*
796d248c7d7SRobert Watson 	 * XXXRW: Might be a good idea to sanity check the argument and place
797d248c7d7SRobert Watson 	 * an extreme upper bound.
798d248c7d7SRobert Watson 	 */
799d248c7d7SRobert Watson 	if (i < -1)
800d248c7d7SRobert Watson 		return (EINVAL);
801d248c7d7SRobert Watson 	maxnipq = i;
802d248c7d7SRobert Watson 	maxnipq_update();
803d248c7d7SRobert Watson 	return (0);
804d248c7d7SRobert Watson }
805d248c7d7SRobert Watson 
806d248c7d7SRobert Watson SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
807d248c7d7SRobert Watson     NULL, 0, sysctl_maxnipq, "I",
808d248c7d7SRobert Watson     "Maximum number of IPv4 fragment reassembly queue entries");
809d248c7d7SRobert Watson 
810d248c7d7SRobert Watson /*
8118948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
812f0cada84SAndre Oppermann  * whole datagram.  If the argument is the first fragment or one
813f0cada84SAndre Oppermann  * in between the function will return NULL and store the mbuf
814f0cada84SAndre Oppermann  * in the fragment chain.  If the argument is the last fragment
815f0cada84SAndre Oppermann  * the packet will be reassembled and the pointer to the new
816f0cada84SAndre Oppermann  * mbuf returned for further processing.  Only m_tags attached
817f0cada84SAndre Oppermann  * to the first packet/fragment are preserved.
818f0cada84SAndre Oppermann  * The IP header is *NOT* adjusted out of iplen.
819df8bae1dSRodney W. Grimes  */
8208948e4baSArchie Cobbs 
821f0cada84SAndre Oppermann struct mbuf *
822f0cada84SAndre Oppermann ip_reass(struct mbuf *m)
823df8bae1dSRodney W. Grimes {
824f0cada84SAndre Oppermann 	struct ip *ip;
825f0cada84SAndre Oppermann 	struct mbuf *p, *q, *nq, *t;
826f0cada84SAndre Oppermann 	struct ipq *fp = NULL;
827f0cada84SAndre Oppermann 	struct ipqhead *head;
828f0cada84SAndre Oppermann 	int i, hlen, next;
82959dfcba4SHajimu UMEMOTO 	u_int8_t ecn, ecn0;
830f0cada84SAndre Oppermann 	u_short hash;
831df8bae1dSRodney W. Grimes 
832800af1fbSMaxim Konovalov 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
833800af1fbSMaxim Konovalov 	if (maxnipq == 0 || maxfragsperpacket == 0) {
834f0cada84SAndre Oppermann 		ipstat.ips_fragments++;
835f0cada84SAndre Oppermann 		ipstat.ips_fragdropped++;
8369d804f81SAndre Oppermann 		m_freem(m);
8379d804f81SAndre Oppermann 		return (NULL);
838f0cada84SAndre Oppermann 	}
8392fad1e93SSam Leffler 
840f0cada84SAndre Oppermann 	ip = mtod(m, struct ip *);
841f0cada84SAndre Oppermann 	hlen = ip->ip_hl << 2;
842f0cada84SAndre Oppermann 
843f0cada84SAndre Oppermann 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
844f0cada84SAndre Oppermann 	head = &ipq[hash];
845f0cada84SAndre Oppermann 	IPQ_LOCK();
846f0cada84SAndre Oppermann 
847f0cada84SAndre Oppermann 	/*
848f0cada84SAndre Oppermann 	 * Look for queue of fragments
849f0cada84SAndre Oppermann 	 * of this datagram.
850f0cada84SAndre Oppermann 	 */
851f0cada84SAndre Oppermann 	TAILQ_FOREACH(fp, head, ipq_list)
852f0cada84SAndre Oppermann 		if (ip->ip_id == fp->ipq_id &&
853f0cada84SAndre Oppermann 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
854f0cada84SAndre Oppermann 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
855f0cada84SAndre Oppermann #ifdef MAC
856f0cada84SAndre Oppermann 		    mac_fragment_match(m, fp) &&
857f0cada84SAndre Oppermann #endif
858f0cada84SAndre Oppermann 		    ip->ip_p == fp->ipq_p)
859f0cada84SAndre Oppermann 			goto found;
860f0cada84SAndre Oppermann 
861f0cada84SAndre Oppermann 	fp = NULL;
862f0cada84SAndre Oppermann 
863f0cada84SAndre Oppermann 	/*
864d248c7d7SRobert Watson 	 * Attempt to trim the number of allocated fragment queues if it
865d248c7d7SRobert Watson 	 * exceeds the administrative limit.
866f0cada84SAndre Oppermann 	 */
867f0cada84SAndre Oppermann 	if ((nipq > maxnipq) && (maxnipq > 0)) {
868f0cada84SAndre Oppermann 		/*
869f0cada84SAndre Oppermann 		 * drop something from the tail of the current queue
870f0cada84SAndre Oppermann 		 * before proceeding further
871f0cada84SAndre Oppermann 		 */
872f0cada84SAndre Oppermann 		struct ipq *q = TAILQ_LAST(head, ipqhead);
873f0cada84SAndre Oppermann 		if (q == NULL) {   /* gak */
874f0cada84SAndre Oppermann 			for (i = 0; i < IPREASS_NHASH; i++) {
875f0cada84SAndre Oppermann 				struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
876f0cada84SAndre Oppermann 				if (r) {
877f0cada84SAndre Oppermann 					ipstat.ips_fragtimeout += r->ipq_nfrags;
878f0cada84SAndre Oppermann 					ip_freef(&ipq[i], r);
879f0cada84SAndre Oppermann 					break;
880f0cada84SAndre Oppermann 				}
881f0cada84SAndre Oppermann 			}
882f0cada84SAndre Oppermann 		} else {
883f0cada84SAndre Oppermann 			ipstat.ips_fragtimeout += q->ipq_nfrags;
884f0cada84SAndre Oppermann 			ip_freef(head, q);
885f0cada84SAndre Oppermann 		}
886f0cada84SAndre Oppermann 	}
887f0cada84SAndre Oppermann 
888f0cada84SAndre Oppermann found:
889f0cada84SAndre Oppermann 	/*
890f0cada84SAndre Oppermann 	 * Adjust ip_len to not reflect header,
891f0cada84SAndre Oppermann 	 * convert offset of this to bytes.
892f0cada84SAndre Oppermann 	 */
893f0cada84SAndre Oppermann 	ip->ip_len -= hlen;
894f0cada84SAndre Oppermann 	if (ip->ip_off & IP_MF) {
895f0cada84SAndre Oppermann 		/*
896f0cada84SAndre Oppermann 		 * Make sure that fragments have a data length
897f0cada84SAndre Oppermann 		 * that's a non-zero multiple of 8 bytes.
898f0cada84SAndre Oppermann 		 */
899f0cada84SAndre Oppermann 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
900f0cada84SAndre Oppermann 			ipstat.ips_toosmall++; /* XXX */
901f0cada84SAndre Oppermann 			goto dropfrag;
902f0cada84SAndre Oppermann 		}
903f0cada84SAndre Oppermann 		m->m_flags |= M_FRAG;
904f0cada84SAndre Oppermann 	} else
905f0cada84SAndre Oppermann 		m->m_flags &= ~M_FRAG;
906f0cada84SAndre Oppermann 	ip->ip_off <<= 3;
907f0cada84SAndre Oppermann 
908f0cada84SAndre Oppermann 
909f0cada84SAndre Oppermann 	/*
910f0cada84SAndre Oppermann 	 * Attempt reassembly; if it succeeds, proceed.
911f0cada84SAndre Oppermann 	 * ip_reass() will return a different mbuf.
912f0cada84SAndre Oppermann 	 */
913f0cada84SAndre Oppermann 	ipstat.ips_fragments++;
914f0cada84SAndre Oppermann 	m->m_pkthdr.header = ip;
915f0cada84SAndre Oppermann 
916f0cada84SAndre Oppermann 	/* Previous ip_reass() started here. */
917df8bae1dSRodney W. Grimes 	/*
918df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
919df8bae1dSRodney W. Grimes 	 * would confuse code below.
920df8bae1dSRodney W. Grimes 	 */
921df8bae1dSRodney W. Grimes 	m->m_data += hlen;
922df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
923df8bae1dSRodney W. Grimes 
924df8bae1dSRodney W. Grimes 	/*
925df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
926df8bae1dSRodney W. Grimes 	 */
927042bbfa3SRobert Watson 	if (fp == NULL) {
928d248c7d7SRobert Watson 		fp = uma_zalloc(ipq_zone, M_NOWAIT);
929d248c7d7SRobert Watson 		if (fp == NULL)
930df8bae1dSRodney W. Grimes 			goto dropfrag;
93136b0360bSRobert Watson #ifdef MAC
9325e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
933d248c7d7SRobert Watson 			uma_zfree(ipq_zone, fp);
9345e7ce478SRobert Watson 			goto dropfrag;
9355e7ce478SRobert Watson 		}
93636b0360bSRobert Watson 		mac_create_ipq(m, fp);
93736b0360bSRobert Watson #endif
938462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
939194a213eSAndrey A. Chernov 		nipq++;
940375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
941df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
942df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
943df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
9446effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
9456effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
946af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
947af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
948800af1fbSMaxim Konovalov 		goto done;
94936b0360bSRobert Watson 	} else {
950375386e2SMike Silbersack 		fp->ipq_nfrags++;
95136b0360bSRobert Watson #ifdef MAC
95236b0360bSRobert Watson 		mac_update_ipq(m, fp);
95336b0360bSRobert Watson #endif
954df8bae1dSRodney W. Grimes 	}
955df8bae1dSRodney W. Grimes 
9566effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
9576effc713SDoug Rabson 
958df8bae1dSRodney W. Grimes 	/*
95959dfcba4SHajimu UMEMOTO 	 * Handle ECN by comparing this segment with the first one;
96059dfcba4SHajimu UMEMOTO 	 * if CE is set, do not lose CE.
96159dfcba4SHajimu UMEMOTO 	 * drop if CE and not-ECT are mixed for the same packet.
96259dfcba4SHajimu UMEMOTO 	 */
96359dfcba4SHajimu UMEMOTO 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
96459dfcba4SHajimu UMEMOTO 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
96559dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_CE) {
96659dfcba4SHajimu UMEMOTO 		if (ecn0 == IPTOS_ECN_NOTECT)
96759dfcba4SHajimu UMEMOTO 			goto dropfrag;
96859dfcba4SHajimu UMEMOTO 		if (ecn0 != IPTOS_ECN_CE)
96959dfcba4SHajimu UMEMOTO 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
97059dfcba4SHajimu UMEMOTO 	}
97159dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
97259dfcba4SHajimu UMEMOTO 		goto dropfrag;
97359dfcba4SHajimu UMEMOTO 
97459dfcba4SHajimu UMEMOTO 	/*
975df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
976df8bae1dSRodney W. Grimes 	 */
9776effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
9786effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
979df8bae1dSRodney W. Grimes 			break;
980df8bae1dSRodney W. Grimes 
981df8bae1dSRodney W. Grimes 	/*
982df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
983df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
984af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
985af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
986db4f9cc7SJonathan Lemon 	 *
987db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
988db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
989df8bae1dSRodney W. Grimes 	 */
9906effc713SDoug Rabson 	if (p) {
9916effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
992df8bae1dSRodney W. Grimes 		if (i > 0) {
993df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
994df8bae1dSRodney W. Grimes 				goto dropfrag;
9956a800098SYoshinobu Inoue 			m_adj(m, i);
996db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
997df8bae1dSRodney W. Grimes 			ip->ip_off += i;
998df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
999df8bae1dSRodney W. Grimes 		}
1000af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1001af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1002af38c68cSLuigi Rizzo 	} else {
1003af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1004af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1005df8bae1dSRodney W. Grimes 	}
1006df8bae1dSRodney W. Grimes 
1007df8bae1dSRodney W. Grimes 	/*
1008df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1009df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1010df8bae1dSRodney W. Grimes 	 */
10116effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1012af38c68cSLuigi Rizzo 	     q = nq) {
1013b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10146effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10156effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10166effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10176effc713SDoug Rabson 			m_adj(q, i);
1018db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1019df8bae1dSRodney W. Grimes 			break;
1020df8bae1dSRodney W. Grimes 		}
10216effc713SDoug Rabson 		nq = q->m_nextpkt;
1022af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
102399e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1024375386e2SMike Silbersack 		fp->ipq_nfrags--;
10256effc713SDoug Rabson 		m_freem(q);
1026df8bae1dSRodney W. Grimes 	}
1027df8bae1dSRodney W. Grimes 
1028df8bae1dSRodney W. Grimes 	/*
1029375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1030375386e2SMike Silbersack 	 * limiting.
1031375386e2SMike Silbersack 	 *
1032375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1033375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1034375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1035375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1036375386e2SMike Silbersack 	 *
1037df8bae1dSRodney W. Grimes 	 */
10386effc713SDoug Rabson 	next = 0;
10396effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1040375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
104199e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
104299e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1043375386e2SMike Silbersack 				ip_freef(head, fp);
104499e8617dSMaxim Konovalov 			}
1045f0cada84SAndre Oppermann 			goto done;
1046375386e2SMike Silbersack 		}
10476effc713SDoug Rabson 		next += GETIP(q)->ip_len;
10486effc713SDoug Rabson 	}
10496effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1050375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
105199e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
105299e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1053375386e2SMike Silbersack 			ip_freef(head, fp);
105499e8617dSMaxim Konovalov 		}
1055f0cada84SAndre Oppermann 		goto done;
1056375386e2SMike Silbersack 	}
1057df8bae1dSRodney W. Grimes 
1058df8bae1dSRodney W. Grimes 	/*
1059430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1060430d30d8SBill Fenner 	 */
10616effc713SDoug Rabson 	q = fp->ipq_frags;
10626effc713SDoug Rabson 	ip = GETIP(q);
106353be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1064430d30d8SBill Fenner 		ipstat.ips_toolong++;
106599e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1066462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1067f0cada84SAndre Oppermann 		goto done;
1068430d30d8SBill Fenner 	}
1069430d30d8SBill Fenner 
1070430d30d8SBill Fenner 	/*
1071430d30d8SBill Fenner 	 * Concatenate fragments.
1072df8bae1dSRodney W. Grimes 	 */
10736effc713SDoug Rabson 	m = q;
1074df8bae1dSRodney W. Grimes 	t = m->m_next;
107502410549SRobert Watson 	m->m_next = NULL;
1076df8bae1dSRodney W. Grimes 	m_cat(m, t);
10776effc713SDoug Rabson 	nq = q->m_nextpkt;
107802410549SRobert Watson 	q->m_nextpkt = NULL;
10796effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
10806effc713SDoug Rabson 		nq = q->m_nextpkt;
1081945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1082db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1083db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1084a8db1d93SJonathan Lemon 		m_cat(m, q);
1085df8bae1dSRodney W. Grimes 	}
108636b0360bSRobert Watson #ifdef MAC
108736b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
108836b0360bSRobert Watson 	mac_destroy_ipq(fp);
108936b0360bSRobert Watson #endif
1090df8bae1dSRodney W. Grimes 
1091df8bae1dSRodney W. Grimes 	/*
1092f0cada84SAndre Oppermann 	 * Create header for new ip packet by modifying header of first
1093f0cada84SAndre Oppermann 	 * packet;  dequeue and discard fragment reassembly header.
1094df8bae1dSRodney W. Grimes 	 * Make header visible.
1095df8bae1dSRodney W. Grimes 	 */
1096f0cada84SAndre Oppermann 	ip->ip_len = (ip->ip_hl << 2) + next;
10976effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
10986effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1099462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1100194a213eSAndrey A. Chernov 	nipq--;
1101d248c7d7SRobert Watson 	uma_zfree(ipq_zone, fp);
110253be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
110353be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1104df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1105a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1106a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
1107f0cada84SAndre Oppermann 	ipstat.ips_reassembled++;
1108f0cada84SAndre Oppermann 	IPQ_UNLOCK();
11096a800098SYoshinobu Inoue 	return (m);
1110df8bae1dSRodney W. Grimes 
1111df8bae1dSRodney W. Grimes dropfrag:
1112df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1113042bbfa3SRobert Watson 	if (fp != NULL)
1114375386e2SMike Silbersack 		fp->ipq_nfrags--;
1115df8bae1dSRodney W. Grimes 	m_freem(m);
1116f0cada84SAndre Oppermann done:
1117f0cada84SAndre Oppermann 	IPQ_UNLOCK();
1118f0cada84SAndre Oppermann 	return (NULL);
11196effc713SDoug Rabson 
11206effc713SDoug Rabson #undef GETIP
1121df8bae1dSRodney W. Grimes }
1122df8bae1dSRodney W. Grimes 
1123df8bae1dSRodney W. Grimes /*
1124df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1125df8bae1dSRodney W. Grimes  * associated datagrams.
1126df8bae1dSRodney W. Grimes  */
11270312fbe9SPoul-Henning Kamp static void
1128462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1129462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1130df8bae1dSRodney W. Grimes 	struct ipq *fp;
1131df8bae1dSRodney W. Grimes {
11326effc713SDoug Rabson 	register struct mbuf *q;
1133df8bae1dSRodney W. Grimes 
11342fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
11352fad1e93SSam Leffler 
11366effc713SDoug Rabson 	while (fp->ipq_frags) {
11376effc713SDoug Rabson 		q = fp->ipq_frags;
11386effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
11396effc713SDoug Rabson 		m_freem(q);
1140df8bae1dSRodney W. Grimes 	}
1141462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1142d248c7d7SRobert Watson 	uma_zfree(ipq_zone, fp);
1143194a213eSAndrey A. Chernov 	nipq--;
1144df8bae1dSRodney W. Grimes }
1145df8bae1dSRodney W. Grimes 
1146df8bae1dSRodney W. Grimes /*
1147df8bae1dSRodney W. Grimes  * IP timer processing;
1148df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1149df8bae1dSRodney W. Grimes  * queue, discard it.
1150df8bae1dSRodney W. Grimes  */
1151df8bae1dSRodney W. Grimes void
1152df8bae1dSRodney W. Grimes ip_slowtimo()
1153df8bae1dSRodney W. Grimes {
1154df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1155194a213eSAndrey A. Chernov 	int i;
1156df8bae1dSRodney W. Grimes 
11572fad1e93SSam Leffler 	IPQ_LOCK();
1158194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1159462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1160462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1161462b86feSPoul-Henning Kamp 
1162462b86feSPoul-Henning Kamp 			fpp = fp;
1163462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1164462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
116599e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1166462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1167df8bae1dSRodney W. Grimes 			}
1168df8bae1dSRodney W. Grimes 		}
1169194a213eSAndrey A. Chernov 	}
1170690a6055SJesper Skriver 	/*
1171690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1172690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1173690a6055SJesper Skriver 	 * enough to get down to the new limit.
1174690a6055SJesper Skriver 	 */
1175a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1176690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1177b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
117899e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
117999e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1180690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1181690a6055SJesper Skriver 			}
1182690a6055SJesper Skriver 		}
1183690a6055SJesper Skriver 	}
11842fad1e93SSam Leffler 	IPQ_UNLOCK();
1185df8bae1dSRodney W. Grimes }
1186df8bae1dSRodney W. Grimes 
1187df8bae1dSRodney W. Grimes /*
1188df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1189df8bae1dSRodney W. Grimes  */
1190df8bae1dSRodney W. Grimes void
1191df8bae1dSRodney W. Grimes ip_drain()
1192df8bae1dSRodney W. Grimes {
1193194a213eSAndrey A. Chernov 	int     i;
1194ce29ab3aSGarrett Wollman 
11952fad1e93SSam Leffler 	IPQ_LOCK();
1196194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1197462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
119899e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
119999e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1200462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1201194a213eSAndrey A. Chernov 		}
1202194a213eSAndrey A. Chernov 	}
12032fad1e93SSam Leffler 	IPQ_UNLOCK();
1204ce29ab3aSGarrett Wollman 	in_rtqdrain();
1205df8bae1dSRodney W. Grimes }
1206df8bae1dSRodney W. Grimes 
1207df8bae1dSRodney W. Grimes /*
1208de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
1209de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
1210de38924dSAndre Oppermann  */
1211de38924dSAndre Oppermann int
1212de38924dSAndre Oppermann ipproto_register(u_char ipproto)
1213de38924dSAndre Oppermann {
1214de38924dSAndre Oppermann 	struct protosw *pr;
1215de38924dSAndre Oppermann 
1216de38924dSAndre Oppermann 	/* Sanity checks. */
1217de38924dSAndre Oppermann 	if (ipproto == 0)
1218de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
1219de38924dSAndre Oppermann 
1220de38924dSAndre Oppermann 	/*
1221de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
1222de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
1223de38924dSAndre Oppermann 	 */
1224de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
1225de38924dSAndre Oppermann 	if (pr == NULL)
1226de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
1227de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
1228de38924dSAndre Oppermann 		return (EEXIST);
1229de38924dSAndre Oppermann 
1230de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
1231de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
1232de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
1233de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
1234de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
1235de38924dSAndre Oppermann 			/* Be careful to only index valid IP protocols. */
1236db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX) {
1237de38924dSAndre Oppermann 				ip_protox[pr->pr_protocol] = pr - inetsw;
1238de38924dSAndre Oppermann 				return (0);
1239de38924dSAndre Oppermann 			} else
1240de38924dSAndre Oppermann 				return (EINVAL);
1241de38924dSAndre Oppermann 		}
1242de38924dSAndre Oppermann 	}
1243de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
1244de38924dSAndre Oppermann }
1245de38924dSAndre Oppermann 
1246de38924dSAndre Oppermann int
1247de38924dSAndre Oppermann ipproto_unregister(u_char ipproto)
1248de38924dSAndre Oppermann {
1249de38924dSAndre Oppermann 	struct protosw *pr;
1250de38924dSAndre Oppermann 
1251de38924dSAndre Oppermann 	/* Sanity checks. */
1252de38924dSAndre Oppermann 	if (ipproto == 0)
1253de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
1254de38924dSAndre Oppermann 
1255de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
1256de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
1257de38924dSAndre Oppermann 	if (pr == NULL)
1258de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
1259de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
1260de38924dSAndre Oppermann 		return (ENOENT);
1261de38924dSAndre Oppermann 
1262de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
1263de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
1264de38924dSAndre Oppermann 	return (0);
1265de38924dSAndre Oppermann }
1266de38924dSAndre Oppermann 
1267df8bae1dSRodney W. Grimes /*
1268df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1269df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1270df8bae1dSRodney W. Grimes  */
1271bd714208SRuslan Ermilov struct in_ifaddr *
127202c1c707SAndre Oppermann ip_rtaddr(dst)
1273df8bae1dSRodney W. Grimes 	struct in_addr dst;
1274df8bae1dSRodney W. Grimes {
127597d8d152SAndre Oppermann 	struct route sro;
127602c1c707SAndre Oppermann 	struct sockaddr_in *sin;
127702c1c707SAndre Oppermann 	struct in_ifaddr *ifa;
1278df8bae1dSRodney W. Grimes 
12790cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
128097d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
1281df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
1282df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
1283df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
128497d8d152SAndre Oppermann 	rtalloc_ign(&sro, RTF_CLONING);
1285df8bae1dSRodney W. Grimes 
128697d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
128702410549SRobert Watson 		return (NULL);
128802c1c707SAndre Oppermann 
128997d8d152SAndre Oppermann 	ifa = ifatoia(sro.ro_rt->rt_ifa);
129097d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
129102410549SRobert Watson 	return (ifa);
1292df8bae1dSRodney W. Grimes }
1293df8bae1dSRodney W. Grimes 
/*
 * Table of errno values with PRC_NCMDS slots; a 0 entry carries no
 * error.  The array is sized by PRC_NCMDS, so it is presumably indexed
 * by PRC_* control command number -- confirm against the callers.
 * The number in each comment is the array index.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	EHOSTUNREACH,		/* 18 */
	0,			/* 19 */
	ENOPROTOOPT,		/* 20 */
	ECONNREFUSED		/* 21 */
};
1302df8bae1dSRodney W. Grimes 
1303df8bae1dSRodney W. Grimes /*
1304df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1305df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1306df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1307df8bae1dSRodney W. Grimes  * of codes and types.
1308df8bae1dSRodney W. Grimes  *
1309df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1310df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1311df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1312df8bae1dSRodney W. Grimes  * protocol deal with that.
1313df8bae1dSRodney W. Grimes  *
1314df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1315df8bae1dSRodney W. Grimes  * via a source route.
1316df8bae1dSRodney W. Grimes  */
13179b932e9eSAndre Oppermann void
13189b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
1319df8bae1dSRodney W. Grimes {
13202b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
13219b932e9eSAndre Oppermann 	struct in_ifaddr *ia = NULL;
1322df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
13239b932e9eSAndre Oppermann 	struct in_addr dest;
1324c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
13253efc3014SJulian Elischer 
13269b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1327df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1328df8bae1dSRodney W. Grimes 		m_freem(m);
1329df8bae1dSRodney W. Grimes 		return;
1330df8bae1dSRodney W. Grimes 	}
13311b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
13321b968362SDag-Erling Smørgrav 	if (!ipstealth) {
13331b968362SDag-Erling Smørgrav #endif
1334df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
13351b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
133602c1c707SAndre Oppermann 			    0, 0);
1337df8bae1dSRodney W. Grimes 			return;
1338df8bae1dSRodney W. Grimes 		}
13391b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
13401b968362SDag-Erling Smørgrav 	}
13411b968362SDag-Erling Smørgrav #endif
1342df8bae1dSRodney W. Grimes 
13439b932e9eSAndre Oppermann 	if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
134402c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
1345df8bae1dSRodney W. Grimes 		return;
134602c1c707SAndre Oppermann 	}
1347df8bae1dSRodney W. Grimes 
1348df8bae1dSRodney W. Grimes 	/*
1349bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1350bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1351bfef7ed4SIan Dowse 	 *
13524d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
13534d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
13544d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
13554d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
13564d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
13574d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
13584d2e3692SLuigi Rizzo 	 *
1359bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1360bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1361bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1362bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1363df8bae1dSRodney W. Grimes 	 */
1364780b2f69SAndre Oppermann 	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
1365a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
13669967cafcSSam Leffler 		/*
13679967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
13689967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
13699967cafcSSam Leffler 		 * be conservative and just discard the copy since
13709967cafcSSam Leffler 		 * code below may some day want the tags.
13719967cafcSSam Leffler 		 */
13729967cafcSSam Leffler 		m_free(mcopy);
13739967cafcSSam Leffler 		mcopy = NULL;
13749967cafcSSam Leffler 	}
1375bfef7ed4SIan Dowse 	if (mcopy != NULL) {
1376780b2f69SAndre Oppermann 		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
1377e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1378bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1379bfef7ed4SIan Dowse 	}
138004287599SRuslan Ermilov 
138104287599SRuslan Ermilov #ifdef IPSTEALTH
138204287599SRuslan Ermilov 	if (!ipstealth) {
138304287599SRuslan Ermilov #endif
138404287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
138504287599SRuslan Ermilov #ifdef IPSTEALTH
138604287599SRuslan Ermilov 	}
138704287599SRuslan Ermilov #endif
1388df8bae1dSRodney W. Grimes 
1389df8bae1dSRodney W. Grimes 	/*
1390df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1391df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1392df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1393df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1394df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1395df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1396df8bae1dSRodney W. Grimes 	 */
13979b932e9eSAndre Oppermann 	dest.s_addr = 0;
13989b932e9eSAndre Oppermann 	if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
139902c1c707SAndre Oppermann 		struct sockaddr_in *sin;
140002c1c707SAndre Oppermann 		struct route ro;
140102c1c707SAndre Oppermann 		struct rtentry *rt;
140202c1c707SAndre Oppermann 
14030cfbbe3bSAndre Oppermann 		bzero(&ro, sizeof(ro));
140402c1c707SAndre Oppermann 		sin = (struct sockaddr_in *)&ro.ro_dst;
140502c1c707SAndre Oppermann 		sin->sin_family = AF_INET;
140602c1c707SAndre Oppermann 		sin->sin_len = sizeof(*sin);
14079b932e9eSAndre Oppermann 		sin->sin_addr = ip->ip_dst;
140826d02ca7SAndre Oppermann 		rtalloc_ign(&ro, RTF_CLONING);
140902c1c707SAndre Oppermann 
141002c1c707SAndre Oppermann 		rt = ro.ro_rt;
141102c1c707SAndre Oppermann 
141202c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
14139b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1414df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1415df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1416df8bae1dSRodney W. Grimes 
1417df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1418df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1419df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
14209b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1421df8bae1dSRodney W. Grimes 				else
14229b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1423df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1424df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1425df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1426df8bae1dSRodney W. Grimes 			}
1427df8bae1dSRodney W. Grimes 		}
142802c1c707SAndre Oppermann 		if (rt)
142902c1c707SAndre Oppermann 			RTFREE(rt);
143002c1c707SAndre Oppermann 	}
1431df8bae1dSRodney W. Grimes 
143202410549SRobert Watson 	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
1433df8bae1dSRodney W. Grimes 	if (error)
1434df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1435df8bae1dSRodney W. Grimes 	else {
1436df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1437df8bae1dSRodney W. Grimes 		if (type)
1438df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1439df8bae1dSRodney W. Grimes 		else {
14409188b4a1SAndre Oppermann 			if (mcopy)
1441df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1442df8bae1dSRodney W. Grimes 			return;
1443df8bae1dSRodney W. Grimes 		}
1444df8bae1dSRodney W. Grimes 	}
1445df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1446df8bae1dSRodney W. Grimes 		return;
1447df8bae1dSRodney W. Grimes 
1448df8bae1dSRodney W. Grimes 	switch (error) {
1449df8bae1dSRodney W. Grimes 
1450df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1451df8bae1dSRodney W. Grimes 		/* type, code set above */
1452df8bae1dSRodney W. Grimes 		break;
1453df8bae1dSRodney W. Grimes 
1454df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1455df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1456df8bae1dSRodney W. Grimes 	case ENETDOWN:
1457df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1458df8bae1dSRodney W. Grimes 	default:
1459df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1460df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1461df8bae1dSRodney W. Grimes 		break;
1462df8bae1dSRodney W. Grimes 
1463df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1464df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1465df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
146602c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
14676a800098SYoshinobu Inoue 		/*
14686a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
14696a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
14706a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
14716a800098SYoshinobu Inoue 		 * XXX quickhack!!!
14726a800098SYoshinobu Inoue 		 */
147302c1c707SAndre Oppermann 		{
14746a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
14756a800098SYoshinobu Inoue 			int ipsecerror;
14766a800098SYoshinobu Inoue 			int ipsechdr;
147702c1c707SAndre Oppermann 			struct route *ro;
14786a800098SYoshinobu Inoue 
147902c1c707SAndre Oppermann #ifdef IPSEC
14806a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
14816a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
14826a800098SYoshinobu Inoue 						    IP_FORWARDING,
14836a800098SYoshinobu Inoue 						    &ipsecerror);
148402c1c707SAndre Oppermann #else /* FAST_IPSEC */
1485b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1486b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1487b9234fafSSam Leffler 						   IP_FORWARDING,
1488b9234fafSSam Leffler 						   &ipsecerror);
148902c1c707SAndre Oppermann #endif
149002c1c707SAndre Oppermann 			if (sp != NULL) {
1491b9234fafSSam Leffler 				/* count IPsec header size */
1492b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1493b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1494b9234fafSSam Leffler 							 NULL);
1495b9234fafSSam Leffler 
1496b9234fafSSam Leffler 				/*
1497b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1498b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1499b9234fafSSam Leffler 				 */
1500b9234fafSSam Leffler 				if (sp->req != NULL
1501b9234fafSSam Leffler 				 && sp->req->sav != NULL
1502b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
150302c1c707SAndre Oppermann 					ro = &sp->req->sav->sah->sa_route;
150402c1c707SAndre Oppermann 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1505c773494eSAndre Oppermann 						mtu =
150657ab3660SBruce M Simpson 						    ro->ro_rt->rt_rmx.rmx_mtu ?
150757ab3660SBruce M Simpson 						    ro->ro_rt->rt_rmx.rmx_mtu :
150802c1c707SAndre Oppermann 						    ro->ro_rt->rt_ifp->if_mtu;
1509c773494eSAndre Oppermann 						mtu -= ipsechdr;
1510b9234fafSSam Leffler 					}
1511b9234fafSSam Leffler 				}
1512b9234fafSSam Leffler 
151302c1c707SAndre Oppermann #ifdef IPSEC
151402c1c707SAndre Oppermann 				key_freesp(sp);
151502c1c707SAndre Oppermann #else /* FAST_IPSEC */
1516b9234fafSSam Leffler 				KEY_FREESP(&sp);
151702c1c707SAndre Oppermann #endif
151802c1c707SAndre Oppermann 				ipstat.ips_cantfrag++;
151902c1c707SAndre Oppermann 				break;
1520ab48768bSAndre Oppermann 			}
152102c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
15229b932e9eSAndre Oppermann 		/*
1523ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1524ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1525ab48768bSAndre Oppermann 		 * current packet size.
15269b932e9eSAndre Oppermann 		 */
1527ab48768bSAndre Oppermann 		if (mtu == 0) {
1528ab48768bSAndre Oppermann 			if (ia != NULL)
1529c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1530ab48768bSAndre Oppermann 			else
1531ab48768bSAndre Oppermann 				mtu = ip_next_mtu(ip->ip_len, 0);
1532ab48768bSAndre Oppermann 		}
153302c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
1534b9234fafSSam Leffler 		}
153502c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
1536df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
1537df8bae1dSRodney W. Grimes 		break;
1538df8bae1dSRodney W. Grimes 
1539df8bae1dSRodney W. Grimes 	case ENOBUFS:
1540df285b3dSMike Silbersack 		/*
1541df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
1542df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
1543df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
1544df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
1545df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
1546df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
1547df285b3dSMike Silbersack 		 */
1548df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
1549df285b3dSMike Silbersack 			m_freem(mcopy);
1550df285b3dSMike Silbersack 			return;
1551df285b3dSMike Silbersack 		} else {
1552df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
1553df8bae1dSRodney W. Grimes 			code = 0;
1554df285b3dSMike Silbersack 		}
1555df8bae1dSRodney W. Grimes 		break;
15563a06e3e0SRuslan Ermilov 
15573a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
15583a06e3e0SRuslan Ermilov 		m_freem(mcopy);
15593a06e3e0SRuslan Ermilov 		return;
1560df8bae1dSRodney W. Grimes 	}
1561c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1562df8bae1dSRodney W. Grimes }
1563df8bae1dSRodney W. Grimes 
156482c23ebaSBill Fenner void
156582c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
156682c23ebaSBill Fenner 	register struct inpcb *inp;
156782c23ebaSBill Fenner 	register struct mbuf **mp;
156882c23ebaSBill Fenner 	register struct ip *ip;
156982c23ebaSBill Fenner 	register struct mbuf *m;
157082c23ebaSBill Fenner {
1571be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
1572be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1573be8a62e8SPoul-Henning Kamp 
1574be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1575be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
1576be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
1577be8a62e8SPoul-Henning Kamp 			SCM_BINTIME, SOL_SOCKET);
1578be8a62e8SPoul-Henning Kamp 			if (*mp)
1579be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
1580be8a62e8SPoul-Henning Kamp 		}
158182c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
158282c23ebaSBill Fenner 			struct timeval tv;
158382c23ebaSBill Fenner 
1584be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
158582c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
158682c23ebaSBill Fenner 				SCM_TIMESTAMP, SOL_SOCKET);
158782c23ebaSBill Fenner 			if (*mp)
158882c23ebaSBill Fenner 				mp = &(*mp)->m_next;
15894cc20ab1SSeigo Tanimura 		}
1590be8a62e8SPoul-Henning Kamp 	}
159182c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
159282c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
159382c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
159482c23ebaSBill Fenner 		if (*mp)
159582c23ebaSBill Fenner 			mp = &(*mp)->m_next;
159682c23ebaSBill Fenner 	}
15974957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
15984957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
15994957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
16004957466bSMatthew N. Dodd 		if (*mp)
16014957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
16024957466bSMatthew N. Dodd 	}
160382c23ebaSBill Fenner #ifdef notyet
160482c23ebaSBill Fenner 	/* XXX
160582c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
160682c23ebaSBill Fenner 	 * than they already were.
160782c23ebaSBill Fenner 	 */
160882c23ebaSBill Fenner 	/* options were tossed already */
160982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
161082c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
161182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
161282c23ebaSBill Fenner 		if (*mp)
161382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
161482c23ebaSBill Fenner 	}
161582c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
161682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1617e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
161882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
161982c23ebaSBill Fenner 		if (*mp)
162082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
162182c23ebaSBill Fenner 	}
162282c23ebaSBill Fenner #endif
162382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1624d314ad7bSJulian Elischer 		struct ifnet *ifp;
1625d314ad7bSJulian Elischer 		struct sdlbuf {
162682c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1627d314ad7bSJulian Elischer 			u_char	pad[32];
1628d314ad7bSJulian Elischer 		} sdlbuf;
1629d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1630d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
163182c23ebaSBill Fenner 
1632d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
1633d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
16344a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1635d314ad7bSJulian Elischer 			/*
1636d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1637d314ad7bSJulian Elischer 			 */
1638d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
1639d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
1640d314ad7bSJulian Elischer 				goto makedummy;
1641d314ad7bSJulian Elischer 			}
1642d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1643d314ad7bSJulian Elischer 		} else {
1644d314ad7bSJulian Elischer makedummy:
1645d314ad7bSJulian Elischer 			sdl2->sdl_len
1646d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
1647d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1648d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1649d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1650d314ad7bSJulian Elischer 		}
1651d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
165282c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
165382c23ebaSBill Fenner 		if (*mp)
165482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
165582c23ebaSBill Fenner 	}
165682c23ebaSBill Fenner }
165782c23ebaSBill Fenner 
/*
 * State shared by the RSVP entry points that follow.
 *
 * XXX those routines are called from the upper part of the kernel and
 * will need locking of their own once Giant is removed.
 *
 * They could also live in ip_mroute.c, since all the RSVP handling is
 * done there already.
 *
 * ip_rsvp_on records whether this file has already bumped the rsvp_on
 * counter; ip_rsvpd is the socket registered through ip_rsvp_init(),
 * or NULL when there is none.
 */
static int ip_rsvp_on;
struct socket *ip_rsvpd;
1667df8bae1dSRodney W. Grimes int
1668f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1669f0068c4aSGarrett Wollman {
1670f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1671f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1672f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1673f0068c4aSGarrett Wollman 
1674f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
1675f0068c4aSGarrett Wollman 		return EADDRINUSE;
1676f0068c4aSGarrett Wollman 
1677f0068c4aSGarrett Wollman 	ip_rsvpd = so;
16781c5de19aSGarrett Wollman 	/*
16791c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
16801c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
16811c5de19aSGarrett Wollman 	 */
16821c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
16831c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
16841c5de19aSGarrett Wollman 		rsvp_on++;
16851c5de19aSGarrett Wollman 	}
1686f0068c4aSGarrett Wollman 
1687f0068c4aSGarrett Wollman 	return 0;
1688f0068c4aSGarrett Wollman }
1689f0068c4aSGarrett Wollman 
1690f0068c4aSGarrett Wollman int
1691f0068c4aSGarrett Wollman ip_rsvp_done(void)
1692f0068c4aSGarrett Wollman {
1693f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
16941c5de19aSGarrett Wollman 	/*
16951c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
16961c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
16971c5de19aSGarrett Wollman 	 */
16981c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
16991c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
17001c5de19aSGarrett Wollman 		rsvp_on--;
17011c5de19aSGarrett Wollman 	}
1702f0068c4aSGarrett Wollman 	return 0;
1703f0068c4aSGarrett Wollman }
1704bbb4330bSLuigi Rizzo 
1705bbb4330bSLuigi Rizzo void
1706bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
1707bbb4330bSLuigi Rizzo {
1708bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
1709bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
1710bbb4330bSLuigi Rizzo 		return;
1711bbb4330bSLuigi Rizzo 	}
1712bbb4330bSLuigi Rizzo 
1713bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1714bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1715bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1716bbb4330bSLuigi Rizzo 	 */
1717bbb4330bSLuigi Rizzo 
1718bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
1719bbb4330bSLuigi Rizzo 		m_freem(m);
1720bbb4330bSLuigi Rizzo 		return;
1721bbb4330bSLuigi Rizzo 	}
1722bbb4330bSLuigi Rizzo 
1723bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
1724bbb4330bSLuigi Rizzo 		rip_input(m, off);
1725bbb4330bSLuigi Rizzo 		return;
1726bbb4330bSLuigi Rizzo 	}
1727bbb4330bSLuigi Rizzo 	/* Drop the packet */
1728bbb4330bSLuigi Rizzo 	m_freem(m);
1729bbb4330bSLuigi Rizzo }
1730