xref: /freebsd/sys/netinet/ip_input.c (revision ac9d7e2618a58c838eaeedfbb90e6a70f7530637)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
14df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
15df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
16df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
17df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
18df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
19df8bae1dSRodney W. Grimes  *    without specific prior written permission.
20df8bae1dSRodney W. Grimes  *
21df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
32df8bae1dSRodney W. Grimes  *
33df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34c3aac50fSPeter Wemm  * $FreeBSD$
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37e4f4247aSEivind Eklund #include "opt_bootp.h"
3874a9466cSGary Palmer #include "opt_ipfw.h"
39b715f178SLuigi Rizzo #include "opt_ipdn.h"
40fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
411ee25934SPeter Wemm #include "opt_ipfilter.h"
4227108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
436a800098SYoshinobu Inoue #include "opt_ipsec.h"
4436b0360bSRobert Watson #include "opt_mac.h"
45c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4664dddc18SKris Kennaway #include "opt_random_ip_id.h"
4774a9466cSGary Palmer 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
5036b0360bSRobert Watson #include <sys/mac.h>
51df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
52b715f178SLuigi Rizzo #include <sys/malloc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54df8bae1dSRodney W. Grimes #include <sys/protosw.h>
55df8bae1dSRodney W. Grimes #include <sys/socket.h>
56df8bae1dSRodney W. Grimes #include <sys/time.h>
57df8bae1dSRodney W. Grimes #include <sys/kernel.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61c85540ddSAndrey A. Chernov #include <net/pfil.h>
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7658938916SGarrett Wollman #include <machine/in_cksum.h>
77df8bae1dSRodney W. Grimes 
78f0068c4aSGarrett Wollman #include <sys/socketvar.h>
796ddbf1e2SGary Palmer 
806ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
81ac9d7e26SMax Laier #include <netinet/ip_divert.h>
82db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
83db69a05dSPaul Saab 
846a800098SYoshinobu Inoue #ifdef IPSEC
856a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
866a800098SYoshinobu Inoue #include <netkey/key.h>
876a800098SYoshinobu Inoue #endif
886a800098SYoshinobu Inoue 
89b9234fafSSam Leffler #ifdef FAST_IPSEC
90b9234fafSSam Leffler #include <netipsec/ipsec.h>
91b9234fafSSam Leffler #include <netipsec/key.h>
92b9234fafSSam Leffler #endif
93b9234fafSSam Leffler 
/*
 * Run-time tunables.  Except for rsvp_on, each variable below is exported
 * read-write under the _net_inet_ip sysctl node by the SYSCTL_INT() that
 * follows it; the initializer is the value in effect until it is changed.
 */

/* Non-zero makes ip_input() take its "ours" path for every IPPROTO_RSVP packet. */
941c5de19aSGarrett Wollman int rsvp_on = 0;
95f0068c4aSGarrett Wollman 
/* Also consulted by ip_input(): the ip_checkinterface test is only armed while this is 0. */
961f91d8c5SDavid Greenman int	ipforwarding = 0;
970312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
983d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
990312fbe9SPoul-Henning Kamp 
100d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
1010312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
1023d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
1030312fbe9SPoul-Henning Kamp 
/* Starts out as IPDEFTTL; exported under the sysctl name "ttl". */
104df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1050312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1063d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1070312fbe9SPoul-Henning Kamp 
/* Source routing: forwarding and local acceptance are separate knobs, both off. */
1080312fbe9SPoul-Henning Kamp static int	ip_dosourceroute = 0;
1090312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
1103d177f46SBill Fumerola     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
1114fce5804SGuido van Rooij 
1124fce5804SGuido van Rooij static int	ip_acceptsourceroute = 0;
1134fce5804SGuido van Rooij SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
1143d177f46SBill Fumerola     CTLFLAG_RW, &ip_acceptsourceroute, 0,
1153d177f46SBill Fumerola     "Enable accepting source routed IP packets");
1166a800098SYoshinobu Inoue 
1176a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1186a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1196a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1206a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1216a800098SYoshinobu Inoue 
/*
 * Fragment reassembly limits.  maxnipq and maxfragsperpacket have no
 * static initializer; ip_init() sets them to nmbclusters / 32 and 16
 * respectively, after which they can be changed through sysctl.
 */
122402062e8Mike Silbersack static int    nipq = 0;         /* total # of reass queues */
123402062e8Mike Silbersack static int    maxnipq;
124690a6055SJesper Skriver SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
125402062e8Mike Silbersack 	&maxnipq, 0,
126690a6055SJesper Skriver 	"Maximum number of IPv4 fragment reassembly queue entries");
127690a6055SJesper Skriver 
128375386e2SMike Silbersack static int    maxfragsperpacket;
129375386e2SMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
130375386e2SMike Silbersack 	&maxfragsperpacket, 0,
131375386e2SMike Silbersack 	"Maximum number of IPv4 fragments allowed per packet");
132375386e2SMike Silbersack 
/* Source quench transmission is off unless enabled through sysctl. */
133df285b3dSMike Silbersack static int	ip_sendsourcequench = 0;
134df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
135df285b3dSMike Silbersack 	&ip_sendsourcequench, 0,
136df285b3dSMike Silbersack 	"Enable the transmission of source quench packets");
137df285b3dSMike Silbersack 
138823db0e9Don Lewis /*
139823db0e9Don Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
140823db0e9Don Lewis  * the Strong ES model described in RFC 1122, but since the routing table
141a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
142823db0e9Don Lewis  * setting this to 1 results in an odd hybrid.
1433f67c834SDon Lewis  *
144a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
145a8f12100SDon Lewis  * to translate the destination address to another local interface.
1463f67c834SDon Lewis  *
1473f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1483f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1493f67c834SDon Lewis  * packets for those addresses are received.
150823db0e9Don Lewis  */
/*
 * ip_input() applies the check only when this is non-zero, ipforwarding
 * is 0, the packet has a receive interface that is not a loopback
 * interface, no firewall next hop is attached to the packet, and
 * (with PFIL_HOOKS) no input hook changed the destination address.
 */
151b3e95d4eSJonathan Lemon static int	ip_checkinterface = 1;
152b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
153b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
154b3e95d4eSJonathan Lemon 
155df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1560312fbe9SPoul-Henning Kamp static int	ipprintfs = 0;
157df8bae1dSRodney W. Grimes #endif
/*
 * Packet filter hook head for AF_INET: registered by ip_init() and run
 * by ip_input() with PFIL_IN on every packet that reaches the hook point.
 */
158134ea224SSam Leffler #ifdef PFIL_HOOKS
159134ea224SSam Leffler struct pfil_head inet_pfil_hook;
160134ea224SSam Leffler #endif
161df8bae1dSRodney W. Grimes 
/*
 * IP input queue.  ip_init() copies ipqmaxlen into ipintrq.ifq_maxlen and
 * passes the queue to netisr_register() together with ip_input().
 */
1621cafed39SJonathan Lemon static struct	ifqueue ipintrq;
163ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
164ca925d9cSJonathan Lemon 
165df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
166f0ffb944SJulian Elischer extern	struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  ip_init() points
 * every slot at the raw IP entry and then overrides the slots of the
 * protocols found in inetdomain's protocol switch.
 */
167df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16859562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
169ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
170ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
171ca925d9cSJonathan Lemon 
/* Queue limit is writable; the drop counter is exported read-only. */
172afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1733d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1740312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1753d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
176df8bae1dSRodney W. Grimes 
/* IP statistics counters; note that the whole structure is exported CTLFLAG_RW. */
177f23b4c91SGarrett Wollman struct ipstat ipstat;
178c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1793d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
180194a213eSAndrey A. Chernov 
181194a213eAndrey A. Chernov /*
 * Packet reassembly stuff.
 *
 * Reassembly queues live in IPREASS_NHASH (1 << 6 = 64) hash buckets.
 * IPREASS_HASH(x, y) builds an 8-bit value from bits 0-3 of x (low
 * nibble) and bits 8-11 of x (high nibble), XORs it with y, and keeps
 * the low IPREASS_NHASH_LOG2 bits as the bucket index.  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 * What callers pass as x and y is not visible in this part of the file.
 */
182194a213eAndrey A. Chernov #define IPREASS_NHASH_LOG2      6
183194a213eAndrey A. Chernov #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
184194a213eAndrey A. Chernov #define IPREASS_HMASK           (IPREASS_NHASH - 1)
185194a213eAndrey A. Chernov #define IPREASS_HASH(x,y) \
186831a80b0SMatthew Dillon 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
187194a213eAndrey A. Chernov 
/* One tail queue of struct ipq per hash bucket; emptied by ip_init(). */
188462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
/*
 * Mutex behind the IPQ_* macros below; created by ip_init() through
 * IPQ_LOCK_INIT().
 * NOTE(review): presumably serializes access to ipq[] and nipq; the
 * users are outside this view -- confirm.
 */
1892fad1e93SSam Leffler struct mtx ipqlock;
1902fad1e93SSam Leffler 
1912fad1e93SSam Leffler #define	IPQ_LOCK()	mtx_lock(&ipqlock)
1922fad1e93SSam Leffler #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
193888c2a3cSSam Leffler #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
194888c2a3cSSam Leffler #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
195f23b4c91SGarrett Wollman 
/*
 * Optional default-MTU sysctl, compiled only when IPCTL_DEFMTU is defined.
 * NOTE(review): ip_mtu is not declared in the visible part of this file.
 */
1960312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1970312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1983d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1990312fbe9SPoul-Henning Kamp #endif
2000312fbe9SPoul-Henning Kamp 
/*
 * NOTE(review): the "stealth" sysctl is registered with an empty
 * description and ipstealth is not read in the visible part of the file;
 * its exact effect must be confirmed where it is used.
 */
2011b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
202c76ff708SAndre Oppermann int	ipstealth = 0;
2031b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
2041b968362SDag-Erling Smørgrav     &ipstealth, 0, "");
2051b968362SDag-Erling Smørgrav #endif
2061b968362SDag-Erling Smørgrav 
207cfe8b629SGarrett Wollman 
20823bf9953SPoul-Henning Kamp /*
 * Firewall hooks.
 *
 * ip_input() calls ip_fw_chk_ptr only when fw_enable is non-zero and
 * IPFW_LOADED is true.  fw_one_pass is not used in the visible part of
 * this file.
 */
20923bf9953SPoul-Henning Kamp ip_fw_chk_t *ip_fw_chk_ptr;
2109fcc0795SLuigi Rizzo int fw_enable = 1 ;
21197850a5dSLuigi Rizzo int fw_one_pass = 1;
212e7319babSPoul-Henning Kamp 
213db69a05dSPaul Saab /*
 * Dummynet hooks.
 *
 * ip_input() calls ip_dn_io_ptr when DUMMYNET_LOADED is true and the
 * firewall verdict has IP_FW_PORT_DYNT_FLAG set.
 */
214db69a05dSPaul Saab ip_dn_io_t *ip_dn_io_ptr;
215b715f178SLuigi Rizzo 
216929b31ddSam Leffler /*
2174d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
2184d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
2194d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
2204d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
2214d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
2224d2e3692SLuigi Rizzo  *
223df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
224df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
225df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
226df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
227df8bae1dSRodney W. Grimes  * to us.
228df8bae1dSRodney W. Grimes  */
/* ip_input() resets ip_nhops to 0 for each packet before option processing. */
2290312fbe9SPoul-Henning Kamp static int	ip_nhops = 0;
/*
 * Saved source route.  route[] has room for MAX_IPOPTLEN /
 * sizeof(struct in_addr) addresses.  Do not add or reorder members; see
 * the layout warning in the comment above.
 */
230df8bae1dSRodney W. Grimes static	struct ip_srcrt {
231df8bae1dSRodney W. Grimes 	struct	in_addr dst;			/* final destination */
232df8bae1dSRodney W. Grimes 	char	nop;				/* one NOP to align */
233df8bae1dSRodney W. Grimes 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
234df8bae1dSRodney W. Grimes 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
235df8bae1dSRodney W. Grimes } ip_srcrt;
236df8bae1dSRodney W. Grimes 
/* File-local helpers; their definitions are outside this view. */
2374d77a549SAlfred Perlstein static void	save_rte(u_char *, struct in_addr);
2382b25acc1SLuigi Rizzo static int	ip_dooptions(struct mbuf *m, int,
2392b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
24002c1c707SAndre Oppermann static void	ip_forward(struct mbuf *m, int srcrt,
2412b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2424d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
243ac9d7e26SMax Laier static struct	mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *);
2448948e4baSArchie Cobbs 
245df8bae1dSRodney W. Grimes /*
246df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
247df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
248df8bae1dSRodney W. Grimes  */
249df8bae1dSRodney W. Grimes void
250df8bae1dSRodney W. Grimes ip_init()
251df8bae1dSRodney W. Grimes {
252f0ffb944SJulian Elischer 	register struct protosw *pr;
253df8bae1dSRodney W. Grimes 	register int i;
254df8bae1dSRodney W. Grimes 
25559562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
256ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
257f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
258df8bae1dSRodney W. Grimes 	if (pr == 0)
259df8bae1dSRodney W. Grimes 		panic("ip_init");
260df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
261df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
262f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
263f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
264df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
265df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
266df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
267194a213eSAndrey A. Chernov 
268134ea224SSam Leffler #ifdef PFIL_HOOKS
269134ea224SSam Leffler 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
270134ea224SSam Leffler 	inet_pfil_hook.ph_af = AF_INET;
271134ea224SSam Leffler 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
272134ea224SSam Leffler 		printf("%s: WARNING: unable to register pfil hook, "
273134ea224SSam Leffler 			"error %d\n", __func__, i);
274134ea224SSam Leffler #endif /* PFIL_HOOKS */
275134ea224SSam Leffler 
2762fad1e93SSam Leffler 	IPQ_LOCK_INIT();
277194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
278462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
279194a213eSAndrey A. Chernov 
280375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
281375386e2SMike Silbersack 	maxfragsperpacket = 16;
282194a213eSAndrey A. Chernov 
28364dddc18SKris Kennaway #ifndef RANDOM_IP_ID
284227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
28564dddc18SKris Kennaway #endif
286df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2876008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2887902224cSSam Leffler 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
289df8bae1dSRodney W. Grimes }
290df8bae1dSRodney W. Grimes 
2914d2e3692SLuigi Rizzo /*
292df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
293df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
294df8bae1dSRodney W. Grimes  */
295c67b1d17SGarrett Wollman void
296c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
297df8bae1dSRodney W. Grimes {
2989188b4a1SAndre Oppermann 	struct ip *ip = NULL;
29923bf9953SPoul-Henning Kamp 	struct ipq *fp;
3005da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
301ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
3029188b4a1SAndre Oppermann 	int    i, checkif, hlen = 0;
30347c861ecSBrian Somers 	u_short sum;
3047538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
305ac9d7e26SMax Laier #ifdef IPDIVERT
306ac9d7e26SMax Laier 	u_int32_t divert_info;			/* packet divert/tee info */
307ac9d7e26SMax Laier #endif
3082b25acc1SLuigi Rizzo 	struct ip_fw_args args;
30902c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
310f51f805fSSam Leffler #ifdef PFIL_HOOKS
311f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
312f51f805fSSam Leffler #endif
313b9234fafSSam Leffler #ifdef FAST_IPSEC
31436e8826fSMax Laier 	struct m_tag *mtag;
315b9234fafSSam Leffler 	struct tdb_ident *tdbi;
316b9234fafSSam Leffler 	struct secpolicy *sp;
317b9234fafSSam Leffler 	int s, error;
318b9234fafSSam Leffler #endif /* FAST_IPSEC */
319b715f178SLuigi Rizzo 
3202b25acc1SLuigi Rizzo 	args.eh = NULL;
3212b25acc1SLuigi Rizzo 	args.oif = NULL;
322df8bae1dSRodney W. Grimes 
323fe584538SDag-Erling Smørgrav   	M_ASSERTPKTHDR(m);
324db40007dSAndrew R. Reiter 
325ac9d7e26SMax Laier 	args.next_hop = ip_claim_next_hop(m);
326ac9d7e26SMax Laier 	args.rule = ip_dn_claim_rule(m);
327ac9d7e26SMax Laier 
328ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
329ac9d7e26SMax Laier 		/* ip_fastforward firewall changed dest to local */
330ac9d7e26SMax Laier 		m->m_flags &= ~M_FASTFWD_OURS;	/* for reflected mbufs */
3319188b4a1SAndre Oppermann   		goto ours;
332ac9d7e26SMax Laier   	}
33336e8826fSMax Laier 
3342b25acc1SLuigi Rizzo   	if (args.rule) {	/* dummynet already filtered us */
3352b25acc1SLuigi Rizzo   		ip = mtod(m, struct ip *);
33653be11f6SPoul-Henning Kamp   		hlen = ip->ip_hl << 2;
3372b25acc1SLuigi Rizzo 		goto iphack ;
3382b25acc1SLuigi Rizzo 	}
3392b25acc1SLuigi Rizzo 
340df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
34158938916SGarrett Wollman 
34258938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
34358938916SGarrett Wollman 		goto tooshort;
34458938916SGarrett Wollman 
345df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
346df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
347df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
348c67b1d17SGarrett Wollman 		return;
349df8bae1dSRodney W. Grimes 	}
350df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
35158938916SGarrett Wollman 
35253be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
353df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
354df8bae1dSRodney W. Grimes 		goto bad;
355df8bae1dSRodney W. Grimes 	}
35658938916SGarrett Wollman 
35753be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
358df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
359df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
360df8bae1dSRodney W. Grimes 		goto bad;
361df8bae1dSRodney W. Grimes 	}
362df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
363df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
364df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
365c67b1d17SGarrett Wollman 			return;
366df8bae1dSRodney W. Grimes 		}
367df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
368df8bae1dSRodney W. Grimes 	}
36933841545SHajimu UMEMOTO 
37033841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
37133841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
37233841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
37333841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
37433841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
37533841545SHajimu UMEMOTO 			goto bad;
37633841545SHajimu UMEMOTO 		}
37733841545SHajimu UMEMOTO 	}
37833841545SHajimu UMEMOTO 
379db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
380db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
381db4f9cc7SJonathan Lemon 	} else {
38258938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
38347c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
38458938916SGarrett Wollman 		} else {
38547c861ecSBrian Somers 			sum = in_cksum(m, hlen);
38658938916SGarrett Wollman 		}
387db4f9cc7SJonathan Lemon 	}
38847c861ecSBrian Somers 	if (sum) {
389df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
390df8bae1dSRodney W. Grimes 		goto bad;
391df8bae1dSRodney W. Grimes 	}
392df8bae1dSRodney W. Grimes 
393df8bae1dSRodney W. Grimes 	/*
394df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
395df8bae1dSRodney W. Grimes 	 */
396fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
397df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
398df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
399df8bae1dSRodney W. Grimes 		goto bad;
400df8bae1dSRodney W. Grimes 	}
401fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
402df8bae1dSRodney W. Grimes 
403df8bae1dSRodney W. Grimes 	/*
404df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
405df8bae1dSRodney W. Grimes 	 * is at least as much as the IP header would have us expect.
406df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
407df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
408df8bae1dSRodney W. Grimes 	 */
409df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
41058938916SGarrett Wollman tooshort:
411df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
412df8bae1dSRodney W. Grimes 		goto bad;
413df8bae1dSRodney W. Grimes 	}
414df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
415df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
416df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
417df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
418df8bae1dSRodney W. Grimes 		} else
419df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
420df8bae1dSRodney W. Grimes 	}
42114dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
42214dd6717SSam Leffler 	/*
42314dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
42414dd6717SSam Leffler 	 */
4250f9ade71SHajimu UMEMOTO 	if (ipsec_getnhist(m))
42614dd6717SSam Leffler 		goto pass;
42714dd6717SSam Leffler #endif
4281f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
4291f76a5e2SSam Leffler 	/*
4301f76a5e2SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
4311f76a5e2SSam Leffler 	 */
4321f76a5e2SSam Leffler 	if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
4331f76a5e2SSam Leffler 		goto pass;
4341f76a5e2SSam Leffler #endif
4353f67c834SDon Lewis 
4364dd1662bSUgen J.S. Antsilevich 	/*
4374dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
4384dd1662bSUgen J.S. Antsilevich 	 * Right now, when no processing has been done on the packet
4394dd1662bSUgen J.S. Antsilevich 	 * and it is still fresh off the network, we do our black
4404dd1662bSUgen J.S. Antsilevich 	 * deals with it.
44193e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
442fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
443b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4444dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4454dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4464dd1662bSUgen J.S. Antsilevich  	 */
447b715f178SLuigi Rizzo 
448b715f178SLuigi Rizzo iphack:
449df8bae1dSRodney W. Grimes 
450c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
451c4ac87eaSDarren Reed 	/*
452134ea224SSam Leffler 	 * Run through list of hooks for input packets.
453f51f805fSSam Leffler 	 *
454f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
455f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
456f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
457c4ac87eaSDarren Reed 	 */
458f51f805fSSam Leffler 	odst = ip->ip_dst;
459134ea224SSam Leffler 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
460134ea224SSam Leffler 	    PFIL_IN) != 0)
461beec8214SDarren Reed 		return;
462134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
463c4ac87eaSDarren Reed 		return;
464c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
46502c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
466c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
467c4ac87eaSDarren Reed 
4687b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
469f9e354dfSJulian Elischer 		/*
470f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
471f9e354dfSJulian Elischer 		 * skip the firewall a second time
472f9e354dfSJulian Elischer 		 */
4732b25acc1SLuigi Rizzo 		if (args.next_hop)
474f9e354dfSJulian Elischer 			goto ours;
4752b25acc1SLuigi Rizzo 
4762b25acc1SLuigi Rizzo 		args.m = m;
4772b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
4782b25acc1SLuigi Rizzo 		m = args.m;
4792b25acc1SLuigi Rizzo 
480d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
481507b4b54SLuigi Rizzo 			if (m)
482507b4b54SLuigi Rizzo 				m_freem(m);
483b715f178SLuigi Rizzo 			return;
484507b4b54SLuigi Rizzo 		}
485d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
4862b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
487b715f178SLuigi Rizzo 			goto pass;
4887b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
4898948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
4902b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
491e4676ba6SJulian Elischer 			return;
49293e0e116SJulian Elischer 		}
493b715f178SLuigi Rizzo #ifdef IPDIVERT
4948948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
4958948e4baSArchie Cobbs 			/* Divert or tee packet */
496b715f178SLuigi Rizzo 			goto ours;
497b715f178SLuigi Rizzo 		}
498b715f178SLuigi Rizzo #endif
4992b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
500b715f178SLuigi Rizzo 			goto pass;
501b715f178SLuigi Rizzo 		/*
502b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
503b715f178SLuigi Rizzo 		 */
504b715f178SLuigi Rizzo 		m_freem(m);
505b715f178SLuigi Rizzo 		return;
506b715f178SLuigi Rizzo 	}
507b715f178SLuigi Rizzo pass:
508100ba1a6SJordan K. Hubbard 
509df8bae1dSRodney W. Grimes 	/*
510df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
511df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
512df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
513df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
514df8bae1dSRodney W. Grimes 	 */
515df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5162b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
517c67b1d17SGarrett Wollman 		return;
518df8bae1dSRodney W. Grimes 
519f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
520f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
521f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
522f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
523f0068c4aSGarrett Wollman 	 * grabbing the packet.
524f0068c4aSGarrett Wollman          */
5251c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
526f0068c4aSGarrett Wollman 		goto ours;
527f0068c4aSGarrett Wollman 
528df8bae1dSRodney W. Grimes 	/*
529df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
530cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
531cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
532cc766e04SGarrett Wollman 	 * with it).
533df8bae1dSRodney W. Grimes 	 */
534cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
535cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
536cc766e04SGarrett Wollman 		goto ours;
537cc766e04SGarrett Wollman 
5387538a9a0SJonathan Lemon 	/*
5397538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5407538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5417538a9a0SJonathan Lemon 	 */
5422b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5437538a9a0SJonathan Lemon 
544823db0e9SDon Lewis 	/*
545823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
546823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
547823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
548e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
549e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5503f67c834SDon Lewis 	 *
5513f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5523f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5533f67c834SDon Lewis 	 *
554a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5553f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5563f67c834SDon Lewis 	 * the packets are received.
557823db0e9SDon Lewis 	 */
558823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5599494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
560e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
561189a0ba4SMax Laier 	    (args.next_hop == NULL) && (dchg == 0);
562823db0e9SDon Lewis 
563ca925d9cSJonathan Lemon 	/*
564ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
565ca925d9cSJonathan Lemon 	 */
566ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
567f9e354dfSJulian Elischer 		/*
568823db0e9SDon Lewis 		 * If the address matches, verify that the packet
569823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
570823db0e9SDon Lewis 		 * enabled.
571f9e354dfSJulian Elischer 		 */
572823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
573823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
574ed1ff184SJulian Elischer 			goto ours;
575ca925d9cSJonathan Lemon 	}
576823db0e9SDon Lewis 	/*
577ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
578ca925d9cSJonathan Lemon 	 *
579ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
580ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
581ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
582ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
583823db0e9SDon Lewis 	 */
584ca925d9cSJonathan Lemon 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
585ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
586ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
587ca925d9cSJonathan Lemon 				continue;
588ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
589df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
5907538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
591df8bae1dSRodney W. Grimes 				goto ours;
5927538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
593df8bae1dSRodney W. Grimes 				goto ours;
594ca925d9cSJonathan Lemon #ifdef BOOTP_COMPAT
595ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
596ca925d9cSJonathan Lemon 				goto ours;
597ca925d9cSJonathan Lemon #endif
598df8bae1dSRodney W. Grimes 		}
599df8bae1dSRodney W. Grimes 	}
600df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
601df8bae1dSRodney W. Grimes 		struct in_multi *inm;
602df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
603df8bae1dSRodney W. Grimes 			/*
604df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
605df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
606df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
607df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
608df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
609df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
610df8bae1dSRodney W. Grimes 			 */
611bbb4330bSLuigi Rizzo 			if (ip_mforward &&
612bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
613df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
614df8bae1dSRodney W. Grimes 				m_freem(m);
615c67b1d17SGarrett Wollman 				return;
616df8bae1dSRodney W. Grimes 			}
617df8bae1dSRodney W. Grimes 
618df8bae1dSRodney W. Grimes 			/*
61911612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
620df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
621df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
622df8bae1dSRodney W. Grimes 			 */
623df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
624df8bae1dSRodney W. Grimes 				goto ours;
625df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
626df8bae1dSRodney W. Grimes 		}
627df8bae1dSRodney W. Grimes 		/*
628df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
629df8bae1dSRodney W. Grimes 		 * arrival interface.
630df8bae1dSRodney W. Grimes 		 */
631df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
632df8bae1dSRodney W. Grimes 		if (inm == NULL) {
63382c39223SGarrett Wollman 			ipstat.ips_notmember++;
634df8bae1dSRodney W. Grimes 			m_freem(m);
635c67b1d17SGarrett Wollman 			return;
636df8bae1dSRodney W. Grimes 		}
637df8bae1dSRodney W. Grimes 		goto ours;
638df8bae1dSRodney W. Grimes 	}
639df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
640df8bae1dSRodney W. Grimes 		goto ours;
641df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
642df8bae1dSRodney W. Grimes 		goto ours;
643df8bae1dSRodney W. Grimes 
6446a800098SYoshinobu Inoue 	/*
6456a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6466a800098SYoshinobu Inoue 	 */
6476a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6486a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6496a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6506a800098SYoshinobu Inoue 				goto ours;
6516a800098SYoshinobu Inoue 		}
6526a800098SYoshinobu Inoue 		m_freem(m);
6536a800098SYoshinobu Inoue 		return;
6546a800098SYoshinobu Inoue 	}
6559494d596SBrooks Davis 
656df8bae1dSRodney W. Grimes 	/*
657df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
658df8bae1dSRodney W. Grimes 	 */
659df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
660df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
661df8bae1dSRodney W. Grimes 		m_freem(m);
662546f251bSChris D. Faulhaber 	} else {
663546f251bSChris D. Faulhaber #ifdef IPSEC
664546f251bSChris D. Faulhaber 		/*
665546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
666546f251bSChris D. Faulhaber 		 */
667546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
668546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
669546f251bSChris D. Faulhaber 			goto bad;
670546f251bSChris D. Faulhaber 		}
671546f251bSChris D. Faulhaber #endif /* IPSEC */
672b9234fafSSam Leffler #ifdef FAST_IPSEC
673b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
674b9234fafSSam Leffler 		s = splnet();
675b9234fafSSam Leffler 		if (mtag != NULL) {
676b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
677b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
678b9234fafSSam Leffler 		} else {
679b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
680b9234fafSSam Leffler 						   IP_FORWARDING, &error);
681b9234fafSSam Leffler 		}
682b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
683b9234fafSSam Leffler 			splx(s);
684b9234fafSSam Leffler 			/*XXX error stat???*/
685b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
686b9234fafSSam Leffler 			goto bad;
687b9234fafSSam Leffler 		}
688b9234fafSSam Leffler 
689b9234fafSSam Leffler 		/*
690b9234fafSSam Leffler 		 * Check security policy against packet attributes.
691b9234fafSSam Leffler 		 */
692b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
693b9234fafSSam Leffler 		KEY_FREESP(&sp);
694b9234fafSSam Leffler 		splx(s);
695b9234fafSSam Leffler 		if (error) {
696b9234fafSSam Leffler 			ipstat.ips_cantforward++;
697b9234fafSSam Leffler 			goto bad;
698b9234fafSSam Leffler 		}
699b9234fafSSam Leffler #endif /* FAST_IPSEC */
70002c1c707SAndre Oppermann 		ip_forward(m, dchg, args.next_hop);
701546f251bSChris D. Faulhaber 	}
702c67b1d17SGarrett Wollman 	return;
703df8bae1dSRodney W. Grimes 
704df8bae1dSRodney W. Grimes ours:
705d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
706d0ebc0d2SYaroslav Tykhiy 	/*
707d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
708d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
709d0ebc0d2SYaroslav Tykhiy 	 */
7102b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
7112b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
712d0ebc0d2SYaroslav Tykhiy 		return;
713d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
714d0ebc0d2SYaroslav Tykhiy 
7155da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7165da9f8faSJosef Karthauser 	if (ia != NULL) {
7175da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7185da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7195da9f8faSJosef Karthauser 	}
720100ba1a6SJordan K. Hubbard 
72163f8d699SJordan K. Hubbard 	/*
722df8bae1dSRodney W. Grimes 	 * If offset or IP_MF are set, must reassemble.
723df8bae1dSRodney W. Grimes 	 * Otherwise, nothing need be done.
724df8bae1dSRodney W. Grimes 	 * (We could look in the reassembly queue to see
725df8bae1dSRodney W. Grimes 	 * if the packet was previously fragmented,
726df8bae1dSRodney W. Grimes 	 * but it's not worth the time; just let them time out.)
727df8bae1dSRodney W. Grimes 	 */
728b6ea1aa5SRuslan Ermilov 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
7296a800098SYoshinobu Inoue 
730ecf44c01SMike Silbersack 		/* If maxnipq is 0, never accept fragments. */
731ac64c866SMike Silbersack 		if (maxnipq == 0) {
732ac64c866SMike Silbersack                 	ipstat.ips_fragments++;
733ac64c866SMike Silbersack 			ipstat.ips_fragdropped++;
734ac64c866SMike Silbersack 			goto bad;
735ac64c866SMike Silbersack 		}
736ac64c866SMike Silbersack 
737194a213eSAndrey A. Chernov 		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
7382fad1e93SSam Leffler 		IPQ_LOCK();
739df8bae1dSRodney W. Grimes 		/*
740df8bae1dSRodney W. Grimes 		 * Look for queue of fragments
741df8bae1dSRodney W. Grimes 		 * of this datagram.
742df8bae1dSRodney W. Grimes 		 */
743462b86feSPoul-Henning Kamp 		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
744df8bae1dSRodney W. Grimes 			if (ip->ip_id == fp->ipq_id &&
745df8bae1dSRodney W. Grimes 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
746df8bae1dSRodney W. Grimes 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
74736b0360bSRobert Watson #ifdef MAC
74836b0360bSRobert Watson 			    mac_fragment_match(m, fp) &&
74936b0360bSRobert Watson #endif
750df8bae1dSRodney W. Grimes 			    ip->ip_p == fp->ipq_p)
751df8bae1dSRodney W. Grimes 				goto found;
752df8bae1dSRodney W. Grimes 
753042bbfa3SRobert Watson 		fp = NULL;
754194a213eSAndrey A. Chernov 
755ac64c866SMike Silbersack 		/*
756ac64c866SMike Silbersack 		 * Enforce upper bound on number of fragmented packets
757ac64c866SMike Silbersack 		 * for which we attempt reassembly;
758ac64c866SMike Silbersack 		 * If maxnipq is -1, accept all fragments without limitation.
759ac64c866SMike Silbersack 		 */
760ac64c866SMike Silbersack 		if ((nipq > maxnipq) && (maxnipq > 0)) {
761194a213eSAndrey A. Chernov 		    /*
762194a213eSAndrey A. Chernov 		     * drop something from the tail of the current queue
763194a213eSAndrey A. Chernov 		     * before proceeding further
764194a213eSAndrey A. Chernov 		     */
765462b86feSPoul-Henning Kamp 		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
766462b86feSPoul-Henning Kamp 		    if (q == NULL) {   /* gak */
767194a213eSAndrey A. Chernov 			for (i = 0; i < IPREASS_NHASH; i++) {
768462b86feSPoul-Henning Kamp 			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
769462b86feSPoul-Henning Kamp 			    if (r) {
77099e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += r->ipq_nfrags;
771462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], r);
772194a213eSAndrey A. Chernov 				break;
773194a213eSAndrey A. Chernov 			    }
774194a213eSAndrey A. Chernov 			}
775ac64c866SMike Silbersack 		    } else {
77699e8617dSMaxim Konovalov 			ipstat.ips_fragtimeout += q->ipq_nfrags;
777462b86feSPoul-Henning Kamp 			ip_freef(&ipq[sum], q);
778ac64c866SMike Silbersack 		    }
779194a213eSAndrey A. Chernov 		}
780194a213eSAndrey A. Chernov found:
781df8bae1dSRodney W. Grimes 		/*
782df8bae1dSRodney W. Grimes 		 * Adjust ip_len to not reflect header,
783df8bae1dSRodney W. Grimes 		 * convert offset of this to bytes.
784df8bae1dSRodney W. Grimes 		 */
785df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
786b6ea1aa5SRuslan Ermilov 		if (ip->ip_off & IP_MF) {
7876effc713SDoug Rabson 		        /*
7886effc713SDoug Rabson 		         * Make sure that fragments have a data length
7896effc713SDoug Rabson 			 * that's a non-zero multiple of 8 bytes.
7906effc713SDoug Rabson 		         */
7916effc713SDoug Rabson 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
7922fad1e93SSam Leffler 				IPQ_UNLOCK();
7936effc713SDoug Rabson 				ipstat.ips_toosmall++; /* XXX */
7946effc713SDoug Rabson 				goto bad;
7956effc713SDoug Rabson 			}
7966effc713SDoug Rabson 			m->m_flags |= M_FRAG;
7971cf43499SMaxim Konovalov 		} else
7981cf43499SMaxim Konovalov 			m->m_flags &= ~M_FRAG;
799df8bae1dSRodney W. Grimes 		ip->ip_off <<= 3;
800df8bae1dSRodney W. Grimes 
801df8bae1dSRodney W. Grimes 		/*
802b6ea1aa5SRuslan Ermilov 		 * Attempt reassembly; if it succeeds, proceed.
803ac9d7e26SMax Laier 		 * ip_reass() will return a different mbuf.
804df8bae1dSRodney W. Grimes 		 */
805df8bae1dSRodney W. Grimes 		ipstat.ips_fragments++;
806487bdb38SRuslan Ermilov 		m->m_pkthdr.header = ip;
807ac9d7e26SMax Laier 		m = ip_reass(m, &ipq[sum], fp);
8082fad1e93SSam Leffler 		IPQ_UNLOCK();
8092b25acc1SLuigi Rizzo 		if (m == 0)
810c67b1d17SGarrett Wollman 			return;
811df8bae1dSRodney W. Grimes 		ipstat.ips_reassembled++;
8126a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8137e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
81453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
815af782f1cSBrian Somers #ifdef IPDIVERT
8168948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
817ac9d7e26SMax Laier 		if (divert_find_info(m) != 0) {
818af782f1cSBrian Somers 			ip->ip_len += hlen;
819fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
820fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
821af782f1cSBrian Somers 			ip->ip_sum = 0;
82260123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
823af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
82460123168SRuslan Ermilov 			else
82560123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
826fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
827fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
828af782f1cSBrian Somers 			ip->ip_len -= hlen;
829af782f1cSBrian Somers 		}
830af782f1cSBrian Somers #endif
831df8bae1dSRodney W. Grimes 	} else
832df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
833df8bae1dSRodney W. Grimes 
83493e0e116SJulian Elischer #ifdef IPDIVERT
83593e0e116SJulian Elischer 	/*
8368948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
83793e0e116SJulian Elischer 	 */
838ac9d7e26SMax Laier 	divert_info = divert_find_info(m);
8398948e4baSArchie Cobbs 	if (divert_info != 0) {
840ac9d7e26SMax Laier 		struct mbuf *clone;
8418948e4baSArchie Cobbs 
8428948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
8438948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
844ac9d7e26SMax Laier 			clone = divert_clone(m);
845ac9d7e26SMax Laier 		else
846ac9d7e26SMax Laier 			clone = NULL;
8478948e4baSArchie Cobbs 
8488948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
8498948e4baSArchie Cobbs 		ip->ip_len += hlen;
850fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
851fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
8528948e4baSArchie Cobbs 
8538948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
854ac9d7e26SMax Laier 		divert_packet(m, 1);
855e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
8568948e4baSArchie Cobbs 
8578948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
8588948e4baSArchie Cobbs 		if (clone == NULL)
85993e0e116SJulian Elischer 			return;
8608948e4baSArchie Cobbs 		m = clone;
8618948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
86256962689SCrist J. Clark 		ip->ip_len += hlen;
8632b25acc1SLuigi Rizzo 		/*
8642b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
865ac9d7e26SMax Laier 		 * packet.  We do not need to clear args.next_hop
866ac9d7e26SMax Laier 		 * as that will not be used again and the cloned packet
867ac9d7e26SMax Laier 		 * doesn't contain a divert packet tag so we won't
868ac9d7e26SMax Laier 		 * re-entry this block.
8692b25acc1SLuigi Rizzo 		 */
87056962689SCrist J. Clark 		goto pass;
87193e0e116SJulian Elischer 	}
87293e0e116SJulian Elischer #endif
87393e0e116SJulian Elischer 
87433841545SHajimu UMEMOTO #ifdef IPSEC
87533841545SHajimu UMEMOTO 	/*
87633841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
87733841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
87833841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
87933841545SHajimu UMEMOTO 	 */
88033841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
88133841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
88233841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
88333841545SHajimu UMEMOTO 		goto bad;
88433841545SHajimu UMEMOTO 	}
88533841545SHajimu UMEMOTO #endif
886b9234fafSSam Leffler #if FAST_IPSEC
887b9234fafSSam Leffler 	/*
888b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
889b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
890b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
891b9234fafSSam Leffler 	 */
892b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
893b9234fafSSam Leffler 		/*
894b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
895b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
896b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
897b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
898b9234fafSSam Leffler 		 */
899b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
900b9234fafSSam Leffler 		s = splnet();
901b9234fafSSam Leffler 		if (mtag != NULL) {
902b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
903b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
904b9234fafSSam Leffler 		} else {
905b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
906b9234fafSSam Leffler 						   IP_FORWARDING, &error);
907b9234fafSSam Leffler 		}
908b9234fafSSam Leffler 		if (sp != NULL) {
909b9234fafSSam Leffler 			/*
910b9234fafSSam Leffler 			 * Check security policy against packet attributes.
911b9234fafSSam Leffler 			 */
912b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
913b9234fafSSam Leffler 			KEY_FREESP(&sp);
914b9234fafSSam Leffler 		} else {
915b9234fafSSam Leffler 			/* XXX error stat??? */
916b9234fafSSam Leffler 			error = EINVAL;
917b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
918b9234fafSSam Leffler 			goto bad;
919b9234fafSSam Leffler 		}
920b9234fafSSam Leffler 		splx(s);
921b9234fafSSam Leffler 		if (error)
922b9234fafSSam Leffler 			goto bad;
923b9234fafSSam Leffler 	}
924b9234fafSSam Leffler #endif /* FAST_IPSEC */
92533841545SHajimu UMEMOTO 
926df8bae1dSRodney W. Grimes 	/*
927df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
928df8bae1dSRodney W. Grimes 	 */
929df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
9302b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
931ac9d7e26SMax Laier 		/* attach next hop info for TCP */
932ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
933ac9d7e26SMax Laier 		    sizeof(struct sockaddr_in *), M_NOWAIT);
934ac9d7e26SMax Laier 		if (mtag == NULL)
935ac9d7e26SMax Laier 			goto bad;
936ac9d7e26SMax Laier 		*(struct sockaddr_in **)(mtag+1) = args.next_hop;
937ac9d7e26SMax Laier 		m_tag_prepend(m, mtag);
938ac9d7e26SMax Laier 	}
939ac9d7e26SMax Laier 	NET_PICKUP_GIANT();
9402b25acc1SLuigi Rizzo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
9417902224cSSam Leffler 	NET_DROP_GIANT();
942c67b1d17SGarrett Wollman 	return;
943df8bae1dSRodney W. Grimes bad:
944df8bae1dSRodney W. Grimes 	m_freem(m);
945c67b1d17SGarrett Wollman }
946c67b1d17SGarrett Wollman 
947c67b1d17SGarrett Wollman /*
9488948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
9498948e4baSArchie Cobbs  * whole datagram.  If a chain for reassembly of this datagram already
9508948e4baSArchie Cobbs  * exists, then it is given as fp; otherwise have to make a chain.
9518948e4baSArchie Cobbs  *
9528948e4baSArchie Cobbs  * When IPDIVERT enabled, keep additional state with each packet that
9538948e4baSArchie Cobbs  * tells us if we need to divert or tee the packet we're building.
9542b25acc1SLuigi Rizzo  * In particular, *divinfo includes the port and TEE flag,
9552b25acc1SLuigi Rizzo  * *divert_rule is the number of the matching rule.
956df8bae1dSRodney W. Grimes  */
9578948e4baSArchie Cobbs 
9586a800098SYoshinobu Inoue static struct mbuf *
959ac9d7e26SMax Laier ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp)
960df8bae1dSRodney W. Grimes {
9616effc713SDoug Rabson 	struct ip *ip = mtod(m, struct ip *);
962b6ea1aa5SRuslan Ermilov 	register struct mbuf *p, *q, *nq;
963df8bae1dSRodney W. Grimes 	struct mbuf *t;
96453be11f6SPoul-Henning Kamp 	int hlen = ip->ip_hl << 2;
965df8bae1dSRodney W. Grimes 	int i, next;
96659dfcba4SHajimu UMEMOTO 	u_int8_t ecn, ecn0;
967df8bae1dSRodney W. Grimes 
9682fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
9692fad1e93SSam Leffler 
970df8bae1dSRodney W. Grimes 	/*
971df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
972df8bae1dSRodney W. Grimes 	 * would confuse code below.
973df8bae1dSRodney W. Grimes 	 */
974df8bae1dSRodney W. Grimes 	m->m_data += hlen;
975df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
976df8bae1dSRodney W. Grimes 
977df8bae1dSRodney W. Grimes 	/*
978df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
979df8bae1dSRodney W. Grimes 	 */
980042bbfa3SRobert Watson 	if (fp == NULL) {
981a163d034SWarner Losh 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
982df8bae1dSRodney W. Grimes 			goto dropfrag;
983df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
98436b0360bSRobert Watson #ifdef MAC
9855e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
9865e7ce478SRobert Watson 			m_free(t);
9875e7ce478SRobert Watson 			goto dropfrag;
9885e7ce478SRobert Watson 		}
98936b0360bSRobert Watson 		mac_create_ipq(m, fp);
99036b0360bSRobert Watson #endif
991462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
992194a213eSAndrey A. Chernov 		nipq++;
993375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
994df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
995df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
996df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
9976effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
9986effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
999af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1000af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
1001af38c68cSLuigi Rizzo 		goto inserted;
100236b0360bSRobert Watson 	} else {
1003375386e2SMike Silbersack 		fp->ipq_nfrags++;
100436b0360bSRobert Watson #ifdef MAC
100536b0360bSRobert Watson 		mac_update_ipq(m, fp);
100636b0360bSRobert Watson #endif
1007df8bae1dSRodney W. Grimes 	}
1008df8bae1dSRodney W. Grimes 
10096effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
10106effc713SDoug Rabson 
1011df8bae1dSRodney W. Grimes 	/*
101259dfcba4SHajimu UMEMOTO 	 * Handle ECN by comparing this segment with the first one;
101359dfcba4SHajimu UMEMOTO 	 * if CE is set, do not lose CE.
101459dfcba4SHajimu UMEMOTO 	 * drop if CE and not-ECT are mixed for the same packet.
101559dfcba4SHajimu UMEMOTO 	 */
101659dfcba4SHajimu UMEMOTO 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
101759dfcba4SHajimu UMEMOTO 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
101859dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_CE) {
101959dfcba4SHajimu UMEMOTO 		if (ecn0 == IPTOS_ECN_NOTECT)
102059dfcba4SHajimu UMEMOTO 			goto dropfrag;
102159dfcba4SHajimu UMEMOTO 		if (ecn0 != IPTOS_ECN_CE)
102259dfcba4SHajimu UMEMOTO 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
102359dfcba4SHajimu UMEMOTO 	}
102459dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
102559dfcba4SHajimu UMEMOTO 		goto dropfrag;
102659dfcba4SHajimu UMEMOTO 
102759dfcba4SHajimu UMEMOTO 	/*
1028df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1029df8bae1dSRodney W. Grimes 	 */
10306effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10316effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1032df8bae1dSRodney W. Grimes 			break;
1033df8bae1dSRodney W. Grimes 
1034df8bae1dSRodney W. Grimes 	/*
1035df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1036df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1037af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1038af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1039db4f9cc7SJonathan Lemon 	 *
1040db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
1041db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
1042df8bae1dSRodney W. Grimes 	 */
10436effc713SDoug Rabson 	if (p) {
10446effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1045df8bae1dSRodney W. Grimes 		if (i > 0) {
1046df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1047df8bae1dSRodney W. Grimes 				goto dropfrag;
10486a800098SYoshinobu Inoue 			m_adj(m, i);
1049db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1050df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1051df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1052df8bae1dSRodney W. Grimes 		}
1053af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1054af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1055af38c68cSLuigi Rizzo 	} else {
1056af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1057af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1058df8bae1dSRodney W. Grimes 	}
1059df8bae1dSRodney W. Grimes 
1060df8bae1dSRodney W. Grimes 	/*
1061df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1062df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1063df8bae1dSRodney W. Grimes 	 */
10646effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1065af38c68cSLuigi Rizzo 	     q = nq) {
1066b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10676effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10686effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10696effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10706effc713SDoug Rabson 			m_adj(q, i);
1071db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1072df8bae1dSRodney W. Grimes 			break;
1073df8bae1dSRodney W. Grimes 		}
10746effc713SDoug Rabson 		nq = q->m_nextpkt;
1075af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
107699e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1077375386e2SMike Silbersack 		fp->ipq_nfrags--;
10786effc713SDoug Rabson 		m_freem(q);
1079df8bae1dSRodney W. Grimes 	}
1080df8bae1dSRodney W. Grimes 
1081af38c68cSLuigi Rizzo inserted:
108293e0e116SJulian Elischer 
108393e0e116SJulian Elischer #ifdef IPDIVERT
1084ac9d7e26SMax Laier 	if (ip->ip_off != 0) {
108593e0e116SJulian Elischer 		/*
1086ac9d7e26SMax Laier 		 * Strip any divert information; only the info
1087ac9d7e26SMax Laier 		 * on the first fragment is used/kept.
108893e0e116SJulian Elischer 		 */
1089ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
1090ac9d7e26SMax Laier 		if (mtag)
1091ac9d7e26SMax Laier 			m_tag_delete(m, mtag);
10922b25acc1SLuigi Rizzo 	}
109393e0e116SJulian Elischer #endif
109493e0e116SJulian Elischer 
1095df8bae1dSRodney W. Grimes 	/*
1096375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1097375386e2SMike Silbersack 	 * limiting.
1098375386e2SMike Silbersack 	 *
1099375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1100375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1101375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1102375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1103375386e2SMike Silbersack 	 *
1104df8bae1dSRodney W. Grimes 	 */
11056effc713SDoug Rabson 	next = 0;
11066effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1107375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
110899e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
110999e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1110375386e2SMike Silbersack 				ip_freef(head, fp);
111199e8617dSMaxim Konovalov 			}
11126effc713SDoug Rabson 			return (0);
1113375386e2SMike Silbersack 		}
11146effc713SDoug Rabson 		next += GETIP(q)->ip_len;
11156effc713SDoug Rabson 	}
11166effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1117375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
111899e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
111999e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1120375386e2SMike Silbersack 			ip_freef(head, fp);
112199e8617dSMaxim Konovalov 		}
1122df8bae1dSRodney W. Grimes 		return (0);
1123375386e2SMike Silbersack 	}
1124df8bae1dSRodney W. Grimes 
1125df8bae1dSRodney W. Grimes 	/*
1126430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1127430d30d8SBill Fenner 	 */
11286effc713SDoug Rabson 	q = fp->ipq_frags;
11296effc713SDoug Rabson 	ip = GETIP(q);
113053be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1131430d30d8SBill Fenner 		ipstat.ips_toolong++;
113299e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1133462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1134430d30d8SBill Fenner 		return (0);
1135430d30d8SBill Fenner 	}
1136430d30d8SBill Fenner 
1137430d30d8SBill Fenner 	/*
1138430d30d8SBill Fenner 	 * Concatenate fragments.
1139df8bae1dSRodney W. Grimes 	 */
11406effc713SDoug Rabson 	m = q;
1141df8bae1dSRodney W. Grimes 	t = m->m_next;
1142df8bae1dSRodney W. Grimes 	m->m_next = 0;
1143df8bae1dSRodney W. Grimes 	m_cat(m, t);
11446effc713SDoug Rabson 	nq = q->m_nextpkt;
1145945aa40dSDoug Rabson 	q->m_nextpkt = 0;
11466effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11476effc713SDoug Rabson 		nq = q->m_nextpkt;
1148945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1149db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1150db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1151a8db1d93SJonathan Lemon 		m_cat(m, q);
1152df8bae1dSRodney W. Grimes 	}
115336b0360bSRobert Watson #ifdef MAC
115436b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
115536b0360bSRobert Watson 	mac_destroy_ipq(fp);
115636b0360bSRobert Watson #endif
1157df8bae1dSRodney W. Grimes 
1158df8bae1dSRodney W. Grimes 	/*
1159df8bae1dSRodney W. Grimes 	 * Create header for new ip packet by
1160df8bae1dSRodney W. Grimes 	 * modifying header of first packet;
1161df8bae1dSRodney W. Grimes 	 * dequeue and discard fragment reassembly header.
1162df8bae1dSRodney W. Grimes 	 * Make header visible.
1163df8bae1dSRodney W. Grimes 	 */
1164df8bae1dSRodney W. Grimes 	ip->ip_len = next;
11656effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
11666effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1167462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1168194a213eSAndrey A. Chernov 	nipq--;
1169df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
117053be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
117153be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1172df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1173a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1174a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
11756a800098SYoshinobu Inoue 	return (m);
1176df8bae1dSRodney W. Grimes 
1177df8bae1dSRodney W. Grimes dropfrag:
1178df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1179042bbfa3SRobert Watson 	if (fp != NULL)
1180375386e2SMike Silbersack 		fp->ipq_nfrags--;
1181df8bae1dSRodney W. Grimes 	m_freem(m);
1182df8bae1dSRodney W. Grimes 	return (0);
11836effc713SDoug Rabson 
11846effc713SDoug Rabson #undef GETIP
1185df8bae1dSRodney W. Grimes }
1186df8bae1dSRodney W. Grimes 
1187df8bae1dSRodney W. Grimes /*
1188df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1189df8bae1dSRodney W. Grimes  * associated datagrams.
1190df8bae1dSRodney W. Grimes  */
11910312fbe9SPoul-Henning Kamp static void
1192462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1193462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1194df8bae1dSRodney W. Grimes 	struct ipq *fp;
1195df8bae1dSRodney W. Grimes {
11966effc713SDoug Rabson 	register struct mbuf *q;
1197df8bae1dSRodney W. Grimes 
11982fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
11992fad1e93SSam Leffler 
12006effc713SDoug Rabson 	while (fp->ipq_frags) {
12016effc713SDoug Rabson 		q = fp->ipq_frags;
12026effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
12036effc713SDoug Rabson 		m_freem(q);
1204df8bae1dSRodney W. Grimes 	}
1205462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1206df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1207194a213eSAndrey A. Chernov 	nipq--;
1208df8bae1dSRodney W. Grimes }
1209df8bae1dSRodney W. Grimes 
1210df8bae1dSRodney W. Grimes /*
1211df8bae1dSRodney W. Grimes  * IP timer processing;
1212df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1213df8bae1dSRodney W. Grimes  * queue, discard it.
1214df8bae1dSRodney W. Grimes  */
1215df8bae1dSRodney W. Grimes void
1216df8bae1dSRodney W. Grimes ip_slowtimo()
1217df8bae1dSRodney W. Grimes {
1218df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1219df8bae1dSRodney W. Grimes 	int s = splnet();
1220194a213eSAndrey A. Chernov 	int i;
1221df8bae1dSRodney W. Grimes 
12222fad1e93SSam Leffler 	IPQ_LOCK();
1223194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1224462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1225462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1226462b86feSPoul-Henning Kamp 
1227462b86feSPoul-Henning Kamp 			fpp = fp;
1228462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1229462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
123099e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1231462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1232df8bae1dSRodney W. Grimes 			}
1233df8bae1dSRodney W. Grimes 		}
1234194a213eSAndrey A. Chernov 	}
1235690a6055SJesper Skriver 	/*
1236690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1237690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1238690a6055SJesper Skriver 	 * enough to get down to the new limit.
1239690a6055SJesper Skriver 	 */
1240a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1241690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1242b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
124399e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
124499e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1245690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1246690a6055SJesper Skriver 			}
1247690a6055SJesper Skriver 		}
1248690a6055SJesper Skriver 	}
12492fad1e93SSam Leffler 	IPQ_UNLOCK();
1250df8bae1dSRodney W. Grimes 	splx(s);
1251df8bae1dSRodney W. Grimes }
1252df8bae1dSRodney W. Grimes 
1253df8bae1dSRodney W. Grimes /*
1254df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1255df8bae1dSRodney W. Grimes  */
1256df8bae1dSRodney W. Grimes void
1257df8bae1dSRodney W. Grimes ip_drain()
1258df8bae1dSRodney W. Grimes {
1259194a213eSAndrey A. Chernov 	int     i;
1260ce29ab3aSGarrett Wollman 
12612fad1e93SSam Leffler 	IPQ_LOCK();
1262194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1263462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
126499e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
126599e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1266462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1267194a213eSAndrey A. Chernov 		}
1268194a213eSAndrey A. Chernov 	}
12692fad1e93SSam Leffler 	IPQ_UNLOCK();
1270ce29ab3aSGarrett Wollman 	in_rtqdrain();
1271df8bae1dSRodney W. Grimes }
1272df8bae1dSRodney W. Grimes 
1273df8bae1dSRodney W. Grimes /*
1274df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1275df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1276df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1277d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1278d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1279d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1280d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1281d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1282df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1283df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1284df8bae1dSRodney W. Grimes  */
12850312fbe9SPoul-Henning Kamp static int
12862b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1287df8bae1dSRodney W. Grimes {
12882b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
12892b25acc1SLuigi Rizzo 	u_char *cp;
12902b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1291df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1292df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1293df8bae1dSRodney W. Grimes 	n_time ntime;
12944d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1295df8bae1dSRodney W. Grimes 
1296df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1297df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
129853be11f6SPoul-Henning Kamp 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1299df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1300df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1301df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1302df8bae1dSRodney W. Grimes 			break;
1303df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1304df8bae1dSRodney W. Grimes 			optlen = 1;
1305df8bae1dSRodney W. Grimes 		else {
1306fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1307fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1308fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1309fdcb8debSJun-ichiro itojun Hagino 			}
1310df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
1311707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1312df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1313df8bae1dSRodney W. Grimes 				goto bad;
1314df8bae1dSRodney W. Grimes 			}
1315df8bae1dSRodney W. Grimes 		}
1316df8bae1dSRodney W. Grimes 		switch (opt) {
1317df8bae1dSRodney W. Grimes 
1318df8bae1dSRodney W. Grimes 		default:
1319df8bae1dSRodney W. Grimes 			break;
1320df8bae1dSRodney W. Grimes 
1321df8bae1dSRodney W. Grimes 		/*
1322df8bae1dSRodney W. Grimes 		 * Source routing with record.
1323df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1324df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1325df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1326df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1327df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1328df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1329df8bae1dSRodney W. Grimes 		 */
1330df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1331df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1332d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1333d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1334d0ebc0d2SYaroslav Tykhiy 				break;
1335d0ebc0d2SYaroslav Tykhiy #endif
133633841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
133733841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
133833841545SHajimu UMEMOTO 				goto bad;
133933841545SHajimu UMEMOTO 			}
1340df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1341df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1342df8bae1dSRodney W. Grimes 				goto bad;
1343df8bae1dSRodney W. Grimes 			}
1344df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1345df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1346df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1347df8bae1dSRodney W. Grimes 			if (ia == 0) {
1348df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1349df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1350df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1351df8bae1dSRodney W. Grimes 					goto bad;
1352df8bae1dSRodney W. Grimes 				}
1353bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1354bc189bf8SGuido van Rooij 					goto nosourcerouting;
1355df8bae1dSRodney W. Grimes 				/*
1356df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1357df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1358df8bae1dSRodney W. Grimes 				 */
1359df8bae1dSRodney W. Grimes 				break;
1360df8bae1dSRodney W. Grimes 			}
1361df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
13625d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1363df8bae1dSRodney W. Grimes 				/*
1364df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1365df8bae1dSRodney W. Grimes 				 */
13664fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
13674fce5804SGuido van Rooij 					goto nosourcerouting;
1368df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1369df8bae1dSRodney W. Grimes 				break;
1370df8bae1dSRodney W. Grimes 			}
1371d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1372d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1373d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1374d0ebc0d2SYaroslav Tykhiy #endif
13751025071fSGarrett Wollman 			if (!ip_dosourceroute) {
13760af8d3ecSDavid Greenman 				if (ipforwarding) {
13770af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
13780af8d3ecSDavid Greenman 					/*
13790af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
13800af8d3ecSDavid Greenman 					 */
1381efa48587SGuido van Rooij nosourcerouting:
1382bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
13831025071fSGarrett Wollman 					log(LOG_WARNING,
13841025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
13851025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
13861025071fSGarrett Wollman 					type = ICMP_UNREACH;
13871025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
13881025071fSGarrett Wollman 					goto bad;
13890af8d3ecSDavid Greenman 				} else {
13900af8d3ecSDavid Greenman 					/*
13910af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
13920af8d3ecSDavid Greenman 					 */
1393d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1394d0ebc0d2SYaroslav Tykhiy dropit:
1395d0ebc0d2SYaroslav Tykhiy #endif
13960af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
13970af8d3ecSDavid Greenman 					m_freem(m);
13980af8d3ecSDavid Greenman 					return (1);
13990af8d3ecSDavid Greenman 				}
14001025071fSGarrett Wollman 			}
14011025071fSGarrett Wollman 
1402df8bae1dSRodney W. Grimes 			/*
1403df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1404df8bae1dSRodney W. Grimes 			 */
140594a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1406df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
14071025071fSGarrett Wollman 
1408df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1409df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1410df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1411df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1412df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1413df8bae1dSRodney W. Grimes 			} else
141402c1c707SAndre Oppermann 				ia = ip_rtaddr(ipaddr.sin_addr);
1415df8bae1dSRodney W. Grimes 			if (ia == 0) {
1416df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1417df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1418df8bae1dSRodney W. Grimes 				goto bad;
1419df8bae1dSRodney W. Grimes 			}
1420df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
142194a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
142294a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1423df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1424df8bae1dSRodney W. Grimes 			/*
1425df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1426df8bae1dSRodney W. Grimes 			 */
1427df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1428df8bae1dSRodney W. Grimes 			break;
1429df8bae1dSRodney W. Grimes 
1430df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1431d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1432d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1433d0ebc0d2SYaroslav Tykhiy 				break;
1434d0ebc0d2SYaroslav Tykhiy #endif
1435707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1436707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1437707d00a3SJonathan Lemon 				goto bad;
1438707d00a3SJonathan Lemon 			}
1439df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1440df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1441df8bae1dSRodney W. Grimes 				goto bad;
1442df8bae1dSRodney W. Grimes 			}
1443df8bae1dSRodney W. Grimes 			/*
1444df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1445df8bae1dSRodney W. Grimes 			 */
1446df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
14475d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1448df8bae1dSRodney W. Grimes 				break;
144994a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1450df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1451df8bae1dSRodney W. Grimes 			/*
1452df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1453df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1454df8bae1dSRodney W. Grimes 			 */
1455df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
145602c1c707SAndre Oppermann 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1457df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1458df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1459df8bae1dSRodney W. Grimes 				goto bad;
1460df8bae1dSRodney W. Grimes 			}
146194a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
146294a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1463df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1464df8bae1dSRodney W. Grimes 			break;
1465df8bae1dSRodney W. Grimes 
1466df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1467d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1468d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1469d0ebc0d2SYaroslav Tykhiy 				break;
1470d0ebc0d2SYaroslav Tykhiy #endif
1471df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
147207514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
147307514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1474df8bae1dSRodney W. Grimes 				goto bad;
147533841545SHajimu UMEMOTO 			}
147607514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
147707514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
147833841545SHajimu UMEMOTO 				goto bad;
147933841545SHajimu UMEMOTO 			}
148007514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
148107514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
148207514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
148307514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1484df8bae1dSRodney W. Grimes 					goto bad;
148533841545SHajimu UMEMOTO 				}
1486df8bae1dSRodney W. Grimes 				break;
1487df8bae1dSRodney W. Grimes 			}
148807514071SJonathan Lemon 			off--;				/* 0 origin */
148907514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
149007514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1491df8bae1dSRodney W. Grimes 
1492df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1493df8bae1dSRodney W. Grimes 				break;
1494df8bae1dSRodney W. Grimes 
1495df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
149607514071SJonathan Lemon 				if (off + sizeof(n_time) +
149707514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
149807514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1499df8bae1dSRodney W. Grimes 					goto bad;
150033841545SHajimu UMEMOTO 				}
1501df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1502df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1503df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
1504df8bae1dSRodney W. Grimes 				if (ia == 0)
1505df8bae1dSRodney W. Grimes 					continue;
150694a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
150794a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
150807514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1509a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1510df8bae1dSRodney W. Grimes 				break;
1511df8bae1dSRodney W. Grimes 
1512df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
151307514071SJonathan Lemon 				if (off + sizeof(n_time) +
151407514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
151507514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1516df8bae1dSRodney W. Grimes 					goto bad;
151733841545SHajimu UMEMOTO 				}
151894a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1519df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
1520df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1521df8bae1dSRodney W. Grimes 					continue;
152207514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1523a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1524df8bae1dSRodney W. Grimes 				break;
1525df8bae1dSRodney W. Grimes 
1526df8bae1dSRodney W. Grimes 			default:
152707514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1528df8bae1dSRodney W. Grimes 				goto bad;
1529df8bae1dSRodney W. Grimes 			}
1530df8bae1dSRodney W. Grimes 			ntime = iptime();
153107514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
153207514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1533df8bae1dSRodney W. Grimes 		}
1534df8bae1dSRodney W. Grimes 	}
153547174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
153602c1c707SAndre Oppermann 		ip_forward(m, 1, next_hop);
1537df8bae1dSRodney W. Grimes 		return (1);
1538df8bae1dSRodney W. Grimes 	}
1539df8bae1dSRodney W. Grimes 	return (0);
1540df8bae1dSRodney W. Grimes bad:
1541df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1542df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1543df8bae1dSRodney W. Grimes 	return (1);
1544df8bae1dSRodney W. Grimes }
1545df8bae1dSRodney W. Grimes 
1546df8bae1dSRodney W. Grimes /*
1547df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1548df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1549df8bae1dSRodney W. Grimes  */
1550bd714208SRuslan Ermilov struct in_ifaddr *
155102c1c707SAndre Oppermann ip_rtaddr(dst)
1552df8bae1dSRodney W. Grimes 	struct in_addr dst;
1553df8bae1dSRodney W. Grimes {
155497d8d152SAndre Oppermann 	struct route sro;
155502c1c707SAndre Oppermann 	struct sockaddr_in *sin;
155602c1c707SAndre Oppermann 	struct in_ifaddr *ifa;
1557df8bae1dSRodney W. Grimes 
15580cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
155997d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
1560df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
1561df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
1562df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
156397d8d152SAndre Oppermann 	rtalloc_ign(&sro, RTF_CLONING);
1564df8bae1dSRodney W. Grimes 
156597d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
1566df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
156702c1c707SAndre Oppermann 
156897d8d152SAndre Oppermann 	ifa = ifatoia(sro.ro_rt->rt_ifa);
156997d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
157002c1c707SAndre Oppermann 	return ifa;
1571df8bae1dSRodney W. Grimes }
1572df8bae1dSRodney W. Grimes 
1573df8bae1dSRodney W. Grimes /*
1574df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1575df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1576df8bae1dSRodney W. Grimes  */
157737c84183SPoul-Henning Kamp static void
1578df8bae1dSRodney W. Grimes save_rte(option, dst)
1579df8bae1dSRodney W. Grimes 	u_char *option;
1580df8bae1dSRodney W. Grimes 	struct in_addr dst;
1581df8bae1dSRodney W. Grimes {
1582df8bae1dSRodney W. Grimes 	unsigned olen;
1583df8bae1dSRodney W. Grimes 
1584df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1585df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1586df8bae1dSRodney W. Grimes 	if (ipprintfs)
1587df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1588df8bae1dSRodney W. Grimes #endif
1589df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1590df8bae1dSRodney W. Grimes 		return;
15910453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1592df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1593df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1594df8bae1dSRodney W. Grimes }
1595df8bae1dSRodney W. Grimes 
1596df8bae1dSRodney W. Grimes /*
1597df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1598df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1599df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1600df8bae1dSRodney W. Grimes  */
1601df8bae1dSRodney W. Grimes struct mbuf *
1602df8bae1dSRodney W. Grimes ip_srcroute()
1603df8bae1dSRodney W. Grimes {
1604df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1605df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1606df8bae1dSRodney W. Grimes 
1607df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1608df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1609a163d034SWarner Losh 	m = m_get(M_DONTWAIT, MT_HEADER);
1610df8bae1dSRodney W. Grimes 	if (m == 0)
1611df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1612df8bae1dSRodney W. Grimes 
1613df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1614df8bae1dSRodney W. Grimes 
1615df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1616df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1617df8bae1dSRodney W. Grimes 	    OPTSIZ;
1618df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1619df8bae1dSRodney W. Grimes 	if (ipprintfs)
1620df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1621df8bae1dSRodney W. Grimes #endif
1622df8bae1dSRodney W. Grimes 
1623df8bae1dSRodney W. Grimes 	/*
1624df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1625df8bae1dSRodney W. Grimes 	 */
1626df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1627df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1628df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1629df8bae1dSRodney W. Grimes 	if (ipprintfs)
1630af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1631df8bae1dSRodney W. Grimes #endif
1632df8bae1dSRodney W. Grimes 
1633df8bae1dSRodney W. Grimes 	/*
1634df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1635df8bae1dSRodney W. Grimes 	 */
1636df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1637df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
163894a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
163994a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1640df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1641df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1642df8bae1dSRodney W. Grimes #undef OPTSIZ
1643df8bae1dSRodney W. Grimes 	/*
1644df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1645df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1646df8bae1dSRodney W. Grimes 	 */
1647df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1648df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1649df8bae1dSRodney W. Grimes 		if (ipprintfs)
1650af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1651df8bae1dSRodney W. Grimes #endif
1652df8bae1dSRodney W. Grimes 		*q++ = *p--;
1653df8bae1dSRodney W. Grimes 	}
1654df8bae1dSRodney W. Grimes 	/*
1655df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1656df8bae1dSRodney W. Grimes 	 */
1657df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1658df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1659df8bae1dSRodney W. Grimes 	if (ipprintfs)
1660af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1661df8bae1dSRodney W. Grimes #endif
1662df8bae1dSRodney W. Grimes 	return (m);
1663df8bae1dSRodney W. Grimes }
1664df8bae1dSRodney W. Grimes 
1665df8bae1dSRodney W. Grimes /*
1666df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1667df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1668df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1669df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1670df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1671df8bae1dSRodney W. Grimes  */
1672df8bae1dSRodney W. Grimes void
1673df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1674df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1675df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1676df8bae1dSRodney W. Grimes {
1677df8bae1dSRodney W. Grimes 	register int i;
1678df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1679df8bae1dSRodney W. Grimes 	register caddr_t opts;
1680df8bae1dSRodney W. Grimes 	int olen;
1681df8bae1dSRodney W. Grimes 
168253be11f6SPoul-Henning Kamp 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
1683df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1684df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1685df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1686df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1687df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1688df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
168953be11f6SPoul-Henning Kamp 	ip->ip_v = IPVERSION;
169053be11f6SPoul-Henning Kamp 	ip->ip_hl = sizeof(struct ip) >> 2;
1691df8bae1dSRodney W. Grimes }
1692df8bae1dSRodney W. Grimes 
/*
 * Table of errno values with PRC_NCMDS slots (presumably indexed by
 * PRC_* control command -- confirm against the PRC_* definitions).
 * Slots past the last initializer are implicitly zero.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 -  3 */	0,		0,		0,		0,
	/*  4 -  7 */	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	/*  8 - 11 */	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	/* 12 - 15 */	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	/* 16 - 19 */	0,		0,		EHOSTUNREACH,	0,
	/* 20 - 21 */	ENOPROTOOPT,	ECONNREFUSED
};
1701df8bae1dSRodney W. Grimes 
1702df8bae1dSRodney W. Grimes /*
1703df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1704df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1705df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1706df8bae1dSRodney W. Grimes  * of codes and types.
1707df8bae1dSRodney W. Grimes  *
1708df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1709df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1710df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1711df8bae1dSRodney W. Grimes  * protocol deal with that.
1712df8bae1dSRodney W. Grimes  *
1713df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1714df8bae1dSRodney W. Grimes  * via a source route.
1715df8bae1dSRodney W. Grimes  */
17160312fbe9SPoul-Henning Kamp static void
171702c1c707SAndre Oppermann ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1718df8bae1dSRodney W. Grimes {
17192b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
172002c1c707SAndre Oppermann 	struct in_ifaddr *ia;
172126f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1722df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1723df8bae1dSRodney W. Grimes 	n_long dest;
17243efc3014SJulian Elischer 	struct in_addr pkt_dst;
1725df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
1726b9234fafSSam Leffler #if defined(IPSEC) || defined(FAST_IPSEC)
17276a800098SYoshinobu Inoue 	struct ifnet dummyifp;
17286a800098SYoshinobu Inoue #endif
1729df8bae1dSRodney W. Grimes 
17303efc3014SJulian Elischer 	/*
17313efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
17323efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
17333efc3014SJulian Elischer 	 */
17342b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
17353efc3014SJulian Elischer 
1736df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1737df8bae1dSRodney W. Grimes 	if (ipprintfs)
173861ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
17393efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1740162886e2SBruce Evans 		    ip->ip_ttl);
1741df8bae1dSRodney W. Grimes #endif
1742100ba1a6SJordan K. Hubbard 
1743100ba1a6SJordan K. Hubbard 
17443efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1745df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1746df8bae1dSRodney W. Grimes 		m_freem(m);
1747df8bae1dSRodney W. Grimes 		return;
1748df8bae1dSRodney W. Grimes 	}
17491b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17501b968362SDag-Erling Smørgrav 	if (!ipstealth) {
17511b968362SDag-Erling Smørgrav #endif
1752df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
17531b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
175402c1c707SAndre Oppermann 			    0, 0);
1755df8bae1dSRodney W. Grimes 			return;
1756df8bae1dSRodney W. Grimes 		}
17571b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17581b968362SDag-Erling Smørgrav 	}
17591b968362SDag-Erling Smørgrav #endif
1760df8bae1dSRodney W. Grimes 
176102c1c707SAndre Oppermann 	if ((ia = ip_rtaddr(pkt_dst)) == 0) {
176202c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
1763df8bae1dSRodney W. Grimes 		return;
176402c1c707SAndre Oppermann 	}
1765df8bae1dSRodney W. Grimes 
1766df8bae1dSRodney W. Grimes 	/*
1767bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1768bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1769bfef7ed4SIan Dowse 	 *
17704d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
17714d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
17724d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
17734d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
17744d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
17754d2e3692SLuigi Rizzo 	 * really wasting a lot of work here.
17764d2e3692SLuigi Rizzo 	 *
1777bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1778bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1779bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1780bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1781df8bae1dSRodney W. Grimes 	 */
1782a163d034SWarner Losh 	MGET(mcopy, M_DONTWAIT, m->m_type);
1783a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
17849967cafcSSam Leffler 		/*
17859967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
17869967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
17879967cafcSSam Leffler 		 * be conservative and just discard the copy since
17889967cafcSSam Leffler 		 * code below may some day want the tags.
17899967cafcSSam Leffler 		 */
17909967cafcSSam Leffler 		m_free(mcopy);
17919967cafcSSam Leffler 		mcopy = NULL;
17929967cafcSSam Leffler 	}
1793bfef7ed4SIan Dowse 	if (mcopy != NULL) {
179453be11f6SPoul-Henning Kamp 		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1795bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1796bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1797bfef7ed4SIan Dowse 	}
179804287599SRuslan Ermilov 
179904287599SRuslan Ermilov #ifdef IPSTEALTH
180004287599SRuslan Ermilov 	if (!ipstealth) {
180104287599SRuslan Ermilov #endif
180204287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
180304287599SRuslan Ermilov #ifdef IPSTEALTH
180404287599SRuslan Ermilov 	}
180504287599SRuslan Ermilov #endif
1806df8bae1dSRodney W. Grimes 
1807df8bae1dSRodney W. Grimes 	/*
1808df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1809df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1810df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1811df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1812df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1813df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1814df8bae1dSRodney W. Grimes 	 */
181502c1c707SAndre Oppermann 	dest = 0;
181602c1c707SAndre Oppermann 	if (ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
181702c1c707SAndre Oppermann 		struct sockaddr_in *sin;
181802c1c707SAndre Oppermann 		struct route ro;
181902c1c707SAndre Oppermann 		struct rtentry *rt;
182002c1c707SAndre Oppermann 
18210cfbbe3bSAndre Oppermann 		bzero(&ro, sizeof(ro));
182202c1c707SAndre Oppermann 		sin = (struct sockaddr_in *)&ro.ro_dst;
182302c1c707SAndre Oppermann 		sin->sin_family = AF_INET;
182402c1c707SAndre Oppermann 		sin->sin_len = sizeof(*sin);
182502c1c707SAndre Oppermann 		sin->sin_addr = pkt_dst;
182626d02ca7SAndre Oppermann 		rtalloc_ign(&ro, RTF_CLONING);
182702c1c707SAndre Oppermann 
182802c1c707SAndre Oppermann 		rt = ro.ro_rt;
182902c1c707SAndre Oppermann 
183002c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1831df8bae1dSRodney W. Grimes 		    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
18322b25acc1SLuigi Rizzo 		    ipsendredirects && !srcrt && !next_hop) {
1833df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1834df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1835df8bae1dSRodney W. Grimes 
1836df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1837df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1838df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
1839df8bae1dSRodney W. Grimes 					dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1840df8bae1dSRodney W. Grimes 				else
18413efc3014SJulian Elischer 					dest = pkt_dst.s_addr;
1842df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1843df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1844df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1845df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1846df8bae1dSRodney W. Grimes 				if (ipprintfs)
1847df8bae1dSRodney W. Grimes 					printf("redirect (%d) to %lx\n", code, (u_long)dest);
1848df8bae1dSRodney W. Grimes #endif
1849df8bae1dSRodney W. Grimes 			}
1850df8bae1dSRodney W. Grimes 		}
185102c1c707SAndre Oppermann 		if (rt)
185202c1c707SAndre Oppermann 			RTFREE(rt);
185302c1c707SAndre Oppermann 	}
1854df8bae1dSRodney W. Grimes 
185536e8826fSMax Laier     {
1856ea779ff3SLuigi Rizzo 	if (next_hop) {
1857ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
1858ac9d7e26SMax Laier 		    sizeof(struct sockaddr_in *), M_NOWAIT);
1859ac9d7e26SMax Laier 		if (mtag == NULL) {
1860ac9d7e26SMax Laier 			m_freem(m);
1861ac9d7e26SMax Laier 			return;
1862ac9d7e26SMax Laier 		}
1863ac9d7e26SMax Laier 		*(struct sockaddr_in **)(mtag+1) = next_hop;
1864ac9d7e26SMax Laier 		m_tag_prepend(m, mtag);
1865ea779ff3SLuigi Rizzo 	}
186602c1c707SAndre Oppermann 	error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL);
186736e8826fSMax Laier     }
1868df8bae1dSRodney W. Grimes 	if (error)
1869df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1870df8bae1dSRodney W. Grimes 	else {
1871df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1872df8bae1dSRodney W. Grimes 		if (type)
1873df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1874df8bae1dSRodney W. Grimes 		else {
18759188b4a1SAndre Oppermann 			if (mcopy)
1876df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1877df8bae1dSRodney W. Grimes 			return;
1878df8bae1dSRodney W. Grimes 		}
1879df8bae1dSRodney W. Grimes 	}
1880df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1881df8bae1dSRodney W. Grimes 		return;
1882df8bae1dSRodney W. Grimes 	destifp = NULL;
1883df8bae1dSRodney W. Grimes 
1884df8bae1dSRodney W. Grimes 	switch (error) {
1885df8bae1dSRodney W. Grimes 
1886df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1887df8bae1dSRodney W. Grimes 		/* type, code set above */
1888df8bae1dSRodney W. Grimes 		break;
1889df8bae1dSRodney W. Grimes 
1890df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1891df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1892df8bae1dSRodney W. Grimes 	case ENETDOWN:
1893df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1894df8bae1dSRodney W. Grimes 	default:
1895df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1896df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1897df8bae1dSRodney W. Grimes 		break;
1898df8bae1dSRodney W. Grimes 
1899df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1900df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1901df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
190202c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
19036a800098SYoshinobu Inoue 		/*
19046a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
19056a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
19066a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
19076a800098SYoshinobu Inoue 		 * XXX quickhack!!!
19086a800098SYoshinobu Inoue 		 */
190902c1c707SAndre Oppermann 		{
19106a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
19116a800098SYoshinobu Inoue 			int ipsecerror;
19126a800098SYoshinobu Inoue 			int ipsechdr;
191302c1c707SAndre Oppermann 			struct route *ro;
19146a800098SYoshinobu Inoue 
191502c1c707SAndre Oppermann #ifdef IPSEC
19166a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
19176a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
19186a800098SYoshinobu Inoue 						    IP_FORWARDING,
19196a800098SYoshinobu Inoue 						    &ipsecerror);
192002c1c707SAndre Oppermann #else /* FAST_IPSEC */
1921b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1922b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1923b9234fafSSam Leffler 						   IP_FORWARDING,
1924b9234fafSSam Leffler 						   &ipsecerror);
192502c1c707SAndre Oppermann #endif
192602c1c707SAndre Oppermann 			if (sp != NULL) {
1927b9234fafSSam Leffler 				/* count IPsec header size */
1928b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1929b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1930b9234fafSSam Leffler 							 NULL);
1931b9234fafSSam Leffler 
1932b9234fafSSam Leffler 				/*
1933b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1934b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1935b9234fafSSam Leffler 				 *
1936b9234fafSSam Leffler 				 * XXX BUG ALERT
1937b9234fafSSam Leffler 				 * The "dummyifp" code relies upon the fact
1938b9234fafSSam Leffler 				 * that icmp_error() touches only ifp->if_mtu.
1939b9234fafSSam Leffler 				 */
1940b9234fafSSam Leffler 				/*XXX*/
1941b9234fafSSam Leffler 				destifp = NULL;
1942b9234fafSSam Leffler 				if (sp->req != NULL
1943b9234fafSSam Leffler 				 && sp->req->sav != NULL
1944b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
194502c1c707SAndre Oppermann 					ro = &sp->req->sav->sah->sa_route;
194602c1c707SAndre Oppermann 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1947b9234fafSSam Leffler 						dummyifp.if_mtu =
194802c1c707SAndre Oppermann 						    ro->ro_rt->rt_ifp->if_mtu;
1949b9234fafSSam Leffler 						dummyifp.if_mtu -= ipsechdr;
1950b9234fafSSam Leffler 						destifp = &dummyifp;
1951b9234fafSSam Leffler 					}
1952b9234fafSSam Leffler 				}
1953b9234fafSSam Leffler 
195402c1c707SAndre Oppermann #ifdef IPSEC
195502c1c707SAndre Oppermann 				key_freesp(sp);
195602c1c707SAndre Oppermann #else /* FAST_IPSEC */
1957b9234fafSSam Leffler 				KEY_FREESP(&sp);
195802c1c707SAndre Oppermann #endif
195902c1c707SAndre Oppermann 				ipstat.ips_cantfrag++;
196002c1c707SAndre Oppermann 				break;
196102c1c707SAndre Oppermann 			} else
196202c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
196302c1c707SAndre Oppermann 		destifp = ia->ia_ifp;
196402c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
1965b9234fafSSam Leffler 		}
196602c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
1967df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
1968df8bae1dSRodney W. Grimes 		break;
1969df8bae1dSRodney W. Grimes 
1970df8bae1dSRodney W. Grimes 	case ENOBUFS:
1971df285b3dSMike Silbersack 		/*
1972df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
1973df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
1974df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
1975df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
1976df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
1977df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
1978df285b3dSMike Silbersack 		 */
1979df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
1980df285b3dSMike Silbersack 			m_freem(mcopy);
1981df285b3dSMike Silbersack 			return;
1982df285b3dSMike Silbersack 		} else {
1983df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
1984df8bae1dSRodney W. Grimes 			code = 0;
1985df285b3dSMike Silbersack 		}
1986df8bae1dSRodney W. Grimes 		break;
19873a06e3e0SRuslan Ermilov 
19883a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
19893a06e3e0SRuslan Ermilov 		m_freem(mcopy);
19903a06e3e0SRuslan Ermilov 		return;
1991df8bae1dSRodney W. Grimes 	}
1992df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
1993df8bae1dSRodney W. Grimes }
1994df8bae1dSRodney W. Grimes 
199582c23ebaSBill Fenner void
199682c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
199782c23ebaSBill Fenner 	register struct inpcb *inp;
199882c23ebaSBill Fenner 	register struct mbuf **mp;
199982c23ebaSBill Fenner 	register struct ip *ip;
200082c23ebaSBill Fenner 	register struct mbuf *m;
200182c23ebaSBill Fenner {
2002be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
2003be8a62e8SPoul-Henning Kamp 		struct bintime bt;
2004be8a62e8SPoul-Henning Kamp 
2005be8a62e8SPoul-Henning Kamp 		bintime(&bt);
2006be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
2007be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
2008be8a62e8SPoul-Henning Kamp 			SCM_BINTIME, SOL_SOCKET);
2009be8a62e8SPoul-Henning Kamp 			if (*mp)
2010be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
2011be8a62e8SPoul-Henning Kamp 		}
201282c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
201382c23ebaSBill Fenner 			struct timeval tv;
201482c23ebaSBill Fenner 
2015be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
201682c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
201782c23ebaSBill Fenner 				SCM_TIMESTAMP, SOL_SOCKET);
201882c23ebaSBill Fenner 			if (*mp)
201982c23ebaSBill Fenner 				mp = &(*mp)->m_next;
20204cc20ab1SSeigo Tanimura 		}
2021be8a62e8SPoul-Henning Kamp 	}
202282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
202382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
202482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
202582c23ebaSBill Fenner 		if (*mp)
202682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
202782c23ebaSBill Fenner 	}
20284957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
20294957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
20304957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
20314957466bSMatthew N. Dodd 		if (*mp)
20324957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
20334957466bSMatthew N. Dodd 	}
203482c23ebaSBill Fenner #ifdef notyet
203582c23ebaSBill Fenner 	/* XXX
203682c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
203782c23ebaSBill Fenner 	 * than they already were.
203882c23ebaSBill Fenner 	 */
203982c23ebaSBill Fenner 	/* options were tossed already */
204082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
204182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
204282c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
204382c23ebaSBill Fenner 		if (*mp)
204482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
204582c23ebaSBill Fenner 	}
204682c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
204782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
204882c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
204982c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
205082c23ebaSBill Fenner 		if (*mp)
205182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
205282c23ebaSBill Fenner 	}
205382c23ebaSBill Fenner #endif
205482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
2055d314ad7bSJulian Elischer 		struct ifnet *ifp;
2056d314ad7bSJulian Elischer 		struct sdlbuf {
205782c23ebaSBill Fenner 			struct sockaddr_dl sdl;
2058d314ad7bSJulian Elischer 			u_char	pad[32];
2059d314ad7bSJulian Elischer 		} sdlbuf;
2060d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
2061d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
206282c23ebaSBill Fenner 
2063d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
2064d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
2065f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
2066f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
2067d314ad7bSJulian Elischer 			/*
2068d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
2069d314ad7bSJulian Elischer 			 */
2070d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
2071d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
2072d314ad7bSJulian Elischer 				goto makedummy;
2073d314ad7bSJulian Elischer 			}
2074d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
2075d314ad7bSJulian Elischer 		} else {
2076d314ad7bSJulian Elischer makedummy:
2077d314ad7bSJulian Elischer 			sdl2->sdl_len
2078d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
2079d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
2080d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
2081d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2082d314ad7bSJulian Elischer 		}
2083d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
208482c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
208582c23ebaSBill Fenner 		if (*mp)
208682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
208782c23ebaSBill Fenner 	}
208882c23ebaSBill Fenner }
208982c23ebaSBill Fenner 
20904d2e3692SLuigi Rizzo /*
20914d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
20924d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
20934d2e3692SLuigi Rizzo  *
20944d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
20954d2e3692SLuigi Rizzo  *  handling is done there already.
20964d2e3692SLuigi Rizzo  */
20974d2e3692SLuigi Rizzo static int ip_rsvp_on;
20984d2e3692SLuigi Rizzo struct socket *ip_rsvpd;
2099df8bae1dSRodney W. Grimes int
2100f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
2101f0068c4aSGarrett Wollman {
2102f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
2103f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2104f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
2105f0068c4aSGarrett Wollman 
2106f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
2107f0068c4aSGarrett Wollman 		return EADDRINUSE;
2108f0068c4aSGarrett Wollman 
2109f0068c4aSGarrett Wollman 	ip_rsvpd = so;
21101c5de19aSGarrett Wollman 	/*
21111c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
21121c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21131c5de19aSGarrett Wollman 	 */
21141c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
21151c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
21161c5de19aSGarrett Wollman 		rsvp_on++;
21171c5de19aSGarrett Wollman 	}
2118f0068c4aSGarrett Wollman 
2119f0068c4aSGarrett Wollman 	return 0;
2120f0068c4aSGarrett Wollman }
2121f0068c4aSGarrett Wollman 
2122f0068c4aSGarrett Wollman int
2123f0068c4aSGarrett Wollman ip_rsvp_done(void)
2124f0068c4aSGarrett Wollman {
2125f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
21261c5de19aSGarrett Wollman 	/*
21271c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
21281c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21291c5de19aSGarrett Wollman 	 */
21301c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
21311c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
21321c5de19aSGarrett Wollman 		rsvp_on--;
21331c5de19aSGarrett Wollman 	}
2134f0068c4aSGarrett Wollman 	return 0;
2135f0068c4aSGarrett Wollman }
2136bbb4330bSLuigi Rizzo 
2137bbb4330bSLuigi Rizzo void
2138bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
2139bbb4330bSLuigi Rizzo {
2140bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
2141bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
2142bbb4330bSLuigi Rizzo 		return;
2143bbb4330bSLuigi Rizzo 	}
2144bbb4330bSLuigi Rizzo 
2145bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
2146bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
2147bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
2148bbb4330bSLuigi Rizzo 	 */
2149bbb4330bSLuigi Rizzo 
2150bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
2151bbb4330bSLuigi Rizzo 		m_freem(m);
2152bbb4330bSLuigi Rizzo 		return;
2153bbb4330bSLuigi Rizzo 	}
2154bbb4330bSLuigi Rizzo 
2155bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
2156bbb4330bSLuigi Rizzo 		rip_input(m, off);
2157bbb4330bSLuigi Rizzo 		return;
2158bbb4330bSLuigi Rizzo 	}
2159bbb4330bSLuigi Rizzo 	/* Drop the packet */
2160bbb4330bSLuigi Rizzo 	m_freem(m);
2161bbb4330bSLuigi Rizzo }
2162