xref: /freebsd/sys/netinet/ip_input.c (revision f0cada84b1e275b6067addcc9eea2d98600adb58)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30c3aac50fSPeter Wemm  * $FreeBSD$
31df8bae1dSRodney W. Grimes  */
32df8bae1dSRodney W. Grimes 
330ac40133SBrian Somers #include "opt_bootp.h"
3474a9466cSGary Palmer #include "opt_ipfw.h"
35b715f178SLuigi Rizzo #include "opt_ipdn.h"
36fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
371ee25934SPeter Wemm #include "opt_ipfilter.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4036b0360bSRobert Watson #include "opt_mac.h"
41c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4264dddc18SKris Kennaway #include "opt_random_ip_id.h"
4374a9466cSGary Palmer 
44df8bae1dSRodney W. Grimes #include <sys/param.h>
45df8bae1dSRodney W. Grimes #include <sys/systm.h>
4636b0360bSRobert Watson #include <sys/mac.h>
47df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
48b715f178SLuigi Rizzo #include <sys/malloc.h>
49df8bae1dSRodney W. Grimes #include <sys/domain.h>
50df8bae1dSRodney W. Grimes #include <sys/protosw.h>
51df8bae1dSRodney W. Grimes #include <sys/socket.h>
52df8bae1dSRodney W. Grimes #include <sys/time.h>
53df8bae1dSRodney W. Grimes #include <sys/kernel.h>
541025071fSGarrett Wollman #include <sys/syslog.h>
55b5e8ce9fSBruce Evans #include <sys/sysctl.h>
56df8bae1dSRodney W. Grimes 
57c85540ddSAndrey A. Chernov #include <net/pfil.h>
58df8bae1dSRodney W. Grimes #include <net/if.h>
599494d596SBrooks Davis #include <net/if_types.h>
60d314ad7bSJulian Elischer #include <net/if_var.h>
6182c23ebaSBill Fenner #include <net/if_dl.h>
62df8bae1dSRodney W. Grimes #include <net/route.h>
63748e0b0aSGarrett Wollman #include <net/netisr.h>
64df8bae1dSRodney W. Grimes 
65df8bae1dSRodney W. Grimes #include <netinet/in.h>
66df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
67b5e8ce9fSBruce Evans #include <netinet/in_var.h>
68df8bae1dSRodney W. Grimes #include <netinet/ip.h>
69df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
70df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
71df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7258938916SGarrett Wollman #include <machine/in_cksum.h>
73df8bae1dSRodney W. Grimes 
74f0068c4aSGarrett Wollman #include <sys/socketvar.h>
756ddbf1e2SGary Palmer 
766ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
77ac9d7e26SMax Laier #include <netinet/ip_divert.h>
78db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
79db69a05dSPaul Saab 
806a800098SYoshinobu Inoue #ifdef IPSEC
816a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
826a800098SYoshinobu Inoue #include <netkey/key.h>
836a800098SYoshinobu Inoue #endif
846a800098SYoshinobu Inoue 
85b9234fafSSam Leffler #ifdef FAST_IPSEC
86b9234fafSSam Leffler #include <netipsec/ipsec.h>
87b9234fafSSam Leffler #include <netipsec/key.h>
88b9234fafSSam Leffler #endif
89b9234fafSSam Leffler 
/*
 * Run-time knobs for the IPv4 input path.  Every SYSCTL_INT() in this
 * group hangs off the _net_inet_ip node and is declared CTLFLAG_RW.
 */

/*
 * Non-zero makes ip_input() claim every IPPROTO_RSVP packet for local
 * delivery.  No sysctl is attached to it here, and nothing in the part
 * of the file reviewed here modifies it.
 */
int rsvp_on = 0;

/*
 * Besides what its description says, ip_input() also consults this flag:
 * the arrival-interface consistency check is only applied while it is 0.
 */
int	ipforwarding = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
    &ipforwarding, 0, "Enable IP forwarding between interfaces");

static int	ipsendredirects = 1; /* XXX */
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
    &ipsendredirects, 0, "Enable sending IP redirects");

/*
 * NOTE(review): initialised from IPDEFTTL and named "defttl", while the
 * description string calls it a maximum -- confirm which is intended.
 */
int	ip_defttl = IPDEFTTL;
SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
    &ip_defttl, 0, "Maximum TTL on IP packets");

static int	ip_dosourceroute = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
    &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");

static int	ip_acceptsourceroute = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
    CTLFLAG_RW, &ip_acceptsourceroute, 0,
    "Enable accepting source routed IP packets");

int		ip_doopts = 1;	/* 0 = ignore, 1 = process, 2 = reject */
SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
    &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");

static int	ip_keepfaith = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
	&ip_keepfaith,	0,
	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");

/*
 * Fragment reassembly accounting.  maxnipq has no static initialiser;
 * ip_init() sets it to nmbclusters / 32.
 */
static int    nipq = 0;         /* total # of reass queues */
static int    maxnipq;
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
	&maxnipq, 0,
	"Maximum number of IPv4 fragment reassembly queue entries");

/* Likewise has no static initialiser; ip_init() sets it to 16. */
static int    maxfragsperpacket;
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
	&maxfragsperpacket, 0,
	"Maximum number of IPv4 fragments allowed per packet");

static int	ip_sendsourcequench = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
	&ip_sendsourcequench, 0,
	"Enable the transmission of source quench packets");
137df285b3dSMike Silbersack 
138823db0e9SDon Lewis /*
139823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
140823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
141a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
142823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1433f67c834SDon Lewis  *
144a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
145a8f12100SDon Lewis  * to translate the destination address to another local interface.
1463f67c834SDon Lewis  *
1473f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1483f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1493f67c834SDon Lewis  * packets for those addresses are received.
150823db0e9SDon Lewis  */
static int	ip_checkinterface = 1;
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
    &ip_checkinterface, 0, "Verify packet arrives on correct interface");

#ifdef DIAGNOSTIC
/* Only exists in DIAGNOSTIC kernels; its users are not in the reviewed part. */
static int	ipprintfs = 0;
#endif
#ifdef PFIL_HOOKS
/*
 * Packet filter hook head for AF_INET.  ip_init() registers it and
 * ip_input() runs its PFIL_IN hooks on every packet before the firewall.
 */
struct pfil_head inet_pfil_hook;
#endif

/*
 * The IP input queue.  ip_init() copies ipqmaxlen into ifq_maxlen,
 * initialises the queue mutex and hands the queue to netisr for NETISR_IP.
 */
static struct	ifqueue ipintrq;
static int	ipqmaxlen = IFQ_MAXLEN;

extern	struct domain inetdomain;
extern	struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  ip_init() points
 * every slot at the raw IP entry and then overrides the slots of the
 * PF_INET protocols it finds in inetdomain.
 */
u_char	ip_protox[IPPROTO_MAX];
/*
 * Interface address bookkeeping, all set up by ip_init(): the list is
 * TAILQ_INIT'ed and the hash table and mask come from hashinit().
 * ip_input() treats an empty in_ifaddrhead as "no addresses configured".
 */
struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
u_long 	in_ifaddrhmask;				/* mask for hash table */

/* Queue limit is writable; the drop counter is exported read-only. */
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
    &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
    &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");

/*
 * IP statistics counters, bumped throughout ip_input().  Note the sysctl
 * exports the whole structure CTLFLAG_RW, not read-only.
 */
struct ipstat ipstat;
SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
    &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
180194a213eSAndrey A. Chernov 
181194a213eSAndrey A. Chernov /* Packet reassembly stuff */
/*
 * The reassembly hash table has 2^IPREASS_NHASH_LOG2 buckets, so a bucket
 * index is obtained by masking with IPREASS_HMASK.
 */
#define	IPREASS_NHASH_LOG2	6
#define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define	IPREASS_HMASK		(IPREASS_NHASH - 1)
/*
 * Bucket index for the pair (x, y): bits 8..11 of x become the high nibble
 * and bits 0..3 of x the low nibble of an 8-bit value, which is XORed with
 * y and then reduced to the table size.  Both arguments are evaluated more
 * than once or exactly once respectively (x twice, y once), so avoid
 * passing expressions with side effects as x.
 */
#define	IPREASS_HASH(x, y)						\
	(((((((x) >> 8) & 0xF) << 4) | ((x) & 0xF)) ^ (y)) & IPREASS_HMASK)
187194a213eSAndrey A. Chernov 
/*
 * One tail queue of struct ipq per hash bucket, IPREASS_NHASH buckets in
 * all.  ip_init() runs TAILQ_INIT() on each head.
 */
static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
/*
 * Mutex behind the IPQ_* wrappers below; presumably it guards the ipq[]
 * buckets -- the code taking it is not in the reviewed part of the file.
 */
struct mtx ipqlock;

/* Thin wrappers so callers never name ipqlock directly. */
#define	IPQ_LOCK()	mtx_lock(&ipqlock)
#define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
/* Called once from ip_init(), before the buckets are initialised. */
#define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
/* Asserts that the current thread owns ipqlock (MA_OWNED). */
#define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
195f23b4c91SGarrett Wollman 
/*
 * Only compiled when IPCTL_DEFMTU is defined.
 * NOTE(review): ip_mtu is not declared in the reviewed part of this file;
 * confirm where it comes from before enabling this.
 */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
    &ip_mtu, 0, "Default MTU");
#endif

/*
 * Optional knob, present only in IPSTEALTH kernels.  The sysctl is
 * registered with an empty description string.
 */
#ifdef IPSTEALTH
int	ipstealth = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
    &ipstealth, 0, "");
#endif


/* Firewall hooks */
/*
 * ip_input() calls through ip_fw_chk_ptr only when fw_enable is non-zero
 * and IPFW_LOADED is true.  fw_one_pass is not used in the reviewed part
 * of the file.
 */
ip_fw_chk_t *ip_fw_chk_ptr;
int fw_enable = 1 ;
int fw_one_pass = 1;

/* Dummynet hooks */
/*
 * ip_input() calls through this pointer when DUMMYNET_LOADED is true and
 * the firewall verdict carries IP_FW_PORT_DYNT_FLAG.
 */
ip_dn_io_t *ip_dn_io_ptr;
215b715f178SLuigi Rizzo 
216929b31ddSSam Leffler /*
2174d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
2184d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
2194d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
2204d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
2214d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
2224d2e3692SLuigi Rizzo  *
223df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
224df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
225df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
226df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
227df8bae1dSRodney W. Grimes  * to us.
228df8bae1dSRodney W. Grimes  */
/* Reset to 0 by ip_input() just before it processes a packet's options. */
static int	ip_nhops = 0;
/*
 * Do not add, remove or reorder members: other code depends on the exact
 * size and alignment of these fields.
 */
static	struct ip_srcrt {
	struct	in_addr dst;			/* final destination */
	char	nop;				/* one NOP to align */
	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
	/* room for MAX_IPOPTLEN / sizeof(struct in_addr) addresses */
	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
} ip_srcrt;

/*
 * Prototypes for file-local helpers; their definitions are not in the
 * reviewed part of the file.
 */
static void	save_rte(u_char *, struct in_addr);
/* ip_input() stops handling the packet when this returns non-zero. */
static int	ip_dooptions(struct mbuf *m, int,
			struct sockaddr_in *next_hop);
static void	ip_forward(struct mbuf *m, int srcrt,
			struct sockaddr_in *next_hop);
static void	ip_freef(struct ipqhead *, struct ipq *);
static struct	mbuf *ip_reass(struct mbuf *);
2448948e4baSArchie Cobbs 
245df8bae1dSRodney W. Grimes /*
246df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
247df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
248df8bae1dSRodney W. Grimes  */
249df8bae1dSRodney W. Grimes void
250df8bae1dSRodney W. Grimes ip_init()
251df8bae1dSRodney W. Grimes {
252f0ffb944SJulian Elischer 	register struct protosw *pr;
253df8bae1dSRodney W. Grimes 	register int i;
254df8bae1dSRodney W. Grimes 
25559562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
256ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
257f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
258df8bae1dSRodney W. Grimes 	if (pr == 0)
259df8bae1dSRodney W. Grimes 		panic("ip_init");
260df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
261df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
262f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
263f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
264df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
265df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
266df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
267194a213eSAndrey A. Chernov 
268134ea224SSam Leffler #ifdef PFIL_HOOKS
269134ea224SSam Leffler 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
270134ea224SSam Leffler 	inet_pfil_hook.ph_af = AF_INET;
271134ea224SSam Leffler 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
272134ea224SSam Leffler 		printf("%s: WARNING: unable to register pfil hook, "
273134ea224SSam Leffler 			"error %d\n", __func__, i);
274134ea224SSam Leffler #endif /* PFIL_HOOKS */
275134ea224SSam Leffler 
2762fad1e93SSam Leffler 	IPQ_LOCK_INIT();
277194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
278462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
279194a213eSAndrey A. Chernov 
280375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
281375386e2SMike Silbersack 	maxfragsperpacket = 16;
282194a213eSAndrey A. Chernov 
28364dddc18SKris Kennaway #ifndef RANDOM_IP_ID
284227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
28564dddc18SKris Kennaway #endif
286df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2876008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2887902224cSSam Leffler 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
289df8bae1dSRodney W. Grimes }
290df8bae1dSRodney W. Grimes 
2914d2e3692SLuigi Rizzo /*
292df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
293df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
294df8bae1dSRodney W. Grimes  */
295c67b1d17SGarrett Wollman void
296c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
297df8bae1dSRodney W. Grimes {
2989188b4a1SAndre Oppermann 	struct ip *ip = NULL;
2995da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
300ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
3019188b4a1SAndre Oppermann 	int    i, checkif, hlen = 0;
30247c861ecSBrian Somers 	u_short sum;
3037538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
304ac9d7e26SMax Laier #ifdef IPDIVERT
305ac9d7e26SMax Laier 	u_int32_t divert_info;			/* packet divert/tee info */
306ac9d7e26SMax Laier #endif
3072b25acc1SLuigi Rizzo 	struct ip_fw_args args;
30802c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
309f51f805fSSam Leffler #ifdef PFIL_HOOKS
310f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
311f51f805fSSam Leffler #endif
312b9234fafSSam Leffler #ifdef FAST_IPSEC
31336e8826fSMax Laier 	struct m_tag *mtag;
314b9234fafSSam Leffler 	struct tdb_ident *tdbi;
315b9234fafSSam Leffler 	struct secpolicy *sp;
316b9234fafSSam Leffler 	int s, error;
317b9234fafSSam Leffler #endif /* FAST_IPSEC */
318b715f178SLuigi Rizzo 
3192b25acc1SLuigi Rizzo 	args.eh = NULL;
3202b25acc1SLuigi Rizzo 	args.oif = NULL;
321df8bae1dSRodney W. Grimes 
322fe584538SDag-Erling Smørgrav   	M_ASSERTPKTHDR(m);
323db40007dSAndrew R. Reiter 
3242f3f1e67SDarren Reed 	args.next_hop = m_claim_next(m, PACKET_TAG_IPFORWARD);
325ac9d7e26SMax Laier 	args.rule = ip_dn_claim_rule(m);
326ac9d7e26SMax Laier 
327ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
328ac9d7e26SMax Laier 		/* ip_fastforward firewall changed dest to local */
329ac9d7e26SMax Laier 		m->m_flags &= ~M_FASTFWD_OURS;	/* for reflected mbufs */
3309188b4a1SAndre Oppermann   		goto ours;
331ac9d7e26SMax Laier   	}
33236e8826fSMax Laier 
3332b25acc1SLuigi Rizzo   	if (args.rule) {	/* dummynet already filtered us */
3342b25acc1SLuigi Rizzo   		ip = mtod(m, struct ip *);
33553be11f6SPoul-Henning Kamp   		hlen = ip->ip_hl << 2;
3362b25acc1SLuigi Rizzo 		goto iphack ;
3372b25acc1SLuigi Rizzo 	}
3382b25acc1SLuigi Rizzo 
339df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
34058938916SGarrett Wollman 
34158938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
34258938916SGarrett Wollman 		goto tooshort;
34358938916SGarrett Wollman 
344df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
345df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
346df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
347c67b1d17SGarrett Wollman 		return;
348df8bae1dSRodney W. Grimes 	}
349df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
35058938916SGarrett Wollman 
35153be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
352df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
353df8bae1dSRodney W. Grimes 		goto bad;
354df8bae1dSRodney W. Grimes 	}
35558938916SGarrett Wollman 
35653be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
357df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
358df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
359df8bae1dSRodney W. Grimes 		goto bad;
360df8bae1dSRodney W. Grimes 	}
361df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
362df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
363df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
364c67b1d17SGarrett Wollman 			return;
365df8bae1dSRodney W. Grimes 		}
366df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
367df8bae1dSRodney W. Grimes 	}
36833841545SHajimu UMEMOTO 
36933841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
37033841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
37133841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
37233841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
37333841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
37433841545SHajimu UMEMOTO 			goto bad;
37533841545SHajimu UMEMOTO 		}
37633841545SHajimu UMEMOTO 	}
37733841545SHajimu UMEMOTO 
378db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
379db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
380db4f9cc7SJonathan Lemon 	} else {
38158938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
38247c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
38358938916SGarrett Wollman 		} else {
38447c861ecSBrian Somers 			sum = in_cksum(m, hlen);
38558938916SGarrett Wollman 		}
386db4f9cc7SJonathan Lemon 	}
38747c861ecSBrian Somers 	if (sum) {
388df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
389df8bae1dSRodney W. Grimes 		goto bad;
390df8bae1dSRodney W. Grimes 	}
391df8bae1dSRodney W. Grimes 
39202b199f1SMax Laier #ifdef ALTQ
39302b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
39402b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
39502b199f1SMax Laier 		return;
39602b199f1SMax Laier #endif
39702b199f1SMax Laier 
398df8bae1dSRodney W. Grimes 	/*
399df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
400df8bae1dSRodney W. Grimes 	 */
401fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
402df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
403df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
404df8bae1dSRodney W. Grimes 		goto bad;
405df8bae1dSRodney W. Grimes 	}
406fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
407df8bae1dSRodney W. Grimes 
408df8bae1dSRodney W. Grimes 	/*
409df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
410df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
411df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
412df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
413df8bae1dSRodney W. Grimes 	 */
414df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
41558938916SGarrett Wollman tooshort:
416df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
417df8bae1dSRodney W. Grimes 		goto bad;
418df8bae1dSRodney W. Grimes 	}
419df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
420df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
421df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
422df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
423df8bae1dSRodney W. Grimes 		} else
424df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
425df8bae1dSRodney W. Grimes 	}
42614dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
42714dd6717SSam Leffler 	/*
42814dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
42914dd6717SSam Leffler 	 */
4300f9ade71SHajimu UMEMOTO 	if (ipsec_getnhist(m))
43114dd6717SSam Leffler 		goto pass;
43214dd6717SSam Leffler #endif
4331f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
4341f76a5e2SSam Leffler 	/*
4351f76a5e2SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
4361f76a5e2SSam Leffler 	 */
4371f76a5e2SSam Leffler 	if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
4381f76a5e2SSam Leffler 		goto pass;
4391f76a5e2SSam Leffler #endif
4403f67c834SDon Lewis 
4414dd1662bSUgen J.S. Antsilevich 	/*
4424dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
4434dd1662bSUgen J.S. Antsilevich 	 * Right now when no processing on packet has done
4444dd1662bSUgen J.S. Antsilevich 	 * and it is still fresh out of network we do our black
4454dd1662bSUgen J.S. Antsilevich 	 * deals with it.
44693e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
447fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
448b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4494dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4504dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4514dd1662bSUgen J.S. Antsilevich  	 */
452b715f178SLuigi Rizzo 
453b715f178SLuigi Rizzo iphack:
454df8bae1dSRodney W. Grimes 
455c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
456c4ac87eaSDarren Reed 	/*
457134ea224SSam Leffler 	 * Run through list of hooks for input packets.
458f51f805fSSam Leffler 	 *
459f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
460f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
461f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
462c4ac87eaSDarren Reed 	 */
463f51f805fSSam Leffler 	odst = ip->ip_dst;
464134ea224SSam Leffler 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
465134ea224SSam Leffler 	    PFIL_IN) != 0)
466beec8214SDarren Reed 		return;
467134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
468c4ac87eaSDarren Reed 		return;
469c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
47002c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
471c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
472c4ac87eaSDarren Reed 
4737b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
474f9e354dfSJulian Elischer 		/*
475f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
476f9e354dfSJulian Elischer 		 * skip the firewall a second time
477f9e354dfSJulian Elischer 		 */
4782b25acc1SLuigi Rizzo 		if (args.next_hop)
479f9e354dfSJulian Elischer 			goto ours;
4802b25acc1SLuigi Rizzo 
4812b25acc1SLuigi Rizzo 		args.m = m;
4822b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
4832b25acc1SLuigi Rizzo 		m = args.m;
4842b25acc1SLuigi Rizzo 
485d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
486507b4b54SLuigi Rizzo 			if (m)
487507b4b54SLuigi Rizzo 				m_freem(m);
488b715f178SLuigi Rizzo 			return;
489507b4b54SLuigi Rizzo 		}
490d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
4912b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
492b715f178SLuigi Rizzo 			goto pass;
4937b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
4948948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
4952b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
496e4676ba6SJulian Elischer 			return;
49793e0e116SJulian Elischer 		}
498b715f178SLuigi Rizzo #ifdef IPDIVERT
4998948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
5008948e4baSArchie Cobbs 			/* Divert or tee packet */
501b715f178SLuigi Rizzo 			goto ours;
502b715f178SLuigi Rizzo 		}
503b715f178SLuigi Rizzo #endif
5042b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
505b715f178SLuigi Rizzo 			goto pass;
506b715f178SLuigi Rizzo 		/*
507b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
508b715f178SLuigi Rizzo 		 */
509b715f178SLuigi Rizzo 		m_freem(m);
510b715f178SLuigi Rizzo 		return;
511b715f178SLuigi Rizzo 	}
512b715f178SLuigi Rizzo pass:
513100ba1a6SJordan K. Hubbard 
514df8bae1dSRodney W. Grimes 	/*
515df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
516df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
517df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
518df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
519df8bae1dSRodney W. Grimes 	 */
520df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5212b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
522c67b1d17SGarrett Wollman 		return;
523df8bae1dSRodney W. Grimes 
524f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
525f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
526f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
527f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
528f0068c4aSGarrett Wollman 	 * grabbing the packet.
529f0068c4aSGarrett Wollman          */
5301c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
531f0068c4aSGarrett Wollman 		goto ours;
532f0068c4aSGarrett Wollman 
533df8bae1dSRodney W. Grimes 	/*
534df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
535cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
536cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
537cc766e04SGarrett Wollman 	 * with it).
538df8bae1dSRodney W. Grimes 	 */
539cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
540cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
541cc766e04SGarrett Wollman 		goto ours;
542cc766e04SGarrett Wollman 
5437538a9a0SJonathan Lemon 	/*
5447538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5457538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5467538a9a0SJonathan Lemon 	 */
5472b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5487538a9a0SJonathan Lemon 
549823db0e9SDon Lewis 	/*
550823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
551823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
552823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
553e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
554e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5553f67c834SDon Lewis 	 *
5563f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5573f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5583f67c834SDon Lewis 	 *
559a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5603f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5613f67c834SDon Lewis 	 * the packets are received.
562823db0e9SDon Lewis 	 */
563823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5649494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
565e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
566189a0ba4SMax Laier 	    (args.next_hop == NULL) && (dchg == 0);
567823db0e9SDon Lewis 
568ca925d9cSJonathan Lemon 	/*
569ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
570ca925d9cSJonathan Lemon 	 */
571ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
572f9e354dfSJulian Elischer 		/*
573823db0e9SDon Lewis 		 * If the address matches, verify that the packet
574823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
575823db0e9SDon Lewis 		 * enabled.
576f9e354dfSJulian Elischer 		 */
577823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
578823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
579ed1ff184SJulian Elischer 			goto ours;
580ca925d9cSJonathan Lemon 	}
581823db0e9SDon Lewis 	/*
582ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
583ca925d9cSJonathan Lemon 	 *
584ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
585ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
586ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
587ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
588823db0e9SDon Lewis 	 */
5894f450ff9SBruce M Simpson 	if (m->m_pkthdr.rcvif != NULL &&
5904f450ff9SBruce M Simpson 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
591ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
592ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
593ca925d9cSJonathan Lemon 				continue;
594ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
595df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
5967538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
597df8bae1dSRodney W. Grimes 				goto ours;
5987538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
599df8bae1dSRodney W. Grimes 				goto ours;
6000ac40133SBrian Somers #ifdef BOOTP_COMPAT
601ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
602ca925d9cSJonathan Lemon 				goto ours;
6030ac40133SBrian Somers #endif
604df8bae1dSRodney W. Grimes 		}
605df8bae1dSRodney W. Grimes 	}
606df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
607df8bae1dSRodney W. Grimes 		struct in_multi *inm;
608df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
609df8bae1dSRodney W. Grimes 			/*
610df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
611df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
612df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
613df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
614df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
615df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
616df8bae1dSRodney W. Grimes 			 */
617bbb4330bSLuigi Rizzo 			if (ip_mforward &&
618bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
619df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
620df8bae1dSRodney W. Grimes 				m_freem(m);
621c67b1d17SGarrett Wollman 				return;
622df8bae1dSRodney W. Grimes 			}
623df8bae1dSRodney W. Grimes 
624df8bae1dSRodney W. Grimes 			/*
62511612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
626df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
627df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
628df8bae1dSRodney W. Grimes 			 */
629df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
630df8bae1dSRodney W. Grimes 				goto ours;
631df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
632df8bae1dSRodney W. Grimes 		}
633df8bae1dSRodney W. Grimes 		/*
634df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
635df8bae1dSRodney W. Grimes 		 * arrival interface.
636df8bae1dSRodney W. Grimes 		 */
637df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
638df8bae1dSRodney W. Grimes 		if (inm == NULL) {
63982c39223SGarrett Wollman 			ipstat.ips_notmember++;
640df8bae1dSRodney W. Grimes 			m_freem(m);
641c67b1d17SGarrett Wollman 			return;
642df8bae1dSRodney W. Grimes 		}
643df8bae1dSRodney W. Grimes 		goto ours;
644df8bae1dSRodney W. Grimes 	}
645df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
646df8bae1dSRodney W. Grimes 		goto ours;
647df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
648df8bae1dSRodney W. Grimes 		goto ours;
649df8bae1dSRodney W. Grimes 
6506a800098SYoshinobu Inoue 	/*
6516a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6526a800098SYoshinobu Inoue 	 */
6536a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6546a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6556a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6566a800098SYoshinobu Inoue 				goto ours;
6576a800098SYoshinobu Inoue 		}
6586a800098SYoshinobu Inoue 		m_freem(m);
6596a800098SYoshinobu Inoue 		return;
6606a800098SYoshinobu Inoue 	}
6619494d596SBrooks Davis 
662df8bae1dSRodney W. Grimes 	/*
663df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
664df8bae1dSRodney W. Grimes 	 */
665df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
666df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
667df8bae1dSRodney W. Grimes 		m_freem(m);
668546f251bSChris D. Faulhaber 	} else {
669546f251bSChris D. Faulhaber #ifdef IPSEC
670546f251bSChris D. Faulhaber 		/*
671546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
672546f251bSChris D. Faulhaber 		 */
673546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
674546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
675546f251bSChris D. Faulhaber 			goto bad;
676546f251bSChris D. Faulhaber 		}
677546f251bSChris D. Faulhaber #endif /* IPSEC */
678b9234fafSSam Leffler #ifdef FAST_IPSEC
679b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
680b9234fafSSam Leffler 		s = splnet();
681b9234fafSSam Leffler 		if (mtag != NULL) {
682b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
683b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
684b9234fafSSam Leffler 		} else {
685b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
686b9234fafSSam Leffler 						   IP_FORWARDING, &error);
687b9234fafSSam Leffler 		}
688b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
689b9234fafSSam Leffler 			splx(s);
690b9234fafSSam Leffler 			/*XXX error stat???*/
691b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
692b9234fafSSam Leffler 			goto bad;
693b9234fafSSam Leffler 		}
694b9234fafSSam Leffler 
695b9234fafSSam Leffler 		/*
696b9234fafSSam Leffler 		 * Check security policy against packet attributes.
697b9234fafSSam Leffler 		 */
698b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
699b9234fafSSam Leffler 		KEY_FREESP(&sp);
700b9234fafSSam Leffler 		splx(s);
701b9234fafSSam Leffler 		if (error) {
702b9234fafSSam Leffler 			ipstat.ips_cantforward++;
703b9234fafSSam Leffler 			goto bad;
704b9234fafSSam Leffler 		}
705b9234fafSSam Leffler #endif /* FAST_IPSEC */
70602c1c707SAndre Oppermann 		ip_forward(m, dchg, args.next_hop);
707546f251bSChris D. Faulhaber 	}
708c67b1d17SGarrett Wollman 	return;
709df8bae1dSRodney W. Grimes 
710df8bae1dSRodney W. Grimes ours:
711d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
712d0ebc0d2SYaroslav Tykhiy 	/*
713d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
714d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
715d0ebc0d2SYaroslav Tykhiy 	 */
7162b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
7172b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
718d0ebc0d2SYaroslav Tykhiy 		return;
719d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
720d0ebc0d2SYaroslav Tykhiy 
7215da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7225da9f8faSJosef Karthauser 	if (ia != NULL) {
7235da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7245da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7255da9f8faSJosef Karthauser 	}
726100ba1a6SJordan K. Hubbard 
72763f8d699SJordan K. Hubbard 	/*
728b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
729ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
730df8bae1dSRodney W. Grimes 	 */
731f0cada84SAndre Oppermann 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
732f0cada84SAndre Oppermann 		m = ip_reass(m);
733f0cada84SAndre Oppermann 		if (m == NULL)
734c67b1d17SGarrett Wollman 			return;
7356a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7367e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
73753be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
738af782f1cSBrian Somers #ifdef IPDIVERT
7398948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
740ac9d7e26SMax Laier 		if (divert_find_info(m) != 0) {
741fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
742fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
743af782f1cSBrian Somers 			ip->ip_sum = 0;
74460123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
745af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
74660123168SRuslan Ermilov 			else
74760123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
748fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
749fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
750af782f1cSBrian Somers 		}
751af782f1cSBrian Somers #endif
752f0cada84SAndre Oppermann 	}
753f0cada84SAndre Oppermann 
754f0cada84SAndre Oppermann 	/*
755f0cada84SAndre Oppermann 	 * Further protocols expect the packet length to be w/o the
756f0cada84SAndre Oppermann 	 * IP header.
757f0cada84SAndre Oppermann 	 */
758df8bae1dSRodney W. Grimes 	ip->ip_len -= hlen;
759df8bae1dSRodney W. Grimes 
76093e0e116SJulian Elischer #ifdef IPDIVERT
76193e0e116SJulian Elischer 	/*
7628948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
76393e0e116SJulian Elischer 	 */
764ac9d7e26SMax Laier 	divert_info = divert_find_info(m);
7658948e4baSArchie Cobbs 	if (divert_info != 0) {
766ac9d7e26SMax Laier 		struct mbuf *clone;
7678948e4baSArchie Cobbs 
7688948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
7698948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
770ac9d7e26SMax Laier 			clone = divert_clone(m);
771ac9d7e26SMax Laier 		else
772ac9d7e26SMax Laier 			clone = NULL;
7738948e4baSArchie Cobbs 
7748948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
7758948e4baSArchie Cobbs 		ip->ip_len += hlen;
776fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
777fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
7788948e4baSArchie Cobbs 
7798948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
780ac9d7e26SMax Laier 		divert_packet(m, 1);
781e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
7828948e4baSArchie Cobbs 
7838948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
7848948e4baSArchie Cobbs 		if (clone == NULL)
78593e0e116SJulian Elischer 			return;
7868948e4baSArchie Cobbs 		m = clone;
7878948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
78856962689SCrist J. Clark 		ip->ip_len += hlen;
7892b25acc1SLuigi Rizzo 		/*
7902b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
791ac9d7e26SMax Laier 		 * packet.  We do not need to clear args.next_hop
792ac9d7e26SMax Laier 		 * as that will not be used again and the cloned packet
793ac9d7e26SMax Laier 		 * doesn't contain a divert packet tag so we won't
794ac9d7e26SMax Laier 		 * re-enter this block.
7952b25acc1SLuigi Rizzo 		 */
79656962689SCrist J. Clark 		goto pass;
79793e0e116SJulian Elischer 	}
79893e0e116SJulian Elischer #endif
79993e0e116SJulian Elischer 
80033841545SHajimu UMEMOTO #ifdef IPSEC
80133841545SHajimu UMEMOTO 	/*
80233841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
80333841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
80433841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
80533841545SHajimu UMEMOTO 	 */
80633841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
80733841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
80833841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
80933841545SHajimu UMEMOTO 		goto bad;
81033841545SHajimu UMEMOTO 	}
81133841545SHajimu UMEMOTO #endif
812b9234fafSSam Leffler #if FAST_IPSEC
813b9234fafSSam Leffler 	/*
814b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
815b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
816b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
817b9234fafSSam Leffler 	 */
818b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
819b9234fafSSam Leffler 		/*
820b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
821b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
822b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
823b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
824b9234fafSSam Leffler 		 */
825b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
826b9234fafSSam Leffler 		s = splnet();
827b9234fafSSam Leffler 		if (mtag != NULL) {
828b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
829b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
830b9234fafSSam Leffler 		} else {
831b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
832b9234fafSSam Leffler 						   IP_FORWARDING, &error);
833b9234fafSSam Leffler 		}
834b9234fafSSam Leffler 		if (sp != NULL) {
835b9234fafSSam Leffler 			/*
836b9234fafSSam Leffler 			 * Check security policy against packet attributes.
837b9234fafSSam Leffler 			 */
838b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
839b9234fafSSam Leffler 			KEY_FREESP(&sp);
840b9234fafSSam Leffler 		} else {
841b9234fafSSam Leffler 			/* XXX error stat??? */
842b9234fafSSam Leffler 			error = EINVAL;
843b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
844b9234fafSSam Leffler 			goto bad;
845b9234fafSSam Leffler 		}
846b9234fafSSam Leffler 		splx(s);
847b9234fafSSam Leffler 		if (error)
848b9234fafSSam Leffler 			goto bad;
849b9234fafSSam Leffler 	}
850b9234fafSSam Leffler #endif /* FAST_IPSEC */
85133841545SHajimu UMEMOTO 
852df8bae1dSRodney W. Grimes 	/*
853df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
854df8bae1dSRodney W. Grimes 	 */
855df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
8562b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
857ac9d7e26SMax Laier 		/* attach next hop info for TCP */
858ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
859ac9d7e26SMax Laier 		    sizeof(struct sockaddr_in *), M_NOWAIT);
860ac9d7e26SMax Laier 		if (mtag == NULL)
861ac9d7e26SMax Laier 			goto bad;
862ac9d7e26SMax Laier 		*(struct sockaddr_in **)(mtag+1) = args.next_hop;
863ac9d7e26SMax Laier 		m_tag_prepend(m, mtag);
864ac9d7e26SMax Laier 	}
8652b25acc1SLuigi Rizzo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
866c67b1d17SGarrett Wollman 	return;
867df8bae1dSRodney W. Grimes bad:
868df8bae1dSRodney W. Grimes 	m_freem(m);
869c67b1d17SGarrett Wollman }
870c67b1d17SGarrett Wollman 
871c67b1d17SGarrett Wollman /*
8728948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
873f0cada84SAndre Oppermann  * whole datagram.  If the argument is the first fragment or one
874f0cada84SAndre Oppermann  * in between the function will return NULL and store the mbuf
875f0cada84SAndre Oppermann  * in the fragment chain.  If the argument is the last fragment
876f0cada84SAndre Oppermann  * the packet will be reassembled and the pointer to the new
877f0cada84SAndre Oppermann  * mbuf returned for further processing.  Only m_tags attached
878f0cada84SAndre Oppermann  * to the first packet/fragment are preserved.
879f0cada84SAndre Oppermann  * The IP header is *NOT* adjusted out of iplen.
880df8bae1dSRodney W. Grimes  */
8818948e4baSArchie Cobbs 
882f0cada84SAndre Oppermann struct mbuf *
883f0cada84SAndre Oppermann ip_reass(struct mbuf *m)
884df8bae1dSRodney W. Grimes {
885f0cada84SAndre Oppermann 	struct ip *ip;
886f0cada84SAndre Oppermann 	struct mbuf *p, *q, *nq, *t;
887f0cada84SAndre Oppermann 	struct ipq *fp = NULL;
888f0cada84SAndre Oppermann 	struct ipqhead *head;
889f0cada84SAndre Oppermann 	int i, hlen, next;
89059dfcba4SHajimu UMEMOTO 	u_int8_t ecn, ecn0;
891f0cada84SAndre Oppermann 	u_short hash;
892df8bae1dSRodney W. Grimes 
893f0cada84SAndre Oppermann 	/* If maxnipq is 0, never accept fragments. */
894f0cada84SAndre Oppermann 	if (maxnipq == 0) {
895f0cada84SAndre Oppermann 		ipstat.ips_fragments++;
896f0cada84SAndre Oppermann 		ipstat.ips_fragdropped++;
897f0cada84SAndre Oppermann 		goto dropfrag;
898f0cada84SAndre Oppermann 	}
8992fad1e93SSam Leffler 
900f0cada84SAndre Oppermann 	ip = mtod(m, struct ip *);
901f0cada84SAndre Oppermann 	hlen = ip->ip_hl << 2;
902f0cada84SAndre Oppermann 
903f0cada84SAndre Oppermann 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
904f0cada84SAndre Oppermann 	head = &ipq[hash];
905f0cada84SAndre Oppermann 	IPQ_LOCK();
906f0cada84SAndre Oppermann 
907f0cada84SAndre Oppermann 	/*
908f0cada84SAndre Oppermann 	 * Look for queue of fragments
909f0cada84SAndre Oppermann 	 * of this datagram.
910f0cada84SAndre Oppermann 	 */
911f0cada84SAndre Oppermann 	TAILQ_FOREACH(fp, head, ipq_list)
912f0cada84SAndre Oppermann 		if (ip->ip_id == fp->ipq_id &&
913f0cada84SAndre Oppermann 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
914f0cada84SAndre Oppermann 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
915f0cada84SAndre Oppermann #ifdef MAC
916f0cada84SAndre Oppermann 		    mac_fragment_match(m, fp) &&
917f0cada84SAndre Oppermann #endif
918f0cada84SAndre Oppermann 		    ip->ip_p == fp->ipq_p)
919f0cada84SAndre Oppermann 			goto found;
920f0cada84SAndre Oppermann 
921f0cada84SAndre Oppermann 	fp = NULL;
922f0cada84SAndre Oppermann 
923f0cada84SAndre Oppermann 	/*
924f0cada84SAndre Oppermann 	 * Enforce upper bound on number of fragmented packets
925f0cada84SAndre Oppermann 	 * for which we attempt reassembly;
926f0cada84SAndre Oppermann 	 * If maxnipq is -1, accept all fragments without limitation.
927f0cada84SAndre Oppermann 	 */
928f0cada84SAndre Oppermann 	if ((nipq > maxnipq) && (maxnipq > 0)) {
929f0cada84SAndre Oppermann 		/*
930f0cada84SAndre Oppermann 		 * drop something from the tail of the current queue
931f0cada84SAndre Oppermann 		 * before proceeding further
932f0cada84SAndre Oppermann 		 */
933f0cada84SAndre Oppermann 		struct ipq *q = TAILQ_LAST(head, ipqhead);
934f0cada84SAndre Oppermann 		if (q == NULL) {   /* gak */
935f0cada84SAndre Oppermann 			for (i = 0; i < IPREASS_NHASH; i++) {
936f0cada84SAndre Oppermann 				struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
937f0cada84SAndre Oppermann 				if (r) {
938f0cada84SAndre Oppermann 					ipstat.ips_fragtimeout += r->ipq_nfrags;
939f0cada84SAndre Oppermann 					ip_freef(&ipq[i], r);
940f0cada84SAndre Oppermann 					break;
941f0cada84SAndre Oppermann 				}
942f0cada84SAndre Oppermann 			}
943f0cada84SAndre Oppermann 		} else {
944f0cada84SAndre Oppermann 			ipstat.ips_fragtimeout += q->ipq_nfrags;
945f0cada84SAndre Oppermann 			ip_freef(head, q);
946f0cada84SAndre Oppermann 		}
947f0cada84SAndre Oppermann 	}
948f0cada84SAndre Oppermann 
949f0cada84SAndre Oppermann found:
950f0cada84SAndre Oppermann 	/*
951f0cada84SAndre Oppermann 	 * Adjust ip_len to not reflect header,
952f0cada84SAndre Oppermann 	 * convert offset of this to bytes.
953f0cada84SAndre Oppermann 	 */
954f0cada84SAndre Oppermann 	ip->ip_len -= hlen;
955f0cada84SAndre Oppermann 	if (ip->ip_off & IP_MF) {
956f0cada84SAndre Oppermann 		/*
957f0cada84SAndre Oppermann 		 * Make sure that fragments have a data length
958f0cada84SAndre Oppermann 		 * that's a non-zero multiple of 8 bytes.
959f0cada84SAndre Oppermann 		 */
960f0cada84SAndre Oppermann 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
961f0cada84SAndre Oppermann 			IPQ_UNLOCK();
962f0cada84SAndre Oppermann 			ipstat.ips_toosmall++; /* XXX */
963f0cada84SAndre Oppermann 			goto dropfrag;
964f0cada84SAndre Oppermann 		}
965f0cada84SAndre Oppermann 		m->m_flags |= M_FRAG;
966f0cada84SAndre Oppermann 	} else
967f0cada84SAndre Oppermann 		m->m_flags &= ~M_FRAG;
968f0cada84SAndre Oppermann 	ip->ip_off <<= 3;
969f0cada84SAndre Oppermann 
970f0cada84SAndre Oppermann 
971f0cada84SAndre Oppermann 	/*
972f0cada84SAndre Oppermann 	 * Attempt reassembly; if it succeeds, proceed.
973f0cada84SAndre Oppermann 	 * ip_reass() will return a different mbuf.
974f0cada84SAndre Oppermann 	 */
975f0cada84SAndre Oppermann 	ipstat.ips_fragments++;
976f0cada84SAndre Oppermann 	m->m_pkthdr.header = ip;
977f0cada84SAndre Oppermann 
978f0cada84SAndre Oppermann 	/* Previous ip_reass() started here. */
979df8bae1dSRodney W. Grimes 	/*
980df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
981df8bae1dSRodney W. Grimes 	 * would confuse code below.
982df8bae1dSRodney W. Grimes 	 */
983df8bae1dSRodney W. Grimes 	m->m_data += hlen;
984df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
985df8bae1dSRodney W. Grimes 
986df8bae1dSRodney W. Grimes 	/*
987df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
988df8bae1dSRodney W. Grimes 	 */
989042bbfa3SRobert Watson 	if (fp == NULL) {
990a163d034SWarner Losh 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
991df8bae1dSRodney W. Grimes 			goto dropfrag;
992df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
99336b0360bSRobert Watson #ifdef MAC
9945e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
9955e7ce478SRobert Watson 			m_free(t);
9965e7ce478SRobert Watson 			goto dropfrag;
9975e7ce478SRobert Watson 		}
99836b0360bSRobert Watson 		mac_create_ipq(m, fp);
99936b0360bSRobert Watson #endif
1000462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
1001194a213eSAndrey A. Chernov 		nipq++;
1002375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
1003df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
1004df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
1005df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
10066effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
10076effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
1008af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1009af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
1010af38c68cSLuigi Rizzo 		goto inserted;
101136b0360bSRobert Watson 	} else {
1012375386e2SMike Silbersack 		fp->ipq_nfrags++;
101336b0360bSRobert Watson #ifdef MAC
101436b0360bSRobert Watson 		mac_update_ipq(m, fp);
101536b0360bSRobert Watson #endif
1016df8bae1dSRodney W. Grimes 	}
1017df8bae1dSRodney W. Grimes 
10186effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
10196effc713SDoug Rabson 
1020df8bae1dSRodney W. Grimes 	/*
102159dfcba4SHajimu UMEMOTO 	 * Handle ECN by comparing this segment with the first one;
102259dfcba4SHajimu UMEMOTO 	 * if CE is set, do not lose CE.
102359dfcba4SHajimu UMEMOTO 	 * drop if CE and not-ECT are mixed for the same packet.
102459dfcba4SHajimu UMEMOTO 	 */
102559dfcba4SHajimu UMEMOTO 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
102659dfcba4SHajimu UMEMOTO 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
102759dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_CE) {
102859dfcba4SHajimu UMEMOTO 		if (ecn0 == IPTOS_ECN_NOTECT)
102959dfcba4SHajimu UMEMOTO 			goto dropfrag;
103059dfcba4SHajimu UMEMOTO 		if (ecn0 != IPTOS_ECN_CE)
103159dfcba4SHajimu UMEMOTO 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
103259dfcba4SHajimu UMEMOTO 	}
103359dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
103459dfcba4SHajimu UMEMOTO 		goto dropfrag;
103559dfcba4SHajimu UMEMOTO 
103659dfcba4SHajimu UMEMOTO 	/*
1037df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1038df8bae1dSRodney W. Grimes 	 */
10396effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10406effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1041df8bae1dSRodney W. Grimes 			break;
1042df8bae1dSRodney W. Grimes 
1043df8bae1dSRodney W. Grimes 	/*
1044df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1045df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1046af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1047af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1048db4f9cc7SJonathan Lemon 	 *
1049db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
1050db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
1051df8bae1dSRodney W. Grimes 	 */
10526effc713SDoug Rabson 	if (p) {
10536effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1054df8bae1dSRodney W. Grimes 		if (i > 0) {
1055df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1056df8bae1dSRodney W. Grimes 				goto dropfrag;
10576a800098SYoshinobu Inoue 			m_adj(m, i);
1058db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1059df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1060df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1061df8bae1dSRodney W. Grimes 		}
1062af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1063af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1064af38c68cSLuigi Rizzo 	} else {
1065af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1066af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1067df8bae1dSRodney W. Grimes 	}
1068df8bae1dSRodney W. Grimes 
1069df8bae1dSRodney W. Grimes 	/*
1070df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1071df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1072df8bae1dSRodney W. Grimes 	 */
10736effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1074af38c68cSLuigi Rizzo 	     q = nq) {
1075b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10766effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10776effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10786effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10796effc713SDoug Rabson 			m_adj(q, i);
1080db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1081df8bae1dSRodney W. Grimes 			break;
1082df8bae1dSRodney W. Grimes 		}
10836effc713SDoug Rabson 		nq = q->m_nextpkt;
1084af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
108599e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1086375386e2SMike Silbersack 		fp->ipq_nfrags--;
10876effc713SDoug Rabson 		m_freem(q);
1088df8bae1dSRodney W. Grimes 	}
1089df8bae1dSRodney W. Grimes 
1090af38c68cSLuigi Rizzo inserted:
109193e0e116SJulian Elischer 
109293e0e116SJulian Elischer #ifdef IPDIVERT
1093ac9d7e26SMax Laier 	if (ip->ip_off != 0) {
109493e0e116SJulian Elischer 		/*
1095ac9d7e26SMax Laier 		 * Strip any divert information; only the info
1096ac9d7e26SMax Laier 		 * on the first fragment is used/kept.
109793e0e116SJulian Elischer 		 */
1098ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
1099ac9d7e26SMax Laier 		if (mtag)
1100ac9d7e26SMax Laier 			m_tag_delete(m, mtag);
11012b25acc1SLuigi Rizzo 	}
110293e0e116SJulian Elischer #endif
110393e0e116SJulian Elischer 
1104df8bae1dSRodney W. Grimes 	/*
1105375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1106375386e2SMike Silbersack 	 * limiting.
1107375386e2SMike Silbersack 	 *
1108375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1109375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1110375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1111375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1112375386e2SMike Silbersack 	 *
1113df8bae1dSRodney W. Grimes 	 */
11146effc713SDoug Rabson 	next = 0;
11156effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1116375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
111799e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
111899e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1119375386e2SMike Silbersack 				ip_freef(head, fp);
112099e8617dSMaxim Konovalov 			}
1121f0cada84SAndre Oppermann 			goto done;
1122375386e2SMike Silbersack 		}
11236effc713SDoug Rabson 		next += GETIP(q)->ip_len;
11246effc713SDoug Rabson 	}
11256effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1126375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
112799e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
112899e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1129375386e2SMike Silbersack 			ip_freef(head, fp);
113099e8617dSMaxim Konovalov 		}
1131f0cada84SAndre Oppermann 		goto done;
1132375386e2SMike Silbersack 	}
1133df8bae1dSRodney W. Grimes 
1134df8bae1dSRodney W. Grimes 	/*
1135430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1136430d30d8SBill Fenner 	 */
11376effc713SDoug Rabson 	q = fp->ipq_frags;
11386effc713SDoug Rabson 	ip = GETIP(q);
113953be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1140430d30d8SBill Fenner 		ipstat.ips_toolong++;
114199e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1142462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1143f0cada84SAndre Oppermann 		goto done;
1144430d30d8SBill Fenner 	}
1145430d30d8SBill Fenner 
1146430d30d8SBill Fenner 	/*
1147430d30d8SBill Fenner 	 * Concatenate fragments.
1148df8bae1dSRodney W. Grimes 	 */
11496effc713SDoug Rabson 	m = q;
1150df8bae1dSRodney W. Grimes 	t = m->m_next;
1151df8bae1dSRodney W. Grimes 	m->m_next = 0;
1152df8bae1dSRodney W. Grimes 	m_cat(m, t);
11536effc713SDoug Rabson 	nq = q->m_nextpkt;
1154945aa40dSDoug Rabson 	q->m_nextpkt = 0;
11556effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11566effc713SDoug Rabson 		nq = q->m_nextpkt;
1157945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1158db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1159db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1160a8db1d93SJonathan Lemon 		m_cat(m, q);
1161df8bae1dSRodney W. Grimes 	}
116236b0360bSRobert Watson #ifdef MAC
116336b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
116436b0360bSRobert Watson 	mac_destroy_ipq(fp);
116536b0360bSRobert Watson #endif
1166df8bae1dSRodney W. Grimes 
1167df8bae1dSRodney W. Grimes 	/*
1168f0cada84SAndre Oppermann 	 * Create header for new ip packet by modifying header of first
1169f0cada84SAndre Oppermann 	 * packet;  dequeue and discard fragment reassembly header.
1170df8bae1dSRodney W. Grimes 	 * Make header visible.
1171df8bae1dSRodney W. Grimes 	 */
1172f0cada84SAndre Oppermann 	ip->ip_len = (ip->ip_hl << 2) + next;
11736effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
11746effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1175462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1176194a213eSAndrey A. Chernov 	nipq--;
1177df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
117853be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
117953be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1180df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1181a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1182a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
1183f0cada84SAndre Oppermann 	ipstat.ips_reassembled++;
1184f0cada84SAndre Oppermann 	IPQ_UNLOCK();
11856a800098SYoshinobu Inoue 	return (m);
1186df8bae1dSRodney W. Grimes 
1187df8bae1dSRodney W. Grimes dropfrag:
1188df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1189042bbfa3SRobert Watson 	if (fp != NULL)
1190375386e2SMike Silbersack 		fp->ipq_nfrags--;
1191df8bae1dSRodney W. Grimes 	m_freem(m);
1192f0cada84SAndre Oppermann done:
1193f0cada84SAndre Oppermann 	IPQ_UNLOCK();
1194f0cada84SAndre Oppermann 	return (NULL);
11956effc713SDoug Rabson 
11966effc713SDoug Rabson #undef GETIP
1197df8bae1dSRodney W. Grimes }
1198df8bae1dSRodney W. Grimes 
1199df8bae1dSRodney W. Grimes /*
1200df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1201df8bae1dSRodney W. Grimes  * associated datagrams.
1202df8bae1dSRodney W. Grimes  */
12030312fbe9SPoul-Henning Kamp static void
1204462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1205462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1206df8bae1dSRodney W. Grimes 	struct ipq *fp;
1207df8bae1dSRodney W. Grimes {
12086effc713SDoug Rabson 	register struct mbuf *q;
1209df8bae1dSRodney W. Grimes 
12102fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
12112fad1e93SSam Leffler 
12126effc713SDoug Rabson 	while (fp->ipq_frags) {
12136effc713SDoug Rabson 		q = fp->ipq_frags;
12146effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
12156effc713SDoug Rabson 		m_freem(q);
1216df8bae1dSRodney W. Grimes 	}
1217462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1218df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1219194a213eSAndrey A. Chernov 	nipq--;
1220df8bae1dSRodney W. Grimes }
1221df8bae1dSRodney W. Grimes 
1222df8bae1dSRodney W. Grimes /*
1223df8bae1dSRodney W. Grimes  * IP timer processing;
1224df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1225df8bae1dSRodney W. Grimes  * queue, discard it.
1226df8bae1dSRodney W. Grimes  */
1227df8bae1dSRodney W. Grimes void
1228df8bae1dSRodney W. Grimes ip_slowtimo()
1229df8bae1dSRodney W. Grimes {
1230df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1231df8bae1dSRodney W. Grimes 	int s = splnet();
1232194a213eSAndrey A. Chernov 	int i;
1233df8bae1dSRodney W. Grimes 
12342fad1e93SSam Leffler 	IPQ_LOCK();
1235194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1236462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1237462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1238462b86feSPoul-Henning Kamp 
1239462b86feSPoul-Henning Kamp 			fpp = fp;
1240462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1241462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
124299e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1243462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1244df8bae1dSRodney W. Grimes 			}
1245df8bae1dSRodney W. Grimes 		}
1246194a213eSAndrey A. Chernov 	}
1247690a6055SJesper Skriver 	/*
1248690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1249690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1250690a6055SJesper Skriver 	 * enough to get down to the new limit.
1251690a6055SJesper Skriver 	 */
1252a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1253690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1254b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
125599e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
125699e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1257690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1258690a6055SJesper Skriver 			}
1259690a6055SJesper Skriver 		}
1260690a6055SJesper Skriver 	}
12612fad1e93SSam Leffler 	IPQ_UNLOCK();
1262df8bae1dSRodney W. Grimes 	splx(s);
1263df8bae1dSRodney W. Grimes }
1264df8bae1dSRodney W. Grimes 
1265df8bae1dSRodney W. Grimes /*
1266df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1267df8bae1dSRodney W. Grimes  */
1268df8bae1dSRodney W. Grimes void
1269df8bae1dSRodney W. Grimes ip_drain()
1270df8bae1dSRodney W. Grimes {
1271194a213eSAndrey A. Chernov 	int     i;
1272ce29ab3aSGarrett Wollman 
12732fad1e93SSam Leffler 	IPQ_LOCK();
1274194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1275462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
127699e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
127799e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1278462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1279194a213eSAndrey A. Chernov 		}
1280194a213eSAndrey A. Chernov 	}
12812fad1e93SSam Leffler 	IPQ_UNLOCK();
1282ce29ab3aSGarrett Wollman 	in_rtqdrain();
1283df8bae1dSRodney W. Grimes }
1284df8bae1dSRodney W. Grimes 
1285df8bae1dSRodney W. Grimes /*
1286df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1287df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1288df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1289d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1290d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1291d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1292d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1293d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1294df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1295df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1296df8bae1dSRodney W. Grimes  */
12970312fbe9SPoul-Henning Kamp static int
12982b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1299df8bae1dSRodney W. Grimes {
13002b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
13012b25acc1SLuigi Rizzo 	u_char *cp;
13022b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1303df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1304df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1305df8bae1dSRodney W. Grimes 	n_time ntime;
13064d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1307df8bae1dSRodney W. Grimes 
13082bde81acSAndre Oppermann 	/* ignore or reject packets with IP options */
13092bde81acSAndre Oppermann 	if (ip_doopts == 0)
13102bde81acSAndre Oppermann 		return 0;
13112bde81acSAndre Oppermann 	else if (ip_doopts == 2) {
13122bde81acSAndre Oppermann 		type = ICMP_UNREACH;
13132bde81acSAndre Oppermann 		code = ICMP_UNREACH_FILTER_PROHIB;
13142bde81acSAndre Oppermann 		goto bad;
13152bde81acSAndre Oppermann 	}
13162bde81acSAndre Oppermann 
1317df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1318df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
131953be11f6SPoul-Henning Kamp 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1320df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1321df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1322df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1323df8bae1dSRodney W. Grimes 			break;
1324df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1325df8bae1dSRodney W. Grimes 			optlen = 1;
1326df8bae1dSRodney W. Grimes 		else {
1327fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1328fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1329fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1330fdcb8debSJun-ichiro itojun Hagino 			}
1331df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
1332707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1333df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1334df8bae1dSRodney W. Grimes 				goto bad;
1335df8bae1dSRodney W. Grimes 			}
1336df8bae1dSRodney W. Grimes 		}
1337df8bae1dSRodney W. Grimes 		switch (opt) {
1338df8bae1dSRodney W. Grimes 
1339df8bae1dSRodney W. Grimes 		default:
1340df8bae1dSRodney W. Grimes 			break;
1341df8bae1dSRodney W. Grimes 
1342df8bae1dSRodney W. Grimes 		/*
1343df8bae1dSRodney W. Grimes 		 * Source routing with record.
1344df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1345df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1346df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1347df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1348df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1349df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1350df8bae1dSRodney W. Grimes 		 */
1351df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1352df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1353d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1354d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1355d0ebc0d2SYaroslav Tykhiy 				break;
1356d0ebc0d2SYaroslav Tykhiy #endif
135733841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
135833841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
135933841545SHajimu UMEMOTO 				goto bad;
136033841545SHajimu UMEMOTO 			}
1361df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1362df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1363df8bae1dSRodney W. Grimes 				goto bad;
1364df8bae1dSRodney W. Grimes 			}
1365df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1366df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1367df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1368df8bae1dSRodney W. Grimes 			if (ia == 0) {
1369df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1370df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1371df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1372df8bae1dSRodney W. Grimes 					goto bad;
1373df8bae1dSRodney W. Grimes 				}
1374bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1375bc189bf8SGuido van Rooij 					goto nosourcerouting;
1376df8bae1dSRodney W. Grimes 				/*
1377df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1378df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1379df8bae1dSRodney W. Grimes 				 */
1380df8bae1dSRodney W. Grimes 				break;
1381df8bae1dSRodney W. Grimes 			}
1382df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
13835d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1384df8bae1dSRodney W. Grimes 				/*
1385df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1386df8bae1dSRodney W. Grimes 				 */
13874fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
13884fce5804SGuido van Rooij 					goto nosourcerouting;
1389df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1390df8bae1dSRodney W. Grimes 				break;
1391df8bae1dSRodney W. Grimes 			}
1392d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1393d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1394d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1395d0ebc0d2SYaroslav Tykhiy #endif
13961025071fSGarrett Wollman 			if (!ip_dosourceroute) {
13970af8d3ecSDavid Greenman 				if (ipforwarding) {
13980af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
13990af8d3ecSDavid Greenman 					/*
14000af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
14010af8d3ecSDavid Greenman 					 */
1402efa48587SGuido van Rooij nosourcerouting:
1403bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
14041025071fSGarrett Wollman 					log(LOG_WARNING,
14051025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
14061025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
14071025071fSGarrett Wollman 					type = ICMP_UNREACH;
14081025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
14091025071fSGarrett Wollman 					goto bad;
14100af8d3ecSDavid Greenman 				} else {
14110af8d3ecSDavid Greenman 					/*
14120af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
14130af8d3ecSDavid Greenman 					 */
1414d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1415d0ebc0d2SYaroslav Tykhiy dropit:
1416d0ebc0d2SYaroslav Tykhiy #endif
14170af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
14180af8d3ecSDavid Greenman 					m_freem(m);
14190af8d3ecSDavid Greenman 					return (1);
14200af8d3ecSDavid Greenman 				}
14211025071fSGarrett Wollman 			}
14221025071fSGarrett Wollman 
1423df8bae1dSRodney W. Grimes 			/*
1424df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1425df8bae1dSRodney W. Grimes 			 */
142694a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1427df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
14281025071fSGarrett Wollman 
1429df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1430df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1431df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1432df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1433df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1434df8bae1dSRodney W. Grimes 			} else
143502c1c707SAndre Oppermann 				ia = ip_rtaddr(ipaddr.sin_addr);
1436df8bae1dSRodney W. Grimes 			if (ia == 0) {
1437df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1438df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1439df8bae1dSRodney W. Grimes 				goto bad;
1440df8bae1dSRodney W. Grimes 			}
1441df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
144294a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
144394a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1444df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1445df8bae1dSRodney W. Grimes 			/*
1446df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1447df8bae1dSRodney W. Grimes 			 */
1448df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1449df8bae1dSRodney W. Grimes 			break;
1450df8bae1dSRodney W. Grimes 
1451df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1452d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1453d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1454d0ebc0d2SYaroslav Tykhiy 				break;
1455d0ebc0d2SYaroslav Tykhiy #endif
1456707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1457707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1458707d00a3SJonathan Lemon 				goto bad;
1459707d00a3SJonathan Lemon 			}
1460df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1461df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1462df8bae1dSRodney W. Grimes 				goto bad;
1463df8bae1dSRodney W. Grimes 			}
1464df8bae1dSRodney W. Grimes 			/*
1465df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1466df8bae1dSRodney W. Grimes 			 */
1467df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
14685d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1469df8bae1dSRodney W. Grimes 				break;
147094a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1471df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1472df8bae1dSRodney W. Grimes 			/*
1473df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1474df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1475df8bae1dSRodney W. Grimes 			 */
1476df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
147702c1c707SAndre Oppermann 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1478df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1479df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1480df8bae1dSRodney W. Grimes 				goto bad;
1481df8bae1dSRodney W. Grimes 			}
148294a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
148394a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1484df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1485df8bae1dSRodney W. Grimes 			break;
1486df8bae1dSRodney W. Grimes 
1487df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1488d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1489d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1490d0ebc0d2SYaroslav Tykhiy 				break;
1491d0ebc0d2SYaroslav Tykhiy #endif
1492df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
149307514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
149407514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1495df8bae1dSRodney W. Grimes 				goto bad;
149633841545SHajimu UMEMOTO 			}
149707514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
149807514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
149933841545SHajimu UMEMOTO 				goto bad;
150033841545SHajimu UMEMOTO 			}
150107514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
150207514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
150307514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
150407514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1505df8bae1dSRodney W. Grimes 					goto bad;
150633841545SHajimu UMEMOTO 				}
1507df8bae1dSRodney W. Grimes 				break;
1508df8bae1dSRodney W. Grimes 			}
150907514071SJonathan Lemon 			off--;				/* 0 origin */
151007514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
151107514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1512df8bae1dSRodney W. Grimes 
1513df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1514df8bae1dSRodney W. Grimes 				break;
1515df8bae1dSRodney W. Grimes 
1516df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
151707514071SJonathan Lemon 				if (off + sizeof(n_time) +
151807514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
151907514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1520df8bae1dSRodney W. Grimes 					goto bad;
152133841545SHajimu UMEMOTO 				}
1522df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1523df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1524df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
1525df8bae1dSRodney W. Grimes 				if (ia == 0)
1526df8bae1dSRodney W. Grimes 					continue;
152794a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
152894a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
152907514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1530a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1531df8bae1dSRodney W. Grimes 				break;
1532df8bae1dSRodney W. Grimes 
1533df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
153407514071SJonathan Lemon 				if (off + sizeof(n_time) +
153507514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
153607514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1537df8bae1dSRodney W. Grimes 					goto bad;
153833841545SHajimu UMEMOTO 				}
153994a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1540df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
1541df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1542df8bae1dSRodney W. Grimes 					continue;
154307514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1544a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1545df8bae1dSRodney W. Grimes 				break;
1546df8bae1dSRodney W. Grimes 
1547df8bae1dSRodney W. Grimes 			default:
154807514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1549df8bae1dSRodney W. Grimes 				goto bad;
1550df8bae1dSRodney W. Grimes 			}
1551df8bae1dSRodney W. Grimes 			ntime = iptime();
155207514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
155307514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1554df8bae1dSRodney W. Grimes 		}
1555df8bae1dSRodney W. Grimes 	}
155647174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
155702c1c707SAndre Oppermann 		ip_forward(m, 1, next_hop);
1558df8bae1dSRodney W. Grimes 		return (1);
1559df8bae1dSRodney W. Grimes 	}
1560df8bae1dSRodney W. Grimes 	return (0);
1561df8bae1dSRodney W. Grimes bad:
1562df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1563df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1564df8bae1dSRodney W. Grimes 	return (1);
1565df8bae1dSRodney W. Grimes }
1566df8bae1dSRodney W. Grimes 
1567df8bae1dSRodney W. Grimes /*
1568df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1569df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1570df8bae1dSRodney W. Grimes  */
1571bd714208SRuslan Ermilov struct in_ifaddr *
157202c1c707SAndre Oppermann ip_rtaddr(dst)
1573df8bae1dSRodney W. Grimes 	struct in_addr dst;
1574df8bae1dSRodney W. Grimes {
157597d8d152SAndre Oppermann 	struct route sro;
157602c1c707SAndre Oppermann 	struct sockaddr_in *sin;
157702c1c707SAndre Oppermann 	struct in_ifaddr *ifa;
1578df8bae1dSRodney W. Grimes 
15790cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
158097d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
1581df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
1582df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
1583df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
158497d8d152SAndre Oppermann 	rtalloc_ign(&sro, RTF_CLONING);
1585df8bae1dSRodney W. Grimes 
158697d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
1587df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
158802c1c707SAndre Oppermann 
158997d8d152SAndre Oppermann 	ifa = ifatoia(sro.ro_rt->rt_ifa);
159097d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
159102c1c707SAndre Oppermann 	return ifa;
1592df8bae1dSRodney W. Grimes }
1593df8bae1dSRodney W. Grimes 
1594df8bae1dSRodney W. Grimes /*
1595df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1596df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1597df8bae1dSRodney W. Grimes  */
159837c84183SPoul-Henning Kamp static void
1599df8bae1dSRodney W. Grimes save_rte(option, dst)
1600df8bae1dSRodney W. Grimes 	u_char *option;
1601df8bae1dSRodney W. Grimes 	struct in_addr dst;
1602df8bae1dSRodney W. Grimes {
1603df8bae1dSRodney W. Grimes 	unsigned olen;
1604df8bae1dSRodney W. Grimes 
1605df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1606df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1607df8bae1dSRodney W. Grimes 	if (ipprintfs)
1608df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1609df8bae1dSRodney W. Grimes #endif
1610df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1611df8bae1dSRodney W. Grimes 		return;
16120453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1613df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1614df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1615df8bae1dSRodney W. Grimes }
1616df8bae1dSRodney W. Grimes 
1617df8bae1dSRodney W. Grimes /*
1618df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1619df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1620df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1621df8bae1dSRodney W. Grimes  */
1622df8bae1dSRodney W. Grimes struct mbuf *
1623df8bae1dSRodney W. Grimes ip_srcroute()
1624df8bae1dSRodney W. Grimes {
1625df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1626df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1627df8bae1dSRodney W. Grimes 
1628df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1629df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1630a163d034SWarner Losh 	m = m_get(M_DONTWAIT, MT_HEADER);
1631df8bae1dSRodney W. Grimes 	if (m == 0)
1632df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1633df8bae1dSRodney W. Grimes 
1634df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1635df8bae1dSRodney W. Grimes 
1636df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1637df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1638df8bae1dSRodney W. Grimes 	    OPTSIZ;
1639df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1640df8bae1dSRodney W. Grimes 	if (ipprintfs)
1641df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1642df8bae1dSRodney W. Grimes #endif
1643df8bae1dSRodney W. Grimes 
1644df8bae1dSRodney W. Grimes 	/*
1645df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1646df8bae1dSRodney W. Grimes 	 */
1647df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1648df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1649df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1650df8bae1dSRodney W. Grimes 	if (ipprintfs)
1651af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1652df8bae1dSRodney W. Grimes #endif
1653df8bae1dSRodney W. Grimes 
1654df8bae1dSRodney W. Grimes 	/*
1655df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1656df8bae1dSRodney W. Grimes 	 */
1657df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1658df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
165994a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
166094a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1661df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1662df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1663df8bae1dSRodney W. Grimes #undef OPTSIZ
1664df8bae1dSRodney W. Grimes 	/*
1665df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1666df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1667df8bae1dSRodney W. Grimes 	 */
1668df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1669df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1670df8bae1dSRodney W. Grimes 		if (ipprintfs)
1671af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1672df8bae1dSRodney W. Grimes #endif
1673df8bae1dSRodney W. Grimes 		*q++ = *p--;
1674df8bae1dSRodney W. Grimes 	}
1675df8bae1dSRodney W. Grimes 	/*
1676df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1677df8bae1dSRodney W. Grimes 	 */
1678df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1679df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1680df8bae1dSRodney W. Grimes 	if (ipprintfs)
1681af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1682df8bae1dSRodney W. Grimes #endif
1683df8bae1dSRodney W. Grimes 	return (m);
1684df8bae1dSRodney W. Grimes }
1685df8bae1dSRodney W. Grimes 
1686df8bae1dSRodney W. Grimes /*
1687df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1688df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1689df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1690df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1691df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1692df8bae1dSRodney W. Grimes  */
1693df8bae1dSRodney W. Grimes void
1694df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1695df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1696df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1697df8bae1dSRodney W. Grimes {
1698df8bae1dSRodney W. Grimes 	register int i;
1699df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1700df8bae1dSRodney W. Grimes 	register caddr_t opts;
1701df8bae1dSRodney W. Grimes 	int olen;
1702df8bae1dSRodney W. Grimes 
170353be11f6SPoul-Henning Kamp 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
1704df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1705df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1706df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1707df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1708df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1709df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
171053be11f6SPoul-Henning Kamp 	ip->ip_v = IPVERSION;
171153be11f6SPoul-Henning Kamp 	ip->ip_hl = sizeof(struct ip) >> 2;
1712df8bae1dSRodney W. Grimes }
1713df8bae1dSRodney W. Grimes 
/*
 * Table of PRC_NCMDS error numbers, one slot per index.
 * NOTE(review): presumably indexed by PRC_* control command, with 0
 * meaning "no error to report" - confirm against the users of this
 * table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 -  3 */ 0,		0,		0,		0,
	/*  4 -  7 */ 0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	/*  8 - 11 */ EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	/* 12 - 15 */ EMSGSIZE,	EHOSTUNREACH,	0,		0,
	/* 16 - 19 */ 0,		0,		EHOSTUNREACH,	0,
	/* 20 - 21 */ ENOPROTOOPT,	ECONNREFUSED
};
1722df8bae1dSRodney W. Grimes 
1723df8bae1dSRodney W. Grimes /*
1724df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1725df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1726df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1727df8bae1dSRodney W. Grimes  * of codes and types.
1728df8bae1dSRodney W. Grimes  *
1729df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1730df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1731df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1732df8bae1dSRodney W. Grimes  * protocol deal with that.
1733df8bae1dSRodney W. Grimes  *
1734df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1735df8bae1dSRodney W. Grimes  * via a source route.
1736df8bae1dSRodney W. Grimes  */
17370312fbe9SPoul-Henning Kamp static void
173802c1c707SAndre Oppermann ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1739df8bae1dSRodney W. Grimes {
17402b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
174102c1c707SAndre Oppermann 	struct in_ifaddr *ia;
174226f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1743df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1744df8bae1dSRodney W. Grimes 	n_long dest;
17453efc3014SJulian Elischer 	struct in_addr pkt_dst;
1746df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
1747b9234fafSSam Leffler #if defined(IPSEC) || defined(FAST_IPSEC)
17486a800098SYoshinobu Inoue 	struct ifnet dummyifp;
17496a800098SYoshinobu Inoue #endif
1750df8bae1dSRodney W. Grimes 
17513efc3014SJulian Elischer 	/*
17523efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
17533efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
17543efc3014SJulian Elischer 	 */
17552b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
17563efc3014SJulian Elischer 
1757df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1758df8bae1dSRodney W. Grimes 	if (ipprintfs)
175961ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
17603efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1761162886e2SBruce Evans 		    ip->ip_ttl);
1762df8bae1dSRodney W. Grimes #endif
1763100ba1a6SJordan K. Hubbard 
1764100ba1a6SJordan K. Hubbard 
17653efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1766df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1767df8bae1dSRodney W. Grimes 		m_freem(m);
1768df8bae1dSRodney W. Grimes 		return;
1769df8bae1dSRodney W. Grimes 	}
17701b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17711b968362SDag-Erling Smørgrav 	if (!ipstealth) {
17721b968362SDag-Erling Smørgrav #endif
1773df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
17741b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
177502c1c707SAndre Oppermann 			    0, 0);
1776df8bae1dSRodney W. Grimes 			return;
1777df8bae1dSRodney W. Grimes 		}
17781b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17791b968362SDag-Erling Smørgrav 	}
17801b968362SDag-Erling Smørgrav #endif
1781df8bae1dSRodney W. Grimes 
178202c1c707SAndre Oppermann 	if ((ia = ip_rtaddr(pkt_dst)) == 0) {
178302c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
1784df8bae1dSRodney W. Grimes 		return;
178502c1c707SAndre Oppermann 	}
1786df8bae1dSRodney W. Grimes 
1787df8bae1dSRodney W. Grimes 	/*
1788bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1789bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1790bfef7ed4SIan Dowse 	 *
17914d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
17924d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
17934d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
17944d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
17954d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
17964d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
17974d2e3692SLuigi Rizzo 	 *
1798bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1799bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1800bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1801bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1802df8bae1dSRodney W. Grimes 	 */
1803a163d034SWarner Losh 	MGET(mcopy, M_DONTWAIT, m->m_type);
1804a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
18059967cafcSSam Leffler 		/*
18069967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
18079967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
18089967cafcSSam Leffler 		 * be conservative and just discard the copy since
18099967cafcSSam Leffler 		 * code below may some day want the tags.
18109967cafcSSam Leffler 		 */
18119967cafcSSam Leffler 		m_free(mcopy);
18129967cafcSSam Leffler 		mcopy = NULL;
18139967cafcSSam Leffler 	}
1814bfef7ed4SIan Dowse 	if (mcopy != NULL) {
181553be11f6SPoul-Henning Kamp 		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1816bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1817e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1818bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1819bfef7ed4SIan Dowse 	}
182004287599SRuslan Ermilov 
182104287599SRuslan Ermilov #ifdef IPSTEALTH
182204287599SRuslan Ermilov 	if (!ipstealth) {
182304287599SRuslan Ermilov #endif
182404287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
182504287599SRuslan Ermilov #ifdef IPSTEALTH
182604287599SRuslan Ermilov 	}
182704287599SRuslan Ermilov #endif
1828df8bae1dSRodney W. Grimes 
1829df8bae1dSRodney W. Grimes 	/*
1830df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1831df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1832df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1833df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1834df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1835df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1836df8bae1dSRodney W. Grimes 	 */
183702c1c707SAndre Oppermann 	dest = 0;
183802c1c707SAndre Oppermann 	if (ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
183902c1c707SAndre Oppermann 		struct sockaddr_in *sin;
184002c1c707SAndre Oppermann 		struct route ro;
184102c1c707SAndre Oppermann 		struct rtentry *rt;
184202c1c707SAndre Oppermann 
18430cfbbe3bSAndre Oppermann 		bzero(&ro, sizeof(ro));
184402c1c707SAndre Oppermann 		sin = (struct sockaddr_in *)&ro.ro_dst;
184502c1c707SAndre Oppermann 		sin->sin_family = AF_INET;
184602c1c707SAndre Oppermann 		sin->sin_len = sizeof(*sin);
184702c1c707SAndre Oppermann 		sin->sin_addr = pkt_dst;
184826d02ca7SAndre Oppermann 		rtalloc_ign(&ro, RTF_CLONING);
184902c1c707SAndre Oppermann 
185002c1c707SAndre Oppermann 		rt = ro.ro_rt;
185102c1c707SAndre Oppermann 
185202c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1853df8bae1dSRodney W. Grimes 		    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
18542b25acc1SLuigi Rizzo 		    ipsendredirects && !srcrt && !next_hop) {
1855df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1856df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1857df8bae1dSRodney W. Grimes 
1858df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1859df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1860df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
1861df8bae1dSRodney W. Grimes 					dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1862df8bae1dSRodney W. Grimes 				else
18633efc3014SJulian Elischer 					dest = pkt_dst.s_addr;
1864df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1865df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1866df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1867df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1868df8bae1dSRodney W. Grimes 				if (ipprintfs)
1869df8bae1dSRodney W. Grimes 					printf("redirect (%d) to %lx\n", code, (u_long)dest);
1870df8bae1dSRodney W. Grimes #endif
1871df8bae1dSRodney W. Grimes 			}
1872df8bae1dSRodney W. Grimes 		}
187302c1c707SAndre Oppermann 		if (rt)
187402c1c707SAndre Oppermann 			RTFREE(rt);
187502c1c707SAndre Oppermann 	}
1876df8bae1dSRodney W. Grimes 
1877ea779ff3SLuigi Rizzo 	if (next_hop) {
1878ac9d7e26SMax Laier 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
1879ac9d7e26SMax Laier 		    sizeof(struct sockaddr_in *), M_NOWAIT);
1880ac9d7e26SMax Laier 		if (mtag == NULL) {
1881ac9d7e26SMax Laier 			m_freem(m);
1882ac9d7e26SMax Laier 			return;
1883ac9d7e26SMax Laier 		}
1884ac9d7e26SMax Laier 		*(struct sockaddr_in **)(mtag+1) = next_hop;
1885ac9d7e26SMax Laier 		m_tag_prepend(m, mtag);
1886ea779ff3SLuigi Rizzo 	}
188702c1c707SAndre Oppermann 	error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL);
1888df8bae1dSRodney W. Grimes 	if (error)
1889df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1890df8bae1dSRodney W. Grimes 	else {
1891df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1892df8bae1dSRodney W. Grimes 		if (type)
1893df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1894df8bae1dSRodney W. Grimes 		else {
18959188b4a1SAndre Oppermann 			if (mcopy)
1896df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1897df8bae1dSRodney W. Grimes 			return;
1898df8bae1dSRodney W. Grimes 		}
1899df8bae1dSRodney W. Grimes 	}
1900df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1901df8bae1dSRodney W. Grimes 		return;
1902df8bae1dSRodney W. Grimes 	destifp = NULL;
1903df8bae1dSRodney W. Grimes 
1904df8bae1dSRodney W. Grimes 	switch (error) {
1905df8bae1dSRodney W. Grimes 
1906df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1907df8bae1dSRodney W. Grimes 		/* type, code set above */
1908df8bae1dSRodney W. Grimes 		break;
1909df8bae1dSRodney W. Grimes 
1910df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1911df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1912df8bae1dSRodney W. Grimes 	case ENETDOWN:
1913df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1914df8bae1dSRodney W. Grimes 	default:
1915df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1916df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1917df8bae1dSRodney W. Grimes 		break;
1918df8bae1dSRodney W. Grimes 
1919df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1920df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1921df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
192202c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
19236a800098SYoshinobu Inoue 		/*
19246a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
19256a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
19266a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
19276a800098SYoshinobu Inoue 		 * XXX quickhack!!!
19286a800098SYoshinobu Inoue 		 */
192902c1c707SAndre Oppermann 		{
19306a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
19316a800098SYoshinobu Inoue 			int ipsecerror;
19326a800098SYoshinobu Inoue 			int ipsechdr;
193302c1c707SAndre Oppermann 			struct route *ro;
19346a800098SYoshinobu Inoue 
193502c1c707SAndre Oppermann #ifdef IPSEC
19366a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
19376a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
19386a800098SYoshinobu Inoue 						    IP_FORWARDING,
19396a800098SYoshinobu Inoue 						    &ipsecerror);
194002c1c707SAndre Oppermann #else /* FAST_IPSEC */
1941b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1942b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1943b9234fafSSam Leffler 						   IP_FORWARDING,
1944b9234fafSSam Leffler 						   &ipsecerror);
194502c1c707SAndre Oppermann #endif
194602c1c707SAndre Oppermann 			if (sp != NULL) {
1947b9234fafSSam Leffler 				/* count IPsec header size */
1948b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1949b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1950b9234fafSSam Leffler 							 NULL);
1951b9234fafSSam Leffler 
1952b9234fafSSam Leffler 				/*
1953b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1954b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1955b9234fafSSam Leffler 				 *
1956b9234fafSSam Leffler 				 * XXX BUG ALERT
1957b9234fafSSam Leffler 				 * The "dummyifp" code relies upon the fact
1958b9234fafSSam Leffler 				 * that icmp_error() touches only ifp->if_mtu.
1959b9234fafSSam Leffler 				 */
1960b9234fafSSam Leffler 				/*XXX*/
1961b9234fafSSam Leffler 				destifp = NULL;
1962b9234fafSSam Leffler 				if (sp->req != NULL
1963b9234fafSSam Leffler 				 && sp->req->sav != NULL
1964b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
196502c1c707SAndre Oppermann 					ro = &sp->req->sav->sah->sa_route;
196602c1c707SAndre Oppermann 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1967b9234fafSSam Leffler 						dummyifp.if_mtu =
196857ab3660SBruce M Simpson 						    ro->ro_rt->rt_rmx.rmx_mtu ?
196957ab3660SBruce M Simpson 						    ro->ro_rt->rt_rmx.rmx_mtu :
197002c1c707SAndre Oppermann 						    ro->ro_rt->rt_ifp->if_mtu;
1971b9234fafSSam Leffler 						dummyifp.if_mtu -= ipsechdr;
1972b9234fafSSam Leffler 						destifp = &dummyifp;
1973b9234fafSSam Leffler 					}
1974b9234fafSSam Leffler 				}
1975b9234fafSSam Leffler 
197602c1c707SAndre Oppermann #ifdef IPSEC
197702c1c707SAndre Oppermann 				key_freesp(sp);
197802c1c707SAndre Oppermann #else /* FAST_IPSEC */
1979b9234fafSSam Leffler 				KEY_FREESP(&sp);
198002c1c707SAndre Oppermann #endif
198102c1c707SAndre Oppermann 				ipstat.ips_cantfrag++;
198202c1c707SAndre Oppermann 				break;
198302c1c707SAndre Oppermann 			} else
198402c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
198502c1c707SAndre Oppermann 		destifp = ia->ia_ifp;
198602c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
1987b9234fafSSam Leffler 		}
198802c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
1989df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
1990df8bae1dSRodney W. Grimes 		break;
1991df8bae1dSRodney W. Grimes 
1992df8bae1dSRodney W. Grimes 	case ENOBUFS:
1993df285b3dSMike Silbersack 		/*
1994df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
1995df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
1996df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
1997df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
1998df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
1999df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
2000df285b3dSMike Silbersack 		 */
2001df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
2002df285b3dSMike Silbersack 			m_freem(mcopy);
2003df285b3dSMike Silbersack 			return;
2004df285b3dSMike Silbersack 		} else {
2005df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
2006df8bae1dSRodney W. Grimes 			code = 0;
2007df285b3dSMike Silbersack 		}
2008df8bae1dSRodney W. Grimes 		break;
20093a06e3e0SRuslan Ermilov 
20103a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
20113a06e3e0SRuslan Ermilov 		m_freem(mcopy);
20123a06e3e0SRuslan Ermilov 		return;
2013df8bae1dSRodney W. Grimes 	}
2014df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
2015df8bae1dSRodney W. Grimes }
2016df8bae1dSRodney W. Grimes 
201782c23ebaSBill Fenner void
201882c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
201982c23ebaSBill Fenner 	register struct inpcb *inp;
202082c23ebaSBill Fenner 	register struct mbuf **mp;
202182c23ebaSBill Fenner 	register struct ip *ip;
202282c23ebaSBill Fenner 	register struct mbuf *m;
202382c23ebaSBill Fenner {
2024be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
2025be8a62e8SPoul-Henning Kamp 		struct bintime bt;
2026be8a62e8SPoul-Henning Kamp 
2027be8a62e8SPoul-Henning Kamp 		bintime(&bt);
2028be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
2029be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
2030be8a62e8SPoul-Henning Kamp 			SCM_BINTIME, SOL_SOCKET);
2031be8a62e8SPoul-Henning Kamp 			if (*mp)
2032be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
2033be8a62e8SPoul-Henning Kamp 		}
203482c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
203582c23ebaSBill Fenner 			struct timeval tv;
203682c23ebaSBill Fenner 
2037be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
203882c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
203982c23ebaSBill Fenner 				SCM_TIMESTAMP, SOL_SOCKET);
204082c23ebaSBill Fenner 			if (*mp)
204182c23ebaSBill Fenner 				mp = &(*mp)->m_next;
20424cc20ab1SSeigo Tanimura 		}
2043be8a62e8SPoul-Henning Kamp 	}
204482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
204582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
204682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
204782c23ebaSBill Fenner 		if (*mp)
204882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
204982c23ebaSBill Fenner 	}
20504957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
20514957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
20524957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
20534957466bSMatthew N. Dodd 		if (*mp)
20544957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
20554957466bSMatthew N. Dodd 	}
205682c23ebaSBill Fenner #ifdef notyet
205782c23ebaSBill Fenner 	/* XXX
205882c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
205982c23ebaSBill Fenner 	 * than they already were.
206082c23ebaSBill Fenner 	 */
206182c23ebaSBill Fenner 	/* options were tossed already */
206282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
206382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
206482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
206582c23ebaSBill Fenner 		if (*mp)
206682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
206782c23ebaSBill Fenner 	}
206882c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
206982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
207082c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
207182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
207282c23ebaSBill Fenner 		if (*mp)
207382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
207482c23ebaSBill Fenner 	}
207582c23ebaSBill Fenner #endif
207682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
2077d314ad7bSJulian Elischer 		struct ifnet *ifp;
2078d314ad7bSJulian Elischer 		struct sdlbuf {
207982c23ebaSBill Fenner 			struct sockaddr_dl sdl;
2080d314ad7bSJulian Elischer 			u_char	pad[32];
2081d314ad7bSJulian Elischer 		} sdlbuf;
2082d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
2083d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
208482c23ebaSBill Fenner 
2085d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
2086d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
2087f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
2088f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
2089d314ad7bSJulian Elischer 			/*
2090d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
2091d314ad7bSJulian Elischer 			 */
2092d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
2093d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
2094d314ad7bSJulian Elischer 				goto makedummy;
2095d314ad7bSJulian Elischer 			}
2096d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
2097d314ad7bSJulian Elischer 		} else {
2098d314ad7bSJulian Elischer makedummy:
2099d314ad7bSJulian Elischer 			sdl2->sdl_len
2100d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
2101d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
2102d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
2103d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2104d314ad7bSJulian Elischer 		}
2105d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
210682c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
210782c23ebaSBill Fenner 		if (*mp)
210882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
210982c23ebaSBill Fenner 	}
211082c23ebaSBill Fenner }
211182c23ebaSBill Fenner 
21124d2e3692SLuigi Rizzo /*
21134d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
21144d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
21154d2e3692SLuigi Rizzo  *
21164d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
21174d2e3692SLuigi Rizzo  *  handling is done there already.
21184d2e3692SLuigi Rizzo  */
21194d2e3692SLuigi Rizzo static int ip_rsvp_on;
21204d2e3692SLuigi Rizzo struct socket *ip_rsvpd;
2121df8bae1dSRodney W. Grimes int
2122f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
2123f0068c4aSGarrett Wollman {
2124f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
2125f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2126f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
2127f0068c4aSGarrett Wollman 
2128f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
2129f0068c4aSGarrett Wollman 		return EADDRINUSE;
2130f0068c4aSGarrett Wollman 
2131f0068c4aSGarrett Wollman 	ip_rsvpd = so;
21321c5de19aSGarrett Wollman 	/*
21331c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
21341c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21351c5de19aSGarrett Wollman 	 */
21361c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
21371c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
21381c5de19aSGarrett Wollman 		rsvp_on++;
21391c5de19aSGarrett Wollman 	}
2140f0068c4aSGarrett Wollman 
2141f0068c4aSGarrett Wollman 	return 0;
2142f0068c4aSGarrett Wollman }
2143f0068c4aSGarrett Wollman 
2144f0068c4aSGarrett Wollman int
2145f0068c4aSGarrett Wollman ip_rsvp_done(void)
2146f0068c4aSGarrett Wollman {
2147f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
21481c5de19aSGarrett Wollman 	/*
21491c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
21501c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21511c5de19aSGarrett Wollman 	 */
21521c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
21531c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
21541c5de19aSGarrett Wollman 		rsvp_on--;
21551c5de19aSGarrett Wollman 	}
2156f0068c4aSGarrett Wollman 	return 0;
2157f0068c4aSGarrett Wollman }
2158bbb4330bSLuigi Rizzo 
2159bbb4330bSLuigi Rizzo void
2160bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
2161bbb4330bSLuigi Rizzo {
2162bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
2163bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
2164bbb4330bSLuigi Rizzo 		return;
2165bbb4330bSLuigi Rizzo 	}
2166bbb4330bSLuigi Rizzo 
2167bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
2168bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
2169bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
2170bbb4330bSLuigi Rizzo 	 */
2171bbb4330bSLuigi Rizzo 
2172bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
2173bbb4330bSLuigi Rizzo 		m_freem(m);
2174bbb4330bSLuigi Rizzo 		return;
2175bbb4330bSLuigi Rizzo 	}
2176bbb4330bSLuigi Rizzo 
2177bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
2178bbb4330bSLuigi Rizzo 		rip_input(m, off);
2179bbb4330bSLuigi Rizzo 		return;
2180bbb4330bSLuigi Rizzo 	}
2181bbb4330bSLuigi Rizzo 	/* Drop the packet */
2182bbb4330bSLuigi Rizzo 	m_freem(m);
2183bbb4330bSLuigi Rizzo }
2184