xref: /freebsd/sys/netinet/ip_input.c (revision be8a62e8214b3cdd0e4d182389cbd42451fc04ea)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
14df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
15df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
16df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
17df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
18df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
19df8bae1dSRodney W. Grimes  *    without specific prior written permission.
20df8bae1dSRodney W. Grimes  *
21df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
32df8bae1dSRodney W. Grimes  *
33df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34c3aac50fSPeter Wemm  * $FreeBSD$
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37e4f4247aSEivind Eklund #include "opt_bootp.h"
3874a9466cSGary Palmer #include "opt_ipfw.h"
39b715f178SLuigi Rizzo #include "opt_ipdn.h"
40fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
411ee25934SPeter Wemm #include "opt_ipfilter.h"
4227108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
436a800098SYoshinobu Inoue #include "opt_ipsec.h"
4436b0360bSRobert Watson #include "opt_mac.h"
45c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4664dddc18SKris Kennaway #include "opt_random_ip_id.h"
4774a9466cSGary Palmer 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
5036b0360bSRobert Watson #include <sys/mac.h>
51df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
52b715f178SLuigi Rizzo #include <sys/malloc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54df8bae1dSRodney W. Grimes #include <sys/protosw.h>
55df8bae1dSRodney W. Grimes #include <sys/socket.h>
56df8bae1dSRodney W. Grimes #include <sys/time.h>
57df8bae1dSRodney W. Grimes #include <sys/kernel.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61c85540ddSAndrey A. Chernov #include <net/pfil.h>
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7658938916SGarrett Wollman #include <machine/in_cksum.h>
77df8bae1dSRodney W. Grimes 
78f0068c4aSGarrett Wollman #include <sys/socketvar.h>
796ddbf1e2SGary Palmer 
806ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
81db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
82db69a05dSPaul Saab 
836a800098SYoshinobu Inoue #ifdef IPSEC
846a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
856a800098SYoshinobu Inoue #include <netkey/key.h>
866a800098SYoshinobu Inoue #endif
876a800098SYoshinobu Inoue 
88b9234fafSSam Leffler #ifdef FAST_IPSEC
89b9234fafSSam Leffler #include <netipsec/ipsec.h>
90b9234fafSSam Leffler #include <netipsec/key.h>
91b9234fafSSam Leffler #endif
92b9234fafSSam Leffler 
931c5de19aSGarrett Wollman int rsvp_on = 0;
94f0068c4aSGarrett Wollman 
951f91d8c5SDavid Greenman int	ipforwarding = 0;
960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
973d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
980312fbe9SPoul-Henning Kamp 
99d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
1000312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
1013d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
1020312fbe9SPoul-Henning Kamp 
103df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1040312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1053d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1060312fbe9SPoul-Henning Kamp 
1070312fbe9SPoul-Henning Kamp static int	ip_dosourceroute = 0;
1080312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
1093d177f46SBill Fumerola     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
1104fce5804SGuido van Rooij 
1114fce5804SGuido van Rooij static int	ip_acceptsourceroute = 0;
1124fce5804SGuido van Rooij SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
1133d177f46SBill Fumerola     CTLFLAG_RW, &ip_acceptsourceroute, 0,
1143d177f46SBill Fumerola     "Enable accepting source routed IP packets");
1156a800098SYoshinobu Inoue 
1166a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1176a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1186a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1196a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1206a800098SYoshinobu Inoue 
121402062e8SMike Silbersack static int    nipq = 0;         /* total # of reass queues */
122402062e8SMike Silbersack static int    maxnipq;
123690a6055SJesper Skriver SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
124402062e8SMike Silbersack 	&maxnipq, 0,
125690a6055SJesper Skriver 	"Maximum number of IPv4 fragment reassembly queue entries");
126690a6055SJesper Skriver 
127375386e2SMike Silbersack static int    maxfragsperpacket;
128375386e2SMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
129375386e2SMike Silbersack 	&maxfragsperpacket, 0,
130375386e2SMike Silbersack 	"Maximum number of IPv4 fragments allowed per packet");
131375386e2SMike Silbersack 
132df285b3dSMike Silbersack static int	ip_sendsourcequench = 0;
133df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
134df285b3dSMike Silbersack 	&ip_sendsourcequench, 0,
135df285b3dSMike Silbersack 	"Enable the transmission of source quench packets");
136df285b3dSMike Silbersack 
137823db0e9SDon Lewis /*
138823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
139823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
140a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
141823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1423f67c834SDon Lewis  *
143a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
144a8f12100SDon Lewis  * to translate the destination address to another local interface.
1453f67c834SDon Lewis  *
1463f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1473f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1483f67c834SDon Lewis  * packets for those addresses are received.
149823db0e9SDon Lewis  */
150b3e95d4eSJonathan Lemon static int	ip_checkinterface = 1;
151b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
152b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
153b3e95d4eSJonathan Lemon 
154df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1550312fbe9SPoul-Henning Kamp static int	ipprintfs = 0;
156df8bae1dSRodney W. Grimes #endif
157134ea224SSam Leffler #ifdef PFIL_HOOKS
158134ea224SSam Leffler struct pfil_head inet_pfil_hook;
159134ea224SSam Leffler #endif
160df8bae1dSRodney W. Grimes 
1611cafed39SJonathan Lemon static struct	ifqueue ipintrq;
162ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
163ca925d9cSJonathan Lemon 
164df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
165f0ffb944SJulian Elischer extern	struct protosw inetsw[];
166df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16759562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
168ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
169ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
170ca925d9cSJonathan Lemon 
171afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1723d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1730312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1743d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
175df8bae1dSRodney W. Grimes 
176f23b4c91SGarrett Wollman struct ipstat ipstat;
177c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1783d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
179194a213eSAndrey A. Chernov 
/*
 * Packet reassembly hashing.
 *
 * There are 2^IPREASS_NHASH_LOG2 buckets.  IPREASS_HASH() builds one
 * byte from bits 0-3 and bits 8-11 of x, XORs it with y and keeps the
 * low IPREASS_NHASH_LOG2 bits as the bucket index.
 * Note that x is evaluated twice.
 */
#define	IPREASS_NHASH_LOG2	6
#define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define	IPREASS_HMASK		(IPREASS_NHASH - 1)
#define	IPREASS_HASH(x,y)						\
	(((((x) & 0x0F) | (((x) >> 4) & 0xF0)) ^ (y)) & IPREASS_HMASK)
186194a213eSAndrey A. Chernov 
187462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
1882fad1e93SSam Leffler struct mtx ipqlock;
1892fad1e93SSam Leffler 
1902fad1e93SSam Leffler #define	IPQ_LOCK()	mtx_lock(&ipqlock)
1912fad1e93SSam Leffler #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
192888c2a3cSSam Leffler #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
193888c2a3cSSam Leffler #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
194f23b4c91SGarrett Wollman 
1950312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1973d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1980312fbe9SPoul-Henning Kamp #endif
1990312fbe9SPoul-Henning Kamp 
2001b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
201c76ff708SAndre Oppermann int	ipstealth = 0;
2021b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
2031b968362SDag-Erling Smørgrav     &ipstealth, 0, "");
2041b968362SDag-Erling Smørgrav #endif
2051b968362SDag-Erling Smørgrav 
206cfe8b629SGarrett Wollman 
20723bf9953SPoul-Henning Kamp /* Firewall hooks */
20823bf9953SPoul-Henning Kamp ip_fw_chk_t *ip_fw_chk_ptr;
2099fcc0795SLuigi Rizzo int fw_enable = 1 ;
21097850a5dSLuigi Rizzo int fw_one_pass = 1;
211e7319babSPoul-Henning Kamp 
212db69a05dSPaul Saab /* Dummynet hooks */
213db69a05dSPaul Saab ip_dn_io_t *ip_dn_io_ptr;
214b715f178SLuigi Rizzo 
215929b31ddSSam Leffler /*
2164d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
2174d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
2184d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
2194d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
2204d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
2214d2e3692SLuigi Rizzo  *
222df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
223df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
224df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
225df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
226df8bae1dSRodney W. Grimes  * to us.
227df8bae1dSRodney W. Grimes  */
2280312fbe9SPoul-Henning Kamp static int	ip_nhops = 0;
229df8bae1dSRodney W. Grimes static	struct ip_srcrt {
230df8bae1dSRodney W. Grimes 	struct	in_addr dst;			/* final destination */
231df8bae1dSRodney W. Grimes 	char	nop;				/* one NOP to align */
232df8bae1dSRodney W. Grimes 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
233df8bae1dSRodney W. Grimes 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
234df8bae1dSRodney W. Grimes } ip_srcrt;
235df8bae1dSRodney W. Grimes 
2364d77a549SAlfred Perlstein static void	save_rte(u_char *, struct in_addr);
2372b25acc1SLuigi Rizzo static int	ip_dooptions(struct mbuf *m, int,
2382b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
23902c1c707SAndre Oppermann static void	ip_forward(struct mbuf *m, int srcrt,
2402b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2414d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2422b25acc1SLuigi Rizzo static struct	mbuf *ip_reass(struct mbuf *, struct ipqhead *,
2432b25acc1SLuigi Rizzo 		struct ipq *, u_int32_t *, u_int16_t *);
2448948e4baSArchie Cobbs 
245df8bae1dSRodney W. Grimes /*
246df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
247df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
248df8bae1dSRodney W. Grimes  */
249df8bae1dSRodney W. Grimes void
250df8bae1dSRodney W. Grimes ip_init()
251df8bae1dSRodney W. Grimes {
252f0ffb944SJulian Elischer 	register struct protosw *pr;
253df8bae1dSRodney W. Grimes 	register int i;
254df8bae1dSRodney W. Grimes 
25559562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
256ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
257f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
258df8bae1dSRodney W. Grimes 	if (pr == 0)
259df8bae1dSRodney W. Grimes 		panic("ip_init");
260df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
261df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
262f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
263f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
264df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
265df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
266df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
267194a213eSAndrey A. Chernov 
268134ea224SSam Leffler #ifdef PFIL_HOOKS
269134ea224SSam Leffler 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
270134ea224SSam Leffler 	inet_pfil_hook.ph_af = AF_INET;
271134ea224SSam Leffler 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
272134ea224SSam Leffler 		printf("%s: WARNING: unable to register pfil hook, "
273134ea224SSam Leffler 			"error %d\n", __func__, i);
274134ea224SSam Leffler #endif /* PFIL_HOOKS */
275134ea224SSam Leffler 
2762fad1e93SSam Leffler 	IPQ_LOCK_INIT();
277194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
278462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
279194a213eSAndrey A. Chernov 
280375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
281375386e2SMike Silbersack 	maxfragsperpacket = 16;
282194a213eSAndrey A. Chernov 
28364dddc18SKris Kennaway #ifndef RANDOM_IP_ID
284227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
28564dddc18SKris Kennaway #endif
286df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2876008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2887902224cSSam Leffler 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
289df8bae1dSRodney W. Grimes }
290df8bae1dSRodney W. Grimes 
2914d2e3692SLuigi Rizzo /*
292df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
293df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
294df8bae1dSRodney W. Grimes  */
295c67b1d17SGarrett Wollman void
296c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
297df8bae1dSRodney W. Grimes {
2989188b4a1SAndre Oppermann 	struct ip *ip = NULL;
29923bf9953SPoul-Henning Kamp 	struct ipq *fp;
3005da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
301ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
3029188b4a1SAndre Oppermann 	int    i, checkif, hlen = 0;
3039188b4a1SAndre Oppermann 	int    ours = 0;
30447c861ecSBrian Somers 	u_short sum;
3057538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
3068948e4baSArchie Cobbs 	u_int32_t divert_info = 0;		/* packet divert/tee info */
3072b25acc1SLuigi Rizzo 	struct ip_fw_args args;
30802c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
309f51f805fSSam Leffler #ifdef PFIL_HOOKS
310f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
311f51f805fSSam Leffler #endif
312b9234fafSSam Leffler #ifdef FAST_IPSEC
313b9234fafSSam Leffler 	struct m_tag *mtag;
314b9234fafSSam Leffler 	struct tdb_ident *tdbi;
315b9234fafSSam Leffler 	struct secpolicy *sp;
316b9234fafSSam Leffler 	int s, error;
317b9234fafSSam Leffler #endif /* FAST_IPSEC */
318b715f178SLuigi Rizzo 
3192b25acc1SLuigi Rizzo 	args.eh = NULL;
3202b25acc1SLuigi Rizzo 	args.oif = NULL;
3212b25acc1SLuigi Rizzo 	args.rule = NULL;
3222b25acc1SLuigi Rizzo 	args.divert_rule = 0;			/* divert cookie */
3232b25acc1SLuigi Rizzo 	args.next_hop = NULL;
3248948e4baSArchie Cobbs 
3259188b4a1SAndre Oppermann 	/*
3269188b4a1SAndre Oppermann 	 * Grab info from MT_TAG mbufs prepended to the chain.
3279188b4a1SAndre Oppermann 	 *
3289188b4a1SAndre Oppermann 	 * XXX: This is ugly. These pseudo mbuf prepend tags should really
3299188b4a1SAndre Oppermann 	 * be real m_tags.  Before these have always been allocated on the
3309188b4a1SAndre Oppermann 	 * callers stack, so we didn't have to free them.  Now with
3319188b4a1SAndre Oppermann 	 * ip_fastforward they are true mbufs and we have to free them
3329188b4a1SAndre Oppermann 	 * otherwise we have a leak.  Must rewrite ipfw to use m_tags.
3339188b4a1SAndre Oppermann 	 */
3349188b4a1SAndre Oppermann 	for (; m && m->m_type == MT_TAG;) {
3359188b4a1SAndre Oppermann 		struct mbuf *m0;
3369188b4a1SAndre Oppermann 
3375d846453SSam Leffler 		switch(m->_m_tag_id) {
3382b25acc1SLuigi Rizzo 		default:
3392b25acc1SLuigi Rizzo 			printf("ip_input: unrecognised MT_TAG tag %d\n",
3405d846453SSam Leffler 			    m->_m_tag_id);
3412b25acc1SLuigi Rizzo 			break;
3422b25acc1SLuigi Rizzo 
3432b25acc1SLuigi Rizzo 		case PACKET_TAG_DUMMYNET:
3442b25acc1SLuigi Rizzo 			args.rule = ((struct dn_pkt *)m)->rule;
3452b25acc1SLuigi Rizzo 			break;
3462b25acc1SLuigi Rizzo 
3472b25acc1SLuigi Rizzo 		case PACKET_TAG_DIVERT:
3487627c6cbSMaxime Henrion 			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
3492b25acc1SLuigi Rizzo 			break;
3502b25acc1SLuigi Rizzo 
3512b25acc1SLuigi Rizzo 		case PACKET_TAG_IPFORWARD:
3522b25acc1SLuigi Rizzo 			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
3532b25acc1SLuigi Rizzo 			break;
3549188b4a1SAndre Oppermann 
3559188b4a1SAndre Oppermann 		case PACKET_TAG_IPFASTFWD_OURS:
3569188b4a1SAndre Oppermann 			ours = 1;
3579188b4a1SAndre Oppermann 			break;
3582b25acc1SLuigi Rizzo 		}
3599188b4a1SAndre Oppermann 
3609188b4a1SAndre Oppermann 		m0 = m;
3619188b4a1SAndre Oppermann 		m = m->m_next;
3629188b4a1SAndre Oppermann 		/* XXX: This is set by ip_fastforward */
3639188b4a1SAndre Oppermann 		if (m0->m_nextpkt == (struct mbuf *)1)
3649188b4a1SAndre Oppermann 			m_free(m0);
3652b25acc1SLuigi Rizzo 	}
366df8bae1dSRodney W. Grimes 
367fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
368db40007dSAndrew R. Reiter 
3699188b4a1SAndre Oppermann 	if (ours)		/* ip_fastforward firewall changed dest to local */
3709188b4a1SAndre Oppermann 		goto ours;
3719188b4a1SAndre Oppermann 
3722b25acc1SLuigi Rizzo 	if (args.rule) {	/* dummynet already filtered us */
3732b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
37453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
3752b25acc1SLuigi Rizzo 		goto iphack ;
3762b25acc1SLuigi Rizzo 	}
3772b25acc1SLuigi Rizzo 
378df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
37958938916SGarrett Wollman 
38058938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
38158938916SGarrett Wollman 		goto tooshort;
38258938916SGarrett Wollman 
383df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
384df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
385df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
386c67b1d17SGarrett Wollman 		return;
387df8bae1dSRodney W. Grimes 	}
388df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
38958938916SGarrett Wollman 
39053be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
391df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
392df8bae1dSRodney W. Grimes 		goto bad;
393df8bae1dSRodney W. Grimes 	}
39458938916SGarrett Wollman 
39553be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
396df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
397df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
398df8bae1dSRodney W. Grimes 		goto bad;
399df8bae1dSRodney W. Grimes 	}
400df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
401df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
402df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
403c67b1d17SGarrett Wollman 			return;
404df8bae1dSRodney W. Grimes 		}
405df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
406df8bae1dSRodney W. Grimes 	}
40733841545SHajimu UMEMOTO 
40833841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
40933841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
41033841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
41133841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
41233841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
41333841545SHajimu UMEMOTO 			goto bad;
41433841545SHajimu UMEMOTO 		}
41533841545SHajimu UMEMOTO 	}
41633841545SHajimu UMEMOTO 
417db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
418db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
419db4f9cc7SJonathan Lemon 	} else {
42058938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
42147c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
42258938916SGarrett Wollman 		} else {
42347c861ecSBrian Somers 			sum = in_cksum(m, hlen);
42458938916SGarrett Wollman 		}
425db4f9cc7SJonathan Lemon 	}
42647c861ecSBrian Somers 	if (sum) {
427df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
428df8bae1dSRodney W. Grimes 		goto bad;
429df8bae1dSRodney W. Grimes 	}
430df8bae1dSRodney W. Grimes 
431df8bae1dSRodney W. Grimes 	/*
432df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
433df8bae1dSRodney W. Grimes 	 */
434fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
435df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
436df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
437df8bae1dSRodney W. Grimes 		goto bad;
438df8bae1dSRodney W. Grimes 	}
439fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
440df8bae1dSRodney W. Grimes 
441df8bae1dSRodney W. Grimes 	/*
442df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
443df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
444df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
445df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
446df8bae1dSRodney W. Grimes 	 */
447df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
44858938916SGarrett Wollman tooshort:
449df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
450df8bae1dSRodney W. Grimes 		goto bad;
451df8bae1dSRodney W. Grimes 	}
452df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
453df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
454df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
455df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
456df8bae1dSRodney W. Grimes 		} else
457df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
458df8bae1dSRodney W. Grimes 	}
45914dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
46014dd6717SSam Leffler 	/*
46114dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
46214dd6717SSam Leffler 	 */
4630f9ade71SHajimu UMEMOTO 	if (ipsec_getnhist(m))
46414dd6717SSam Leffler 		goto pass;
46514dd6717SSam Leffler #endif
4661f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
4671f76a5e2SSam Leffler 	/*
4681f76a5e2SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
4691f76a5e2SSam Leffler 	 */
4701f76a5e2SSam Leffler 	if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
4711f76a5e2SSam Leffler 		goto pass;
4721f76a5e2SSam Leffler #endif
4733f67c834SDon Lewis 
4744dd1662bSUgen J.S. Antsilevich 	/*
4754dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
4764dd1662bSUgen J.S. Antsilevich 	 * Right now when no processing on packet has done
4774dd1662bSUgen J.S. Antsilevich 	 * and it is still fresh out of network we do our black
4784dd1662bSUgen J.S. Antsilevich 	 * deals with it.
47993e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
480fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
481b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4824dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4834dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4844dd1662bSUgen J.S. Antsilevich  	 */
485b715f178SLuigi Rizzo 
486b715f178SLuigi Rizzo iphack:
487df8bae1dSRodney W. Grimes 
488c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
489c4ac87eaSDarren Reed 	/*
490134ea224SSam Leffler 	 * Run through list of hooks for input packets.
491f51f805fSSam Leffler 	 *
492f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
493f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
494f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
495c4ac87eaSDarren Reed 	 */
496f51f805fSSam Leffler 	odst = ip->ip_dst;
497134ea224SSam Leffler 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
498134ea224SSam Leffler 	    PFIL_IN) != 0)
499beec8214SDarren Reed 		return;
500134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
501c4ac87eaSDarren Reed 		return;
502c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
50302c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
504c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
505c4ac87eaSDarren Reed 
5067b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
507f9e354dfSJulian Elischer 		/*
508f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
509f9e354dfSJulian Elischer 		 * skip the firewall a second time
510f9e354dfSJulian Elischer 		 */
5112b25acc1SLuigi Rizzo 		if (args.next_hop)
512f9e354dfSJulian Elischer 			goto ours;
5132b25acc1SLuigi Rizzo 
5142b25acc1SLuigi Rizzo 		args.m = m;
5152b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
5162b25acc1SLuigi Rizzo 		m = args.m;
5172b25acc1SLuigi Rizzo 
518d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
519507b4b54SLuigi Rizzo 			if (m)
520507b4b54SLuigi Rizzo 				m_freem(m);
521b715f178SLuigi Rizzo 			return;
522507b4b54SLuigi Rizzo 		}
523d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
5242b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
525b715f178SLuigi Rizzo 			goto pass;
5267b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
5278948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
5282b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
529e4676ba6SJulian Elischer 			return;
53093e0e116SJulian Elischer 		}
531b715f178SLuigi Rizzo #ifdef IPDIVERT
5328948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
5338948e4baSArchie Cobbs 			/* Divert or tee packet */
5348948e4baSArchie Cobbs 			divert_info = i;
535b715f178SLuigi Rizzo 			goto ours;
536b715f178SLuigi Rizzo 		}
537b715f178SLuigi Rizzo #endif
5382b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
539b715f178SLuigi Rizzo 			goto pass;
540b715f178SLuigi Rizzo 		/*
541b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
542b715f178SLuigi Rizzo 		 */
543b715f178SLuigi Rizzo 		m_freem(m);
544b715f178SLuigi Rizzo 		return;
545b715f178SLuigi Rizzo 	}
546b715f178SLuigi Rizzo pass:
547100ba1a6SJordan K. Hubbard 
548df8bae1dSRodney W. Grimes 	/*
549df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
550df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
551df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
552df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
553df8bae1dSRodney W. Grimes 	 */
554df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5552b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
556c67b1d17SGarrett Wollman 		return;
557df8bae1dSRodney W. Grimes 
558f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
559f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
560f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
561f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
562f0068c4aSGarrett Wollman 	 * grabbing the packet.
563f0068c4aSGarrett Wollman          */
5641c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
565f0068c4aSGarrett Wollman 		goto ours;
566f0068c4aSGarrett Wollman 
567df8bae1dSRodney W. Grimes 	/*
568df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
569cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
570cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
571cc766e04SGarrett Wollman 	 * with it).
572df8bae1dSRodney W. Grimes 	 */
573cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
574cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
575cc766e04SGarrett Wollman 		goto ours;
576cc766e04SGarrett Wollman 
5777538a9a0SJonathan Lemon 	/*
5787538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5797538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5807538a9a0SJonathan Lemon 	 */
5812b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5827538a9a0SJonathan Lemon 
583823db0e9SDon Lewis 	/*
584823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
585823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
586823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
587e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
588e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5893f67c834SDon Lewis 	 *
5903f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5913f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5923f67c834SDon Lewis 	 *
593a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5943f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5953f67c834SDon Lewis 	 * the packets are received.
596823db0e9SDon Lewis 	 */
597823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5989494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
599e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
6002b25acc1SLuigi Rizzo 	    (args.next_hop == NULL);
601823db0e9SDon Lewis 
602ca925d9cSJonathan Lemon 	/*
603ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
604ca925d9cSJonathan Lemon 	 */
605ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
606f9e354dfSJulian Elischer 		/*
607823db0e9SDon Lewis 		 * If the address matches, verify that the packet
608823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
609823db0e9SDon Lewis 		 * enabled.
610f9e354dfSJulian Elischer 		 */
611823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
612823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
613ed1ff184SJulian Elischer 			goto ours;
614ca925d9cSJonathan Lemon 	}
615823db0e9SDon Lewis 	/*
616ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
617ca925d9cSJonathan Lemon 	 *
618ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
619ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
620ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
621ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
622823db0e9SDon Lewis 	 */
623ca925d9cSJonathan Lemon 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
624ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
625ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
626ca925d9cSJonathan Lemon 				continue;
627ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
628df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6297538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
630df8bae1dSRodney W. Grimes 				goto ours;
6317538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
632df8bae1dSRodney W. Grimes 				goto ours;
633ca925d9cSJonathan Lemon #ifdef BOOTP_COMPAT
634ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
635ca925d9cSJonathan Lemon 				goto ours;
636ca925d9cSJonathan Lemon #endif
637df8bae1dSRodney W. Grimes 		}
638df8bae1dSRodney W. Grimes 	}
639df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
640df8bae1dSRodney W. Grimes 		struct in_multi *inm;
641df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
642df8bae1dSRodney W. Grimes 			/*
643df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
644df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
645df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
646df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
647df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
648df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
649df8bae1dSRodney W. Grimes 			 */
650bbb4330bSLuigi Rizzo 			if (ip_mforward &&
651bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
652df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
653df8bae1dSRodney W. Grimes 				m_freem(m);
654c67b1d17SGarrett Wollman 				return;
655df8bae1dSRodney W. Grimes 			}
656df8bae1dSRodney W. Grimes 
657df8bae1dSRodney W. Grimes 			/*
65811612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
659df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
660df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
661df8bae1dSRodney W. Grimes 			 */
662df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
663df8bae1dSRodney W. Grimes 				goto ours;
664df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
665df8bae1dSRodney W. Grimes 		}
666df8bae1dSRodney W. Grimes 		/*
667df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
668df8bae1dSRodney W. Grimes 		 * arrival interface.
669df8bae1dSRodney W. Grimes 		 */
670df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
671df8bae1dSRodney W. Grimes 		if (inm == NULL) {
67282c39223SGarrett Wollman 			ipstat.ips_notmember++;
673df8bae1dSRodney W. Grimes 			m_freem(m);
674c67b1d17SGarrett Wollman 			return;
675df8bae1dSRodney W. Grimes 		}
676df8bae1dSRodney W. Grimes 		goto ours;
677df8bae1dSRodney W. Grimes 	}
678df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
679df8bae1dSRodney W. Grimes 		goto ours;
680df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
681df8bae1dSRodney W. Grimes 		goto ours;
682df8bae1dSRodney W. Grimes 
6836a800098SYoshinobu Inoue 	/*
6846a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6856a800098SYoshinobu Inoue 	 */
6866a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6876a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6886a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6896a800098SYoshinobu Inoue 				goto ours;
6906a800098SYoshinobu Inoue 		}
6916a800098SYoshinobu Inoue 		m_freem(m);
6926a800098SYoshinobu Inoue 		return;
6936a800098SYoshinobu Inoue 	}
6949494d596SBrooks Davis 
695df8bae1dSRodney W. Grimes 	/*
696df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
697df8bae1dSRodney W. Grimes 	 */
698df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
699df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
700df8bae1dSRodney W. Grimes 		m_freem(m);
701546f251bSChris D. Faulhaber 	} else {
702546f251bSChris D. Faulhaber #ifdef IPSEC
703546f251bSChris D. Faulhaber 		/*
704546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
705546f251bSChris D. Faulhaber 		 */
706546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
707546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
708546f251bSChris D. Faulhaber 			goto bad;
709546f251bSChris D. Faulhaber 		}
710546f251bSChris D. Faulhaber #endif /* IPSEC */
711b9234fafSSam Leffler #ifdef FAST_IPSEC
712b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
713b9234fafSSam Leffler 		s = splnet();
714b9234fafSSam Leffler 		if (mtag != NULL) {
715b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
716b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
717b9234fafSSam Leffler 		} else {
718b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
719b9234fafSSam Leffler 						   IP_FORWARDING, &error);
720b9234fafSSam Leffler 		}
721b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
722b9234fafSSam Leffler 			splx(s);
723b9234fafSSam Leffler 			/*XXX error stat???*/
724b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
725b9234fafSSam Leffler 			goto bad;
726b9234fafSSam Leffler 		}
727b9234fafSSam Leffler 
728b9234fafSSam Leffler 		/*
729b9234fafSSam Leffler 		 * Check security policy against packet attributes.
730b9234fafSSam Leffler 		 */
731b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
732b9234fafSSam Leffler 		KEY_FREESP(&sp);
733b9234fafSSam Leffler 		splx(s);
734b9234fafSSam Leffler 		if (error) {
735b9234fafSSam Leffler 			ipstat.ips_cantforward++;
736b9234fafSSam Leffler 			goto bad;
737b9234fafSSam Leffler 		}
738b9234fafSSam Leffler #endif /* FAST_IPSEC */
73902c1c707SAndre Oppermann 		ip_forward(m, dchg, args.next_hop);
740546f251bSChris D. Faulhaber 	}
741c67b1d17SGarrett Wollman 	return;
742df8bae1dSRodney W. Grimes 
743df8bae1dSRodney W. Grimes ours:
744d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
745d0ebc0d2SYaroslav Tykhiy 	/*
746d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
747d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
748d0ebc0d2SYaroslav Tykhiy 	 */
7492b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
7502b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
751d0ebc0d2SYaroslav Tykhiy 		return;
752d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
753d0ebc0d2SYaroslav Tykhiy 
7545da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7555da9f8faSJosef Karthauser 	if (ia != NULL) {
7565da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7575da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7585da9f8faSJosef Karthauser 	}
759100ba1a6SJordan K. Hubbard 
76063f8d699SJordan K. Hubbard 	/*
761df8bae1dSRodney W. Grimes 	 * If offset or IP_MF are set, must reassemble.
762df8bae1dSRodney W. Grimes 	 * Otherwise, nothing need be done.
763df8bae1dSRodney W. Grimes 	 * (We could look in the reassembly queue to see
764df8bae1dSRodney W. Grimes 	 * if the packet was previously fragmented,
765df8bae1dSRodney W. Grimes 	 * but it's not worth the time; just let them time out.)
766df8bae1dSRodney W. Grimes 	 */
767b6ea1aa5SRuslan Ermilov 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
7686a800098SYoshinobu Inoue 
769ecf44c01SMike Silbersack 		/* If maxnipq is 0, never accept fragments. */
770ac64c866SMike Silbersack 		if (maxnipq == 0) {
771ac64c866SMike Silbersack                 	ipstat.ips_fragments++;
772ac64c866SMike Silbersack 			ipstat.ips_fragdropped++;
773ac64c866SMike Silbersack 			goto bad;
774ac64c866SMike Silbersack 		}
775ac64c866SMike Silbersack 
776194a213eSAndrey A. Chernov 		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
7772fad1e93SSam Leffler 		IPQ_LOCK();
778df8bae1dSRodney W. Grimes 		/*
779df8bae1dSRodney W. Grimes 		 * Look for queue of fragments
780df8bae1dSRodney W. Grimes 		 * of this datagram.
781df8bae1dSRodney W. Grimes 		 */
782462b86feSPoul-Henning Kamp 		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
783df8bae1dSRodney W. Grimes 			if (ip->ip_id == fp->ipq_id &&
784df8bae1dSRodney W. Grimes 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
785df8bae1dSRodney W. Grimes 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
78636b0360bSRobert Watson #ifdef MAC
78736b0360bSRobert Watson 			    mac_fragment_match(m, fp) &&
78836b0360bSRobert Watson #endif
789df8bae1dSRodney W. Grimes 			    ip->ip_p == fp->ipq_p)
790df8bae1dSRodney W. Grimes 				goto found;
791df8bae1dSRodney W. Grimes 
792042bbfa3SRobert Watson 		fp = NULL;
793194a213eSAndrey A. Chernov 
794ac64c866SMike Silbersack 		/*
795ac64c866SMike Silbersack 		 * Enforce upper bound on number of fragmented packets
796ac64c866SMike Silbersack 		 * for which we attempt reassembly;
797ac64c866SMike Silbersack 		 * If maxnipq is -1, accept all fragments without limitation.
798ac64c866SMike Silbersack 		 */
799ac64c866SMike Silbersack 		if ((nipq > maxnipq) && (maxnipq > 0)) {
800194a213eSAndrey A. Chernov 		    /*
801194a213eSAndrey A. Chernov 		     * drop something from the tail of the current queue
802194a213eSAndrey A. Chernov 		     * before proceeding further
803194a213eSAndrey A. Chernov 		     */
804462b86feSPoul-Henning Kamp 		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
805462b86feSPoul-Henning Kamp 		    if (q == NULL) {   /* gak */
806194a213eSAndrey A. Chernov 			for (i = 0; i < IPREASS_NHASH; i++) {
807462b86feSPoul-Henning Kamp 			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
808462b86feSPoul-Henning Kamp 			    if (r) {
80999e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += r->ipq_nfrags;
810462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], r);
811194a213eSAndrey A. Chernov 				break;
812194a213eSAndrey A. Chernov 			    }
813194a213eSAndrey A. Chernov 			}
814ac64c866SMike Silbersack 		    } else {
81599e8617dSMaxim Konovalov 			ipstat.ips_fragtimeout += q->ipq_nfrags;
816462b86feSPoul-Henning Kamp 			ip_freef(&ipq[sum], q);
817ac64c866SMike Silbersack 		    }
818194a213eSAndrey A. Chernov 		}
819194a213eSAndrey A. Chernov found:
820df8bae1dSRodney W. Grimes 		/*
821df8bae1dSRodney W. Grimes 		 * Adjust ip_len to not reflect header,
822df8bae1dSRodney W. Grimes 		 * convert offset of this to bytes.
823df8bae1dSRodney W. Grimes 		 */
824df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
825b6ea1aa5SRuslan Ermilov 		if (ip->ip_off & IP_MF) {
8266effc713SDoug Rabson 		        /*
8276effc713SDoug Rabson 		         * Make sure that fragments have a data length
8286effc713SDoug Rabson 			 * that's a non-zero multiple of 8 bytes.
8296effc713SDoug Rabson 		         */
8306effc713SDoug Rabson 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
8312fad1e93SSam Leffler 				IPQ_UNLOCK();
8326effc713SDoug Rabson 				ipstat.ips_toosmall++; /* XXX */
8336effc713SDoug Rabson 				goto bad;
8346effc713SDoug Rabson 			}
8356effc713SDoug Rabson 			m->m_flags |= M_FRAG;
8361cf43499SMaxim Konovalov 		} else
8371cf43499SMaxim Konovalov 			m->m_flags &= ~M_FRAG;
838df8bae1dSRodney W. Grimes 		ip->ip_off <<= 3;
839df8bae1dSRodney W. Grimes 
840df8bae1dSRodney W. Grimes 		/*
841b6ea1aa5SRuslan Ermilov 		 * Attempt reassembly; if it succeeds, proceed.
8422b25acc1SLuigi Rizzo 		 * ip_reass() will return a different mbuf, and update
8432b25acc1SLuigi Rizzo 		 * the divert info in divert_info and args.divert_rule.
844df8bae1dSRodney W. Grimes 		 */
845df8bae1dSRodney W. Grimes 		ipstat.ips_fragments++;
846487bdb38SRuslan Ermilov 		m->m_pkthdr.header = ip;
8476a800098SYoshinobu Inoue 		m = ip_reass(m,
8482b25acc1SLuigi Rizzo 		    &ipq[sum], fp, &divert_info, &args.divert_rule);
8492fad1e93SSam Leffler 		IPQ_UNLOCK();
8502b25acc1SLuigi Rizzo 		if (m == 0)
851c67b1d17SGarrett Wollman 			return;
852df8bae1dSRodney W. Grimes 		ipstat.ips_reassembled++;
8536a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8547e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
85553be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
856af782f1cSBrian Somers #ifdef IPDIVERT
8578948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
8588948e4baSArchie Cobbs 		if (divert_info != 0) {
859af782f1cSBrian Somers 			ip->ip_len += hlen;
860fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
861fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
862af782f1cSBrian Somers 			ip->ip_sum = 0;
86360123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
864af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
86560123168SRuslan Ermilov 			else
86660123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
867fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
868fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
869af782f1cSBrian Somers 			ip->ip_len -= hlen;
870af782f1cSBrian Somers 		}
871af782f1cSBrian Somers #endif
872df8bae1dSRodney W. Grimes 	} else
873df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
874df8bae1dSRodney W. Grimes 
87593e0e116SJulian Elischer #ifdef IPDIVERT
87693e0e116SJulian Elischer 	/*
8778948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
87893e0e116SJulian Elischer 	 */
8798948e4baSArchie Cobbs 	if (divert_info != 0) {
8808948e4baSArchie Cobbs 		struct mbuf *clone = NULL;
8818948e4baSArchie Cobbs 
8828948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
8838948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
884a163d034SWarner Losh 			clone = m_dup(m, M_DONTWAIT);
8858948e4baSArchie Cobbs 
8868948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
8878948e4baSArchie Cobbs 		ip->ip_len += hlen;
888fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
889fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
8908948e4baSArchie Cobbs 
8918948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
8922b25acc1SLuigi Rizzo 		divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
893e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
8948948e4baSArchie Cobbs 
8958948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
8968948e4baSArchie Cobbs 		if (clone == NULL)
89793e0e116SJulian Elischer 			return;
8988948e4baSArchie Cobbs 		m = clone;
8998948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
90056962689SCrist J. Clark 		ip->ip_len += hlen;
9012b25acc1SLuigi Rizzo 		/*
9022b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
9032b25acc1SLuigi Rizzo 		 * packet. But first clear divert_info to avoid
9042b25acc1SLuigi Rizzo 		 * entering this block again.
9052b25acc1SLuigi Rizzo 		 * We do not need to clear args.divert_rule
9062b25acc1SLuigi Rizzo 		 * or args.next_hop as they will not be used.
9072b25acc1SLuigi Rizzo 		 */
90856962689SCrist J. Clark 		divert_info = 0;
90956962689SCrist J. Clark 		goto pass;
91093e0e116SJulian Elischer 	}
91193e0e116SJulian Elischer #endif
91293e0e116SJulian Elischer 
91333841545SHajimu UMEMOTO #ifdef IPSEC
91433841545SHajimu UMEMOTO 	/*
91533841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
91633841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
91733841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
91833841545SHajimu UMEMOTO 	 */
91933841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
92033841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
92133841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
92233841545SHajimu UMEMOTO 		goto bad;
92333841545SHajimu UMEMOTO 	}
92433841545SHajimu UMEMOTO #endif
925b9234fafSSam Leffler #if FAST_IPSEC
926b9234fafSSam Leffler 	/*
927b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
928b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
929b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
930b9234fafSSam Leffler 	 */
931b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
932b9234fafSSam Leffler 		/*
933b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
934b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
935b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
936b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
937b9234fafSSam Leffler 		 */
938b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
939b9234fafSSam Leffler 		s = splnet();
940b9234fafSSam Leffler 		if (mtag != NULL) {
941b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
942b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
943b9234fafSSam Leffler 		} else {
944b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
945b9234fafSSam Leffler 						   IP_FORWARDING, &error);
946b9234fafSSam Leffler 		}
947b9234fafSSam Leffler 		if (sp != NULL) {
948b9234fafSSam Leffler 			/*
949b9234fafSSam Leffler 			 * Check security policy against packet attributes.
950b9234fafSSam Leffler 			 */
951b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
952b9234fafSSam Leffler 			KEY_FREESP(&sp);
953b9234fafSSam Leffler 		} else {
954b9234fafSSam Leffler 			/* XXX error stat??? */
955b9234fafSSam Leffler 			error = EINVAL;
956b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
957b9234fafSSam Leffler 			goto bad;
958b9234fafSSam Leffler 		}
959b9234fafSSam Leffler 		splx(s);
960b9234fafSSam Leffler 		if (error)
961b9234fafSSam Leffler 			goto bad;
962b9234fafSSam Leffler 	}
963b9234fafSSam Leffler #endif /* FAST_IPSEC */
96433841545SHajimu UMEMOTO 
965df8bae1dSRodney W. Grimes 	/*
966df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
967df8bae1dSRodney W. Grimes 	 */
968df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
9697902224cSSam Leffler 	NET_PICKUP_GIANT();
9702b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
9712b25acc1SLuigi Rizzo 		/* TCP needs IPFORWARD info if available */
9722b25acc1SLuigi Rizzo 		struct m_hdr tag;
9736a800098SYoshinobu Inoue 
9742b25acc1SLuigi Rizzo 		tag.mh_type = MT_TAG;
9752b25acc1SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
9762b25acc1SLuigi Rizzo 		tag.mh_data = (caddr_t)args.next_hop;
9772b25acc1SLuigi Rizzo 		tag.mh_next = m;
97863346129SBrian Feldman 		tag.mh_nextpkt = NULL;
9792b25acc1SLuigi Rizzo 
9802b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
9812b25acc1SLuigi Rizzo 			(struct mbuf *)&tag, hlen);
9822b25acc1SLuigi Rizzo 	} else
9832b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
9847902224cSSam Leffler 	NET_DROP_GIANT();
985c67b1d17SGarrett Wollman 	return;
986df8bae1dSRodney W. Grimes bad:
987df8bae1dSRodney W. Grimes 	m_freem(m);
988c67b1d17SGarrett Wollman }
989c67b1d17SGarrett Wollman 
990c67b1d17SGarrett Wollman /*
9918948e4baSArchie Cobbs  * Take an incoming datagram fragment and try to reassemble it into a
9928948e4baSArchie Cobbs  * whole datagram.  If a chain for reassembly of this datagram already
9938948e4baSArchie Cobbs  * exists, then it is given as fp; otherwise a new chain must be made.
9948948e4baSArchie Cobbs  *
9958948e4baSArchie Cobbs  * When IPDIVERT is enabled, keep additional state with each packet that
9968948e4baSArchie Cobbs  * tells us if we need to divert or tee the packet we're building.
9972b25acc1SLuigi Rizzo  * In particular, *divinfo includes the port and TEE flag, and
9982b25acc1SLuigi Rizzo  * *divert_rule is the number of the matching rule.
999df8bae1dSRodney W. Grimes  */
10008948e4baSArchie Cobbs 
10016a800098SYoshinobu Inoue static struct mbuf *
10022b25acc1SLuigi Rizzo ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
10032b25acc1SLuigi Rizzo 	u_int32_t *divinfo, u_int16_t *divert_rule)
1004df8bae1dSRodney W. Grimes {
10056effc713SDoug Rabson 	struct ip *ip = mtod(m, struct ip *);
1006b6ea1aa5SRuslan Ermilov 	register struct mbuf *p, *q, *nq;
1007df8bae1dSRodney W. Grimes 	struct mbuf *t;
100853be11f6SPoul-Henning Kamp 	int hlen = ip->ip_hl << 2;
1009df8bae1dSRodney W. Grimes 	int i, next;
101059dfcba4SHajimu UMEMOTO 	u_int8_t ecn, ecn0;
1011df8bae1dSRodney W. Grimes 
10122fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
10132fad1e93SSam Leffler 
1014df8bae1dSRodney W. Grimes 	/*
1015df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
1016df8bae1dSRodney W. Grimes 	 * would confuse code below.
1017df8bae1dSRodney W. Grimes 	 */
1018df8bae1dSRodney W. Grimes 	m->m_data += hlen;
1019df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
1020df8bae1dSRodney W. Grimes 
1021df8bae1dSRodney W. Grimes 	/*
1022df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
1023df8bae1dSRodney W. Grimes 	 */
1024042bbfa3SRobert Watson 	if (fp == NULL) {
1025a163d034SWarner Losh 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
1026df8bae1dSRodney W. Grimes 			goto dropfrag;
1027df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
102836b0360bSRobert Watson #ifdef MAC
10295e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
10305e7ce478SRobert Watson 			m_free(t);
10315e7ce478SRobert Watson 			goto dropfrag;
10325e7ce478SRobert Watson 		}
103336b0360bSRobert Watson 		mac_create_ipq(m, fp);
103436b0360bSRobert Watson #endif
1035462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
1036194a213eSAndrey A. Chernov 		nipq++;
1037375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
1038df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
1039df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
1040df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
10416effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
10426effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
1043af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1044af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
104593e0e116SJulian Elischer #ifdef IPDIVERT
10468948e4baSArchie Cobbs 		fp->ipq_div_info = 0;
1047bb60f459SJulian Elischer 		fp->ipq_div_cookie = 0;
104893e0e116SJulian Elischer #endif
1049af38c68cSLuigi Rizzo 		goto inserted;
105036b0360bSRobert Watson 	} else {
1051375386e2SMike Silbersack 		fp->ipq_nfrags++;
105236b0360bSRobert Watson #ifdef MAC
105336b0360bSRobert Watson 		mac_update_ipq(m, fp);
105436b0360bSRobert Watson #endif
1055df8bae1dSRodney W. Grimes 	}
1056df8bae1dSRodney W. Grimes 
10576effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
10586effc713SDoug Rabson 
1059df8bae1dSRodney W. Grimes 	/*
106059dfcba4SHajimu UMEMOTO 	 * Handle ECN by comparing this segment with the first one;
106159dfcba4SHajimu UMEMOTO 	 * if CE is set, do not lose CE.
106259dfcba4SHajimu UMEMOTO 	 * drop if CE and not-ECT are mixed for the same packet.
106359dfcba4SHajimu UMEMOTO 	 */
106459dfcba4SHajimu UMEMOTO 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
106559dfcba4SHajimu UMEMOTO 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
106659dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_CE) {
106759dfcba4SHajimu UMEMOTO 		if (ecn0 == IPTOS_ECN_NOTECT)
106859dfcba4SHajimu UMEMOTO 			goto dropfrag;
106959dfcba4SHajimu UMEMOTO 		if (ecn0 != IPTOS_ECN_CE)
107059dfcba4SHajimu UMEMOTO 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
107159dfcba4SHajimu UMEMOTO 	}
107259dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
107359dfcba4SHajimu UMEMOTO 		goto dropfrag;
107459dfcba4SHajimu UMEMOTO 
107559dfcba4SHajimu UMEMOTO 	/*
1076df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1077df8bae1dSRodney W. Grimes 	 */
10786effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10796effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1080df8bae1dSRodney W. Grimes 			break;
1081df8bae1dSRodney W. Grimes 
1082df8bae1dSRodney W. Grimes 	/*
1083df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1084df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1085af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1086af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1087db4f9cc7SJonathan Lemon 	 *
1088db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the incoming
1089db4f9cc7SJonathan Lemon 	 * segment, then its checksum is invalidated.
1090df8bae1dSRodney W. Grimes 	 */
10916effc713SDoug Rabson 	if (p) {
10926effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1093df8bae1dSRodney W. Grimes 		if (i > 0) {
1094df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1095df8bae1dSRodney W. Grimes 				goto dropfrag;
10966a800098SYoshinobu Inoue 			m_adj(m, i);
1097db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1098df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1099df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1100df8bae1dSRodney W. Grimes 		}
1101af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1102af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1103af38c68cSLuigi Rizzo 	} else {
1104af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1105af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1106df8bae1dSRodney W. Grimes 	}
1107df8bae1dSRodney W. Grimes 
1108df8bae1dSRodney W. Grimes 	/*
1109df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1110df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1111df8bae1dSRodney W. Grimes 	 */
11126effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1113af38c68cSLuigi Rizzo 	     q = nq) {
1114b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
11156effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
11166effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
11176effc713SDoug Rabson 			GETIP(q)->ip_off += i;
11186effc713SDoug Rabson 			m_adj(q, i);
1119db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1120df8bae1dSRodney W. Grimes 			break;
1121df8bae1dSRodney W. Grimes 		}
11226effc713SDoug Rabson 		nq = q->m_nextpkt;
1123af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
112499e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1125375386e2SMike Silbersack 		fp->ipq_nfrags--;
11266effc713SDoug Rabson 		m_freem(q);
1127df8bae1dSRodney W. Grimes 	}
1128df8bae1dSRodney W. Grimes 
1129af38c68cSLuigi Rizzo inserted:
113093e0e116SJulian Elischer 
113193e0e116SJulian Elischer #ifdef IPDIVERT
113293e0e116SJulian Elischer 	/*
11338948e4baSArchie Cobbs 	 * Transfer firewall instructions to the fragment structure.
11342b25acc1SLuigi Rizzo 	 * Only trust info in the fragment at offset 0.
113593e0e116SJulian Elischer 	 */
11362b25acc1SLuigi Rizzo 	if (ip->ip_off == 0) {
11378948e4baSArchie Cobbs 		fp->ipq_div_info = *divinfo;
11382b25acc1SLuigi Rizzo 		fp->ipq_div_cookie = *divert_rule;
11392b25acc1SLuigi Rizzo 	}
11408948e4baSArchie Cobbs 	*divinfo = 0;
11412b25acc1SLuigi Rizzo 	*divert_rule = 0;
114293e0e116SJulian Elischer #endif
114393e0e116SJulian Elischer 
1144df8bae1dSRodney W. Grimes 	/*
1145375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1146375386e2SMike Silbersack 	 * limiting.
1147375386e2SMike Silbersack 	 *
1148375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1149375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1150375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1151375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1152375386e2SMike Silbersack 	 *
1153df8bae1dSRodney W. Grimes 	 */
11546effc713SDoug Rabson 	next = 0;
11556effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1156375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
115799e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
115899e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1159375386e2SMike Silbersack 				ip_freef(head, fp);
116099e8617dSMaxim Konovalov 			}
11616effc713SDoug Rabson 			return (0);
1162375386e2SMike Silbersack 		}
11636effc713SDoug Rabson 		next += GETIP(q)->ip_len;
11646effc713SDoug Rabson 	}
11656effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1166375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
116799e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
116899e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1169375386e2SMike Silbersack 			ip_freef(head, fp);
117099e8617dSMaxim Konovalov 		}
1171df8bae1dSRodney W. Grimes 		return (0);
1172375386e2SMike Silbersack 	}
1173df8bae1dSRodney W. Grimes 
1174df8bae1dSRodney W. Grimes 	/*
1175430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1176430d30d8SBill Fenner 	 */
11776effc713SDoug Rabson 	q = fp->ipq_frags;
11786effc713SDoug Rabson 	ip = GETIP(q);
117953be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1180430d30d8SBill Fenner 		ipstat.ips_toolong++;
118199e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1182462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1183430d30d8SBill Fenner 		return (0);
1184430d30d8SBill Fenner 	}
1185430d30d8SBill Fenner 
1186430d30d8SBill Fenner 	/*
1187430d30d8SBill Fenner 	 * Concatenate fragments.
1188df8bae1dSRodney W. Grimes 	 */
11896effc713SDoug Rabson 	m = q;
1190df8bae1dSRodney W. Grimes 	t = m->m_next;
1191df8bae1dSRodney W. Grimes 	m->m_next = 0;
1192df8bae1dSRodney W. Grimes 	m_cat(m, t);
11936effc713SDoug Rabson 	nq = q->m_nextpkt;
1194945aa40dSDoug Rabson 	q->m_nextpkt = 0;
11956effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11966effc713SDoug Rabson 		nq = q->m_nextpkt;
1197945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1198db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1199db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1200a8db1d93SJonathan Lemon 		m_cat(m, q);
1201df8bae1dSRodney W. Grimes 	}
120236b0360bSRobert Watson #ifdef MAC
120336b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
120436b0360bSRobert Watson 	mac_destroy_ipq(fp);
120536b0360bSRobert Watson #endif
1206df8bae1dSRodney W. Grimes 
120793e0e116SJulian Elischer #ifdef IPDIVERT
120893e0e116SJulian Elischer 	/*
12098948e4baSArchie Cobbs 	 * Extract firewall instructions from the fragment structure.
121093e0e116SJulian Elischer 	 */
12118948e4baSArchie Cobbs 	*divinfo = fp->ipq_div_info;
12122b25acc1SLuigi Rizzo 	*divert_rule = fp->ipq_div_cookie;
121393e0e116SJulian Elischer #endif
121493e0e116SJulian Elischer 
1215df8bae1dSRodney W. Grimes 	/*
1216df8bae1dSRodney W. Grimes 	 * Create header for new ip packet by
1217df8bae1dSRodney W. Grimes 	 * modifying header of first packet;
1218df8bae1dSRodney W. Grimes 	 * dequeue and discard fragment reassembly header.
1219df8bae1dSRodney W. Grimes 	 * Make header visible.
1220df8bae1dSRodney W. Grimes 	 */
1221df8bae1dSRodney W. Grimes 	ip->ip_len = next;
12226effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
12236effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1224462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1225194a213eSAndrey A. Chernov 	nipq--;
1226df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
122753be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
122853be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1229df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1230a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1231a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
12326a800098SYoshinobu Inoue 	return (m);
1233df8bae1dSRodney W. Grimes 
1234df8bae1dSRodney W. Grimes dropfrag:
1235efe39c6aSJulian Elischer #ifdef IPDIVERT
12368948e4baSArchie Cobbs 	*divinfo = 0;
12372b25acc1SLuigi Rizzo 	*divert_rule = 0;
1238efe39c6aSJulian Elischer #endif
1239df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1240042bbfa3SRobert Watson 	if (fp != NULL)
1241375386e2SMike Silbersack 		fp->ipq_nfrags--;
1242df8bae1dSRodney W. Grimes 	m_freem(m);
1243df8bae1dSRodney W. Grimes 	return (0);
12446effc713SDoug Rabson 
12456effc713SDoug Rabson #undef GETIP
1246df8bae1dSRodney W. Grimes }
1247df8bae1dSRodney W. Grimes 
1248df8bae1dSRodney W. Grimes /*
1249df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1250df8bae1dSRodney W. Grimes  * associated datagrams.
1251df8bae1dSRodney W. Grimes  */
12520312fbe9SPoul-Henning Kamp static void
1253462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1254462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1255df8bae1dSRodney W. Grimes 	struct ipq *fp;
1256df8bae1dSRodney W. Grimes {
12576effc713SDoug Rabson 	register struct mbuf *q;
1258df8bae1dSRodney W. Grimes 
12592fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
12602fad1e93SSam Leffler 
12616effc713SDoug Rabson 	while (fp->ipq_frags) {
12626effc713SDoug Rabson 		q = fp->ipq_frags;
12636effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
12646effc713SDoug Rabson 		m_freem(q);
1265df8bae1dSRodney W. Grimes 	}
1266462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1267df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1268194a213eSAndrey A. Chernov 	nipq--;
1269df8bae1dSRodney W. Grimes }
1270df8bae1dSRodney W. Grimes 
1271df8bae1dSRodney W. Grimes /*
1272df8bae1dSRodney W. Grimes  * IP timer processing;
1273df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1274df8bae1dSRodney W. Grimes  * queue, discard it.
1275df8bae1dSRodney W. Grimes  */
1276df8bae1dSRodney W. Grimes void
1277df8bae1dSRodney W. Grimes ip_slowtimo()
1278df8bae1dSRodney W. Grimes {
1279df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1280df8bae1dSRodney W. Grimes 	int s = splnet();
1281194a213eSAndrey A. Chernov 	int i;
1282df8bae1dSRodney W. Grimes 
12832fad1e93SSam Leffler 	IPQ_LOCK();
1284194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1285462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1286462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1287462b86feSPoul-Henning Kamp 
1288462b86feSPoul-Henning Kamp 			fpp = fp;
1289462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1290462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
129199e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1292462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1293df8bae1dSRodney W. Grimes 			}
1294df8bae1dSRodney W. Grimes 		}
1295194a213eSAndrey A. Chernov 	}
1296690a6055SJesper Skriver 	/*
1297690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1298690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1299690a6055SJesper Skriver 	 * enough to get down to the new limit.
1300690a6055SJesper Skriver 	 */
1301a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1302690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1303b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
130499e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
130599e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1306690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1307690a6055SJesper Skriver 			}
1308690a6055SJesper Skriver 		}
1309690a6055SJesper Skriver 	}
13102fad1e93SSam Leffler 	IPQ_UNLOCK();
1311df8bae1dSRodney W. Grimes 	splx(s);
1312df8bae1dSRodney W. Grimes }
1313df8bae1dSRodney W. Grimes 
1314df8bae1dSRodney W. Grimes /*
1315df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1316df8bae1dSRodney W. Grimes  */
1317df8bae1dSRodney W. Grimes void
1318df8bae1dSRodney W. Grimes ip_drain()
1319df8bae1dSRodney W. Grimes {
1320194a213eSAndrey A. Chernov 	int     i;
1321ce29ab3aSGarrett Wollman 
13222fad1e93SSam Leffler 	IPQ_LOCK();
1323194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1324462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
132599e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
132699e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1327462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1328194a213eSAndrey A. Chernov 		}
1329194a213eSAndrey A. Chernov 	}
13302fad1e93SSam Leffler 	IPQ_UNLOCK();
1331ce29ab3aSGarrett Wollman 	in_rtqdrain();
1332df8bae1dSRodney W. Grimes }
1333df8bae1dSRodney W. Grimes 
1334df8bae1dSRodney W. Grimes /*
1335df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1336df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1337df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1338d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1339d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1340d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1341d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1342d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1343df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1344df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1345df8bae1dSRodney W. Grimes  */
13460312fbe9SPoul-Henning Kamp static int
13472b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1348df8bae1dSRodney W. Grimes {
13492b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
13502b25acc1SLuigi Rizzo 	u_char *cp;
13512b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1352df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1353df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1354df8bae1dSRodney W. Grimes 	n_time ntime;
13554d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1356df8bae1dSRodney W. Grimes 
1357df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1358df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
135953be11f6SPoul-Henning Kamp 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1360df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1361df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1362df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1363df8bae1dSRodney W. Grimes 			break;
1364df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1365df8bae1dSRodney W. Grimes 			optlen = 1;
1366df8bae1dSRodney W. Grimes 		else {
1367fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1368fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1369fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1370fdcb8debSJun-ichiro itojun Hagino 			}
1371df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
1372707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1373df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1374df8bae1dSRodney W. Grimes 				goto bad;
1375df8bae1dSRodney W. Grimes 			}
1376df8bae1dSRodney W. Grimes 		}
1377df8bae1dSRodney W. Grimes 		switch (opt) {
1378df8bae1dSRodney W. Grimes 
1379df8bae1dSRodney W. Grimes 		default:
1380df8bae1dSRodney W. Grimes 			break;
1381df8bae1dSRodney W. Grimes 
1382df8bae1dSRodney W. Grimes 		/*
1383df8bae1dSRodney W. Grimes 		 * Source routing with record.
1384df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1385df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1386df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1387df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1388df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1389df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1390df8bae1dSRodney W. Grimes 		 */
1391df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1392df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1393d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1394d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1395d0ebc0d2SYaroslav Tykhiy 				break;
1396d0ebc0d2SYaroslav Tykhiy #endif
139733841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
139833841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
139933841545SHajimu UMEMOTO 				goto bad;
140033841545SHajimu UMEMOTO 			}
1401df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1402df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1403df8bae1dSRodney W. Grimes 				goto bad;
1404df8bae1dSRodney W. Grimes 			}
1405df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1406df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1407df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1408df8bae1dSRodney W. Grimes 			if (ia == 0) {
1409df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1410df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1411df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1412df8bae1dSRodney W. Grimes 					goto bad;
1413df8bae1dSRodney W. Grimes 				}
1414bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1415bc189bf8SGuido van Rooij 					goto nosourcerouting;
1416df8bae1dSRodney W. Grimes 				/*
1417df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1418df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1419df8bae1dSRodney W. Grimes 				 */
1420df8bae1dSRodney W. Grimes 				break;
1421df8bae1dSRodney W. Grimes 			}
1422df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
14235d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1424df8bae1dSRodney W. Grimes 				/*
1425df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1426df8bae1dSRodney W. Grimes 				 */
14274fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
14284fce5804SGuido van Rooij 					goto nosourcerouting;
1429df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1430df8bae1dSRodney W. Grimes 				break;
1431df8bae1dSRodney W. Grimes 			}
1432d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1433d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1434d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1435d0ebc0d2SYaroslav Tykhiy #endif
14361025071fSGarrett Wollman 			if (!ip_dosourceroute) {
14370af8d3ecSDavid Greenman 				if (ipforwarding) {
14380af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
14390af8d3ecSDavid Greenman 					/*
14400af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
14410af8d3ecSDavid Greenman 					 */
1442efa48587SGuido van Rooij nosourcerouting:
1443bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
14441025071fSGarrett Wollman 					log(LOG_WARNING,
14451025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
14461025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
14471025071fSGarrett Wollman 					type = ICMP_UNREACH;
14481025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
14491025071fSGarrett Wollman 					goto bad;
14500af8d3ecSDavid Greenman 				} else {
14510af8d3ecSDavid Greenman 					/*
14520af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
14530af8d3ecSDavid Greenman 					 */
1454d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1455d0ebc0d2SYaroslav Tykhiy dropit:
1456d0ebc0d2SYaroslav Tykhiy #endif
14570af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
14580af8d3ecSDavid Greenman 					m_freem(m);
14590af8d3ecSDavid Greenman 					return (1);
14600af8d3ecSDavid Greenman 				}
14611025071fSGarrett Wollman 			}
14621025071fSGarrett Wollman 
1463df8bae1dSRodney W. Grimes 			/*
1464df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1465df8bae1dSRodney W. Grimes 			 */
146694a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1467df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
14681025071fSGarrett Wollman 
1469df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1470df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1471df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1472df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1473df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1474df8bae1dSRodney W. Grimes 			} else
147502c1c707SAndre Oppermann 				ia = ip_rtaddr(ipaddr.sin_addr);
1476df8bae1dSRodney W. Grimes 			if (ia == 0) {
1477df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1478df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1479df8bae1dSRodney W. Grimes 				goto bad;
1480df8bae1dSRodney W. Grimes 			}
1481df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
148294a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
148394a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1484df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1485df8bae1dSRodney W. Grimes 			/*
1486df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1487df8bae1dSRodney W. Grimes 			 */
1488df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1489df8bae1dSRodney W. Grimes 			break;
1490df8bae1dSRodney W. Grimes 
1491df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1492d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1493d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1494d0ebc0d2SYaroslav Tykhiy 				break;
1495d0ebc0d2SYaroslav Tykhiy #endif
1496707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1497707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1498707d00a3SJonathan Lemon 				goto bad;
1499707d00a3SJonathan Lemon 			}
1500df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1501df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1502df8bae1dSRodney W. Grimes 				goto bad;
1503df8bae1dSRodney W. Grimes 			}
1504df8bae1dSRodney W. Grimes 			/*
1505df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1506df8bae1dSRodney W. Grimes 			 */
1507df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
15085d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1509df8bae1dSRodney W. Grimes 				break;
151094a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1511df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1512df8bae1dSRodney W. Grimes 			/*
1513df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1514df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1515df8bae1dSRodney W. Grimes 			 */
1516df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
151702c1c707SAndre Oppermann 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1518df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1519df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1520df8bae1dSRodney W. Grimes 				goto bad;
1521df8bae1dSRodney W. Grimes 			}
152294a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
152394a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1524df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1525df8bae1dSRodney W. Grimes 			break;
1526df8bae1dSRodney W. Grimes 
1527df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1528d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1529d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1530d0ebc0d2SYaroslav Tykhiy 				break;
1531d0ebc0d2SYaroslav Tykhiy #endif
1532df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
153307514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
153407514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1535df8bae1dSRodney W. Grimes 				goto bad;
153633841545SHajimu UMEMOTO 			}
153707514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
153807514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
153933841545SHajimu UMEMOTO 				goto bad;
154033841545SHajimu UMEMOTO 			}
154107514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
154207514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
154307514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
154407514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1545df8bae1dSRodney W. Grimes 					goto bad;
154633841545SHajimu UMEMOTO 				}
1547df8bae1dSRodney W. Grimes 				break;
1548df8bae1dSRodney W. Grimes 			}
154907514071SJonathan Lemon 			off--;				/* 0 origin */
155007514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
155107514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1552df8bae1dSRodney W. Grimes 
1553df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1554df8bae1dSRodney W. Grimes 				break;
1555df8bae1dSRodney W. Grimes 
1556df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
155707514071SJonathan Lemon 				if (off + sizeof(n_time) +
155807514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
155907514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1560df8bae1dSRodney W. Grimes 					goto bad;
156133841545SHajimu UMEMOTO 				}
1562df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1563df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1564df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
1565df8bae1dSRodney W. Grimes 				if (ia == 0)
1566df8bae1dSRodney W. Grimes 					continue;
156794a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
156894a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
156907514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1570a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1571df8bae1dSRodney W. Grimes 				break;
1572df8bae1dSRodney W. Grimes 
1573df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
157407514071SJonathan Lemon 				if (off + sizeof(n_time) +
157507514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
157607514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1577df8bae1dSRodney W. Grimes 					goto bad;
157833841545SHajimu UMEMOTO 				}
157994a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1580df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
1581df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1582df8bae1dSRodney W. Grimes 					continue;
158307514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1584a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1585df8bae1dSRodney W. Grimes 				break;
1586df8bae1dSRodney W. Grimes 
1587df8bae1dSRodney W. Grimes 			default:
158807514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1589df8bae1dSRodney W. Grimes 				goto bad;
1590df8bae1dSRodney W. Grimes 			}
1591df8bae1dSRodney W. Grimes 			ntime = iptime();
159207514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
159307514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1594df8bae1dSRodney W. Grimes 		}
1595df8bae1dSRodney W. Grimes 	}
159647174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
159702c1c707SAndre Oppermann 		ip_forward(m, 1, next_hop);
1598df8bae1dSRodney W. Grimes 		return (1);
1599df8bae1dSRodney W. Grimes 	}
1600df8bae1dSRodney W. Grimes 	return (0);
1601df8bae1dSRodney W. Grimes bad:
1602df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1603df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1604df8bae1dSRodney W. Grimes 	return (1);
1605df8bae1dSRodney W. Grimes }
1606df8bae1dSRodney W. Grimes 
1607df8bae1dSRodney W. Grimes /*
1608df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1609df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1610df8bae1dSRodney W. Grimes  */
1611bd714208SRuslan Ermilov struct in_ifaddr *
161202c1c707SAndre Oppermann ip_rtaddr(dst)
1613df8bae1dSRodney W. Grimes 	struct in_addr dst;
1614df8bae1dSRodney W. Grimes {
161597d8d152SAndre Oppermann 	struct route sro;
161602c1c707SAndre Oppermann 	struct sockaddr_in *sin;
161702c1c707SAndre Oppermann 	struct in_ifaddr *ifa;
1618df8bae1dSRodney W. Grimes 
16190cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
162097d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
1621df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
1622df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
1623df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
162497d8d152SAndre Oppermann 	rtalloc_ign(&sro, RTF_CLONING);
1625df8bae1dSRodney W. Grimes 
162697d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
1627df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
162802c1c707SAndre Oppermann 
162997d8d152SAndre Oppermann 	ifa = ifatoia(sro.ro_rt->rt_ifa);
163097d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
163102c1c707SAndre Oppermann 	return ifa;
1632df8bae1dSRodney W. Grimes }
1633df8bae1dSRodney W. Grimes 
1634df8bae1dSRodney W. Grimes /*
1635df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1636df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1637df8bae1dSRodney W. Grimes  */
163837c84183SPoul-Henning Kamp static void
1639df8bae1dSRodney W. Grimes save_rte(option, dst)
1640df8bae1dSRodney W. Grimes 	u_char *option;
1641df8bae1dSRodney W. Grimes 	struct in_addr dst;
1642df8bae1dSRodney W. Grimes {
1643df8bae1dSRodney W. Grimes 	unsigned olen;
1644df8bae1dSRodney W. Grimes 
1645df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1646df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1647df8bae1dSRodney W. Grimes 	if (ipprintfs)
1648df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1649df8bae1dSRodney W. Grimes #endif
1650df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1651df8bae1dSRodney W. Grimes 		return;
16520453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1653df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1654df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1655df8bae1dSRodney W. Grimes }
1656df8bae1dSRodney W. Grimes 
1657df8bae1dSRodney W. Grimes /*
1658df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1659df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1660df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1661df8bae1dSRodney W. Grimes  */
1662df8bae1dSRodney W. Grimes struct mbuf *
1663df8bae1dSRodney W. Grimes ip_srcroute()
1664df8bae1dSRodney W. Grimes {
1665df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1666df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1667df8bae1dSRodney W. Grimes 
1668df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1669df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1670a163d034SWarner Losh 	m = m_get(M_DONTWAIT, MT_HEADER);
1671df8bae1dSRodney W. Grimes 	if (m == 0)
1672df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1673df8bae1dSRodney W. Grimes 
1674df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1675df8bae1dSRodney W. Grimes 
1676df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1677df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1678df8bae1dSRodney W. Grimes 	    OPTSIZ;
1679df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1680df8bae1dSRodney W. Grimes 	if (ipprintfs)
1681df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1682df8bae1dSRodney W. Grimes #endif
1683df8bae1dSRodney W. Grimes 
1684df8bae1dSRodney W. Grimes 	/*
1685df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1686df8bae1dSRodney W. Grimes 	 */
1687df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1688df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1689df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1690df8bae1dSRodney W. Grimes 	if (ipprintfs)
1691af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1692df8bae1dSRodney W. Grimes #endif
1693df8bae1dSRodney W. Grimes 
1694df8bae1dSRodney W. Grimes 	/*
1695df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1696df8bae1dSRodney W. Grimes 	 */
1697df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1698df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
169994a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
170094a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1701df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1702df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1703df8bae1dSRodney W. Grimes #undef OPTSIZ
1704df8bae1dSRodney W. Grimes 	/*
1705df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1706df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1707df8bae1dSRodney W. Grimes 	 */
1708df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1709df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1710df8bae1dSRodney W. Grimes 		if (ipprintfs)
1711af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1712df8bae1dSRodney W. Grimes #endif
1713df8bae1dSRodney W. Grimes 		*q++ = *p--;
1714df8bae1dSRodney W. Grimes 	}
1715df8bae1dSRodney W. Grimes 	/*
1716df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1717df8bae1dSRodney W. Grimes 	 */
1718df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1719df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1720df8bae1dSRodney W. Grimes 	if (ipprintfs)
1721af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1722df8bae1dSRodney W. Grimes #endif
1723df8bae1dSRodney W. Grimes 	return (m);
1724df8bae1dSRodney W. Grimes }
1725df8bae1dSRodney W. Grimes 
1726df8bae1dSRodney W. Grimes /*
1727df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1728df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1729df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1730df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1731df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1732df8bae1dSRodney W. Grimes  */
1733df8bae1dSRodney W. Grimes void
1734df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1735df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1736df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1737df8bae1dSRodney W. Grimes {
1738df8bae1dSRodney W. Grimes 	register int i;
1739df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1740df8bae1dSRodney W. Grimes 	register caddr_t opts;
1741df8bae1dSRodney W. Grimes 	int olen;
1742df8bae1dSRodney W. Grimes 
174353be11f6SPoul-Henning Kamp 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
1744df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1745df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1746df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1747df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1748df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1749df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
175053be11f6SPoul-Henning Kamp 	ip->ip_v = IPVERSION;
175153be11f6SPoul-Henning Kamp 	ip->ip_hl = sizeof(struct ip) >> 2;
1752df8bae1dSRodney W. Grimes }
1753df8bae1dSRodney W. Grimes 
1754df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
1755df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
1756df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1757df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1758df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1759fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
17603b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
1761df8bae1dSRodney W. Grimes };
1762df8bae1dSRodney W. Grimes 
1763df8bae1dSRodney W. Grimes /*
1764df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1765df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1766df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1767df8bae1dSRodney W. Grimes  * of codes and types.
1768df8bae1dSRodney W. Grimes  *
1769df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1770df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1771df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1772df8bae1dSRodney W. Grimes  * protocol deal with that.
1773df8bae1dSRodney W. Grimes  *
1774df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1775df8bae1dSRodney W. Grimes  * via a source route.
1776df8bae1dSRodney W. Grimes  */
17770312fbe9SPoul-Henning Kamp static void
177802c1c707SAndre Oppermann ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1779df8bae1dSRodney W. Grimes {
17802b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
178102c1c707SAndre Oppermann 	struct in_ifaddr *ia;
178226f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1783df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1784df8bae1dSRodney W. Grimes 	n_long dest;
17853efc3014SJulian Elischer 	struct in_addr pkt_dst;
1786df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
1787b9234fafSSam Leffler #if defined(IPSEC) || defined(FAST_IPSEC)
17886a800098SYoshinobu Inoue 	struct ifnet dummyifp;
17896a800098SYoshinobu Inoue #endif
1790df8bae1dSRodney W. Grimes 
17913efc3014SJulian Elischer 	/*
17923efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
17933efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
17943efc3014SJulian Elischer 	 */
17952b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
17963efc3014SJulian Elischer 
1797df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1798df8bae1dSRodney W. Grimes 	if (ipprintfs)
179961ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
18003efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1801162886e2SBruce Evans 		    ip->ip_ttl);
1802df8bae1dSRodney W. Grimes #endif
1803100ba1a6SJordan K. Hubbard 
1804100ba1a6SJordan K. Hubbard 
18053efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1806df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1807df8bae1dSRodney W. Grimes 		m_freem(m);
1808df8bae1dSRodney W. Grimes 		return;
1809df8bae1dSRodney W. Grimes 	}
18101b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18111b968362SDag-Erling Smørgrav 	if (!ipstealth) {
18121b968362SDag-Erling Smørgrav #endif
1813df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
18141b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
181502c1c707SAndre Oppermann 			    0, 0);
1816df8bae1dSRodney W. Grimes 			return;
1817df8bae1dSRodney W. Grimes 		}
18181b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18191b968362SDag-Erling Smørgrav 	}
18201b968362SDag-Erling Smørgrav #endif
1821df8bae1dSRodney W. Grimes 
182202c1c707SAndre Oppermann 	if ((ia = ip_rtaddr(pkt_dst)) == 0) {
182302c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
1824df8bae1dSRodney W. Grimes 		return;
182502c1c707SAndre Oppermann 	}
1826df8bae1dSRodney W. Grimes 
1827df8bae1dSRodney W. Grimes 	/*
1828bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1829bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1830bfef7ed4SIan Dowse 	 *
18314d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
18324d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
18334d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
18344d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
18354d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
18364d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
18374d2e3692SLuigi Rizzo 	 *
1838bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1839bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1840bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1841bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1842df8bae1dSRodney W. Grimes 	 */
1843a163d034SWarner Losh 	MGET(mcopy, M_DONTWAIT, m->m_type);
1844a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
18459967cafcSSam Leffler 		/*
18469967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
18479967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
18489967cafcSSam Leffler 		 * be conservative and just discard the copy since
18499967cafcSSam Leffler 		 * code below may some day want the tags.
18509967cafcSSam Leffler 		 */
18519967cafcSSam Leffler 		m_free(mcopy);
18529967cafcSSam Leffler 		mcopy = NULL;
18539967cafcSSam Leffler 	}
1854bfef7ed4SIan Dowse 	if (mcopy != NULL) {
185553be11f6SPoul-Henning Kamp 		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1856bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1857bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1858bfef7ed4SIan Dowse 	}
185904287599SRuslan Ermilov 
186004287599SRuslan Ermilov #ifdef IPSTEALTH
186104287599SRuslan Ermilov 	if (!ipstealth) {
186204287599SRuslan Ermilov #endif
186304287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
186404287599SRuslan Ermilov #ifdef IPSTEALTH
186504287599SRuslan Ermilov 	}
186604287599SRuslan Ermilov #endif
1867df8bae1dSRodney W. Grimes 
1868df8bae1dSRodney W. Grimes 	/*
1869df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1870df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1871df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1872df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1873df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1874df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1875df8bae1dSRodney W. Grimes 	 */
187602c1c707SAndre Oppermann 	dest = 0;
187702c1c707SAndre Oppermann 	if (ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
187802c1c707SAndre Oppermann 		struct sockaddr_in *sin;
187902c1c707SAndre Oppermann 		struct route ro;
188002c1c707SAndre Oppermann 		struct rtentry *rt;
188102c1c707SAndre Oppermann 
18820cfbbe3bSAndre Oppermann 		bzero(&ro, sizeof(ro));
188302c1c707SAndre Oppermann 		sin = (struct sockaddr_in *)&ro.ro_dst;
188402c1c707SAndre Oppermann 		sin->sin_family = AF_INET;
188502c1c707SAndre Oppermann 		sin->sin_len = sizeof(*sin);
188602c1c707SAndre Oppermann 		sin->sin_addr = pkt_dst;
188726d02ca7SAndre Oppermann 		rtalloc_ign(&ro, RTF_CLONING);
188802c1c707SAndre Oppermann 
188902c1c707SAndre Oppermann 		rt = ro.ro_rt;
189002c1c707SAndre Oppermann 
189102c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1892df8bae1dSRodney W. Grimes 		    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
18932b25acc1SLuigi Rizzo 		    ipsendredirects && !srcrt && !next_hop) {
1894df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1895df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1896df8bae1dSRodney W. Grimes 
1897df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1898df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1899df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
1900df8bae1dSRodney W. Grimes 					dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1901df8bae1dSRodney W. Grimes 				else
19023efc3014SJulian Elischer 					dest = pkt_dst.s_addr;
1903df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1904df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1905df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1906df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1907df8bae1dSRodney W. Grimes 				if (ipprintfs)
1908df8bae1dSRodney W. Grimes 					printf("redirect (%d) to %lx\n", code, (u_long)dest);
1909df8bae1dSRodney W. Grimes #endif
1910df8bae1dSRodney W. Grimes 			}
1911df8bae1dSRodney W. Grimes 		}
191202c1c707SAndre Oppermann 		if (rt)
191302c1c707SAndre Oppermann 			RTFREE(rt);
191402c1c707SAndre Oppermann 	}
1915df8bae1dSRodney W. Grimes 
1916ea779ff3SLuigi Rizzo     {
1917ea779ff3SLuigi Rizzo 	struct m_hdr tag;
1918ea779ff3SLuigi Rizzo 
1919ea779ff3SLuigi Rizzo 	if (next_hop) {
1920ea779ff3SLuigi Rizzo 		/* Pass IPFORWARD info if available */
1921ea779ff3SLuigi Rizzo 
1922ea779ff3SLuigi Rizzo 		tag.mh_type = MT_TAG;
1923ea779ff3SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
1924ea779ff3SLuigi Rizzo 		tag.mh_data = (caddr_t)next_hop;
1925ea779ff3SLuigi Rizzo 		tag.mh_next = m;
192663346129SBrian Feldman 		tag.mh_nextpkt = NULL;
1927ea779ff3SLuigi Rizzo 		m = (struct mbuf *)&tag;
1928ea779ff3SLuigi Rizzo 	}
192902c1c707SAndre Oppermann 	error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL);
1930ea779ff3SLuigi Rizzo     }
1931df8bae1dSRodney W. Grimes 	if (error)
1932df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1933df8bae1dSRodney W. Grimes 	else {
1934df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1935df8bae1dSRodney W. Grimes 		if (type)
1936df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1937df8bae1dSRodney W. Grimes 		else {
19389188b4a1SAndre Oppermann 			if (mcopy)
1939df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1940df8bae1dSRodney W. Grimes 			return;
1941df8bae1dSRodney W. Grimes 		}
1942df8bae1dSRodney W. Grimes 	}
1943df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1944df8bae1dSRodney W. Grimes 		return;
1945df8bae1dSRodney W. Grimes 	destifp = NULL;
1946df8bae1dSRodney W. Grimes 
1947df8bae1dSRodney W. Grimes 	switch (error) {
1948df8bae1dSRodney W. Grimes 
1949df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1950df8bae1dSRodney W. Grimes 		/* type, code set above */
1951df8bae1dSRodney W. Grimes 		break;
1952df8bae1dSRodney W. Grimes 
1953df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1954df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1955df8bae1dSRodney W. Grimes 	case ENETDOWN:
1956df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1957df8bae1dSRodney W. Grimes 	default:
1958df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1959df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1960df8bae1dSRodney W. Grimes 		break;
1961df8bae1dSRodney W. Grimes 
1962df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1963df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1964df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
196502c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
19666a800098SYoshinobu Inoue 		/*
19676a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
19686a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
19696a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
19706a800098SYoshinobu Inoue 		 * XXX quickhack!!!
19716a800098SYoshinobu Inoue 		 */
197202c1c707SAndre Oppermann 		{
19736a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
19746a800098SYoshinobu Inoue 			int ipsecerror;
19756a800098SYoshinobu Inoue 			int ipsechdr;
197602c1c707SAndre Oppermann 			struct route *ro;
19776a800098SYoshinobu Inoue 
197802c1c707SAndre Oppermann #ifdef IPSEC
19796a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
19806a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
19816a800098SYoshinobu Inoue 						    IP_FORWARDING,
19826a800098SYoshinobu Inoue 						    &ipsecerror);
198302c1c707SAndre Oppermann #else /* FAST_IPSEC */
1984b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1985b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1986b9234fafSSam Leffler 						   IP_FORWARDING,
1987b9234fafSSam Leffler 						   &ipsecerror);
198802c1c707SAndre Oppermann #endif
198902c1c707SAndre Oppermann 			if (sp != NULL) {
1990b9234fafSSam Leffler 				/* count IPsec header size */
1991b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1992b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1993b9234fafSSam Leffler 							 NULL);
1994b9234fafSSam Leffler 
1995b9234fafSSam Leffler 				/*
1996b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1997b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1998b9234fafSSam Leffler 				 *
1999b9234fafSSam Leffler 				 * XXX BUG ALERT
2000b9234fafSSam Leffler 				 * The "dummyifp" code relies upon the fact
2001b9234fafSSam Leffler 				 * that icmp_error() touches only ifp->if_mtu.
2002b9234fafSSam Leffler 				 */
2003b9234fafSSam Leffler 				/*XXX*/
2004b9234fafSSam Leffler 				destifp = NULL;
2005b9234fafSSam Leffler 				if (sp->req != NULL
2006b9234fafSSam Leffler 				 && sp->req->sav != NULL
2007b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
200802c1c707SAndre Oppermann 					ro = &sp->req->sav->sah->sa_route;
200902c1c707SAndre Oppermann 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
2010b9234fafSSam Leffler 						dummyifp.if_mtu =
201102c1c707SAndre Oppermann 						    ro->ro_rt->rt_ifp->if_mtu;
2012b9234fafSSam Leffler 						dummyifp.if_mtu -= ipsechdr;
2013b9234fafSSam Leffler 						destifp = &dummyifp;
2014b9234fafSSam Leffler 					}
2015b9234fafSSam Leffler 				}
2016b9234fafSSam Leffler 
201702c1c707SAndre Oppermann #ifdef IPSEC
201802c1c707SAndre Oppermann 				key_freesp(sp);
201902c1c707SAndre Oppermann #else /* FAST_IPSEC */
2020b9234fafSSam Leffler 				KEY_FREESP(&sp);
202102c1c707SAndre Oppermann #endif
202202c1c707SAndre Oppermann 				ipstat.ips_cantfrag++;
202302c1c707SAndre Oppermann 				break;
202402c1c707SAndre Oppermann 			} else
202502c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
202602c1c707SAndre Oppermann 		destifp = ia->ia_ifp;
202702c1c707SAndre Oppermann #if defined(IPSEC) || defined(FAST_IPSEC)
2028b9234fafSSam Leffler 		}
202902c1c707SAndre Oppermann #endif /*IPSEC || FAST_IPSEC*/
2030df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
2031df8bae1dSRodney W. Grimes 		break;
2032df8bae1dSRodney W. Grimes 
2033df8bae1dSRodney W. Grimes 	case ENOBUFS:
2034df285b3dSMike Silbersack 		/*
2035df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
2036df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
2037df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
2038df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
2039df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
2040df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
2041df285b3dSMike Silbersack 		 */
2042df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
2043df285b3dSMike Silbersack 			m_freem(mcopy);
2044df285b3dSMike Silbersack 			return;
2045df285b3dSMike Silbersack 		} else {
2046df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
2047df8bae1dSRodney W. Grimes 			code = 0;
2048df285b3dSMike Silbersack 		}
2049df8bae1dSRodney W. Grimes 		break;
20503a06e3e0SRuslan Ermilov 
20513a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
20523a06e3e0SRuslan Ermilov 		m_freem(mcopy);
20533a06e3e0SRuslan Ermilov 		return;
2054df8bae1dSRodney W. Grimes 	}
2055df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
2056df8bae1dSRodney W. Grimes }
2057df8bae1dSRodney W. Grimes 
205882c23ebaSBill Fenner void
205982c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
206082c23ebaSBill Fenner 	register struct inpcb *inp;
206182c23ebaSBill Fenner 	register struct mbuf **mp;
206282c23ebaSBill Fenner 	register struct ip *ip;
206382c23ebaSBill Fenner 	register struct mbuf *m;
206482c23ebaSBill Fenner {
2065be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
2066be8a62e8SPoul-Henning Kamp 		struct bintime bt;
2067be8a62e8SPoul-Henning Kamp 
2068be8a62e8SPoul-Henning Kamp 		bintime(&bt);
2069be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
2070be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
2071be8a62e8SPoul-Henning Kamp 			SCM_BINTIME, SOL_SOCKET);
2072be8a62e8SPoul-Henning Kamp 			if (*mp)
2073be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
2074be8a62e8SPoul-Henning Kamp 		}
207582c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
207682c23ebaSBill Fenner 			struct timeval tv;
207782c23ebaSBill Fenner 
2078be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
207982c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
208082c23ebaSBill Fenner 				SCM_TIMESTAMP, SOL_SOCKET);
208182c23ebaSBill Fenner 			if (*mp)
208282c23ebaSBill Fenner 				mp = &(*mp)->m_next;
20834cc20ab1SSeigo Tanimura 		}
2084be8a62e8SPoul-Henning Kamp 	}
208582c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
208682c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
208782c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
208882c23ebaSBill Fenner 		if (*mp)
208982c23ebaSBill Fenner 			mp = &(*mp)->m_next;
209082c23ebaSBill Fenner 	}
20914957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
20924957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
20934957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
20944957466bSMatthew N. Dodd 		if (*mp)
20954957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
20964957466bSMatthew N. Dodd 	}
209782c23ebaSBill Fenner #ifdef notyet
209882c23ebaSBill Fenner 	/* XXX
209982c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
210082c23ebaSBill Fenner 	 * than they already were.
210182c23ebaSBill Fenner 	 */
210282c23ebaSBill Fenner 	/* options were tossed already */
210382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
210482c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
210582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
210682c23ebaSBill Fenner 		if (*mp)
210782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
210882c23ebaSBill Fenner 	}
210982c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
211082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
211182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
211282c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
211382c23ebaSBill Fenner 		if (*mp)
211482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
211582c23ebaSBill Fenner 	}
211682c23ebaSBill Fenner #endif
211782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
2118d314ad7bSJulian Elischer 		struct ifnet *ifp;
2119d314ad7bSJulian Elischer 		struct sdlbuf {
212082c23ebaSBill Fenner 			struct sockaddr_dl sdl;
2121d314ad7bSJulian Elischer 			u_char	pad[32];
2122d314ad7bSJulian Elischer 		} sdlbuf;
2123d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
2124d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
212582c23ebaSBill Fenner 
2126d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
2127d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
2128f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
2129f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
2130d314ad7bSJulian Elischer 			/*
2131d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
2132d314ad7bSJulian Elischer 			 */
2133d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
2134d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
2135d314ad7bSJulian Elischer 				goto makedummy;
2136d314ad7bSJulian Elischer 			}
2137d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
2138d314ad7bSJulian Elischer 		} else {
2139d314ad7bSJulian Elischer makedummy:
2140d314ad7bSJulian Elischer 			sdl2->sdl_len
2141d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
2142d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
2143d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
2144d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2145d314ad7bSJulian Elischer 		}
2146d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
214782c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
214882c23ebaSBill Fenner 		if (*mp)
214982c23ebaSBill Fenner 			mp = &(*mp)->m_next;
215082c23ebaSBill Fenner 	}
215182c23ebaSBill Fenner }
215282c23ebaSBill Fenner 
21534d2e3692SLuigi Rizzo /*
21544d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
21554d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
21564d2e3692SLuigi Rizzo  *
21574d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
21584d2e3692SLuigi Rizzo  *  handling is done there already.
21594d2e3692SLuigi Rizzo  */
21604d2e3692SLuigi Rizzo static int ip_rsvp_on;
21614d2e3692SLuigi Rizzo struct socket *ip_rsvpd;
2162df8bae1dSRodney W. Grimes int
2163f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
2164f0068c4aSGarrett Wollman {
2165f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
2166f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2167f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
2168f0068c4aSGarrett Wollman 
2169f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
2170f0068c4aSGarrett Wollman 		return EADDRINUSE;
2171f0068c4aSGarrett Wollman 
2172f0068c4aSGarrett Wollman 	ip_rsvpd = so;
21731c5de19aSGarrett Wollman 	/*
21741c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
21751c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21761c5de19aSGarrett Wollman 	 */
21771c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
21781c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
21791c5de19aSGarrett Wollman 		rsvp_on++;
21801c5de19aSGarrett Wollman 	}
2181f0068c4aSGarrett Wollman 
2182f0068c4aSGarrett Wollman 	return 0;
2183f0068c4aSGarrett Wollman }
2184f0068c4aSGarrett Wollman 
2185f0068c4aSGarrett Wollman int
2186f0068c4aSGarrett Wollman ip_rsvp_done(void)
2187f0068c4aSGarrett Wollman {
2188f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
21891c5de19aSGarrett Wollman 	/*
21901c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
21911c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21921c5de19aSGarrett Wollman 	 */
21931c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
21941c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
21951c5de19aSGarrett Wollman 		rsvp_on--;
21961c5de19aSGarrett Wollman 	}
2197f0068c4aSGarrett Wollman 	return 0;
2198f0068c4aSGarrett Wollman }
2199bbb4330bSLuigi Rizzo 
2200bbb4330bSLuigi Rizzo void
2201bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
2202bbb4330bSLuigi Rizzo {
2203bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
2204bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
2205bbb4330bSLuigi Rizzo 		return;
2206bbb4330bSLuigi Rizzo 	}
2207bbb4330bSLuigi Rizzo 
2208bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
2209bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
2210bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
2211bbb4330bSLuigi Rizzo 	 */
2212bbb4330bSLuigi Rizzo 
2213bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
2214bbb4330bSLuigi Rizzo 		m_freem(m);
2215bbb4330bSLuigi Rizzo 		return;
2216bbb4330bSLuigi Rizzo 	}
2217bbb4330bSLuigi Rizzo 
2218bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
2219bbb4330bSLuigi Rizzo 		rip_input(m, off);
2220bbb4330bSLuigi Rizzo 		return;
2221bbb4330bSLuigi Rizzo 	}
2222bbb4330bSLuigi Rizzo 	/* Drop the packet */
2223bbb4330bSLuigi Rizzo 	m_freem(m);
2224bbb4330bSLuigi Rizzo }
2225