xref: /freebsd/sys/netinet/ip_input.c (revision 4957466b8e6ac843337f2ad22e7c10fd296ebde6)
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD$
 */
36df8bae1dSRodney W. Grimes 
37e4f4247aSEivind Eklund #include "opt_bootp.h"
3874a9466cSGary Palmer #include "opt_ipfw.h"
39b715f178SLuigi Rizzo #include "opt_ipdn.h"
40fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
411ee25934SPeter Wemm #include "opt_ipfilter.h"
4227108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
436a800098SYoshinobu Inoue #include "opt_ipsec.h"
4436b0360bSRobert Watson #include "opt_mac.h"
45c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4664dddc18SKris Kennaway #include "opt_random_ip_id.h"
4774a9466cSGary Palmer 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
5036b0360bSRobert Watson #include <sys/mac.h>
51df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
52b715f178SLuigi Rizzo #include <sys/malloc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54df8bae1dSRodney W. Grimes #include <sys/protosw.h>
55df8bae1dSRodney W. Grimes #include <sys/socket.h>
56df8bae1dSRodney W. Grimes #include <sys/time.h>
57df8bae1dSRodney W. Grimes #include <sys/kernel.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61c85540ddSAndrey A. Chernov #include <net/pfil.h>
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7658938916SGarrett Wollman #include <machine/in_cksum.h>
77df8bae1dSRodney W. Grimes 
78f0068c4aSGarrett Wollman #include <sys/socketvar.h>
796ddbf1e2SGary Palmer 
806ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
81db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
82db69a05dSPaul Saab 
836a800098SYoshinobu Inoue #ifdef IPSEC
846a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
856a800098SYoshinobu Inoue #include <netkey/key.h>
866a800098SYoshinobu Inoue #endif
876a800098SYoshinobu Inoue 
88b9234fafSSam Leffler #ifdef FAST_IPSEC
89b9234fafSSam Leffler #include <netipsec/ipsec.h>
90b9234fafSSam Leffler #include <netipsec/key.h>
91b9234fafSSam Leffler #endif
92b9234fafSSam Leffler 
931c5de19aSGarrett Wollman int rsvp_on = 0;
94f0068c4aSGarrett Wollman 
951f91d8c5SDavid Greenman int	ipforwarding = 0;
960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
973d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
980312fbe9SPoul-Henning Kamp 
99d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
1000312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
1013d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
1020312fbe9SPoul-Henning Kamp 
103df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1040312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1053d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1060312fbe9SPoul-Henning Kamp 
1070312fbe9SPoul-Henning Kamp static int	ip_dosourceroute = 0;
1080312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
1093d177f46SBill Fumerola     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
1104fce5804SGuido van Rooij 
1114fce5804SGuido van Rooij static int	ip_acceptsourceroute = 0;
1124fce5804SGuido van Rooij SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
1133d177f46SBill Fumerola     CTLFLAG_RW, &ip_acceptsourceroute, 0,
1143d177f46SBill Fumerola     "Enable accepting source routed IP packets");
1156a800098SYoshinobu Inoue 
1166a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1176a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1186a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1196a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1206a800098SYoshinobu Inoue 
121402062e8SMike Silbersack static int    nipq = 0;         /* total # of reass queues */
122402062e8SMike Silbersack static int    maxnipq;
123690a6055SJesper Skriver SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
124402062e8SMike Silbersack 	&maxnipq, 0,
125690a6055SJesper Skriver 	"Maximum number of IPv4 fragment reassembly queue entries");
126690a6055SJesper Skriver 
127375386e2SMike Silbersack static int    maxfragsperpacket;
128375386e2SMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
129375386e2SMike Silbersack 	&maxfragsperpacket, 0,
130375386e2SMike Silbersack 	"Maximum number of IPv4 fragments allowed per packet");
131375386e2SMike Silbersack 
132df285b3dSMike Silbersack static int	ip_sendsourcequench = 0;
133df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
134df285b3dSMike Silbersack 	&ip_sendsourcequench, 0,
135df285b3dSMike Silbersack 	"Enable the transmission of source quench packets");
136df285b3dSMike Silbersack 
137823db0e9SDon Lewis /*
138823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
139823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
140a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
141823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1423f67c834SDon Lewis  *
143a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
144a8f12100SDon Lewis  * to translate the destination address to another local interface.
1453f67c834SDon Lewis  *
1463f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1473f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1483f67c834SDon Lewis  * packets for those addresses are received.
149823db0e9SDon Lewis  */
150b3e95d4eSJonathan Lemon static int	ip_checkinterface = 1;
151b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
152b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
153b3e95d4eSJonathan Lemon 
154df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1550312fbe9SPoul-Henning Kamp static int	ipprintfs = 0;
156df8bae1dSRodney W. Grimes #endif
157df8bae1dSRodney W. Grimes 
1581cafed39SJonathan Lemon static struct	ifqueue ipintrq;
159ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
160ca925d9cSJonathan Lemon 
161df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
162f0ffb944SJulian Elischer extern	struct protosw inetsw[];
163df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16459562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
165ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
166ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
167ca925d9cSJonathan Lemon 
168afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1693d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1700312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1713d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
172df8bae1dSRodney W. Grimes 
173f23b4c91SGarrett Wollman struct ipstat ipstat;
174c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1753d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
176194a213eSAndrey A. Chernov 
/*
 * Packet reassembly stuff.
 *
 * Reassembly queues live in IPREASS_NHASH (2^IPREASS_NHASH_LOG2) buckets.
 * IPREASS_HASH(x, y) picks a bucket: it packs bits 0-3 of x together with
 * bits 8-11 of x into one byte, XORs that with y and masks the result down
 * to a valid bucket index.  Both arguments are evaluated as plain integer
 * expressions; x is expanded twice, so it must be free of side effects.
 */
#define IPREASS_NHASH_LOG2      6
#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
#define IPREASS_HMASK           (IPREASS_NHASH - 1)
#define IPREASS_HASH(x,y) \
	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
183194a213eSAndrey A. Chernov 
184462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
185f23b4c91SGarrett Wollman 
/* Only built when the IPCTL_DEFMTU identifier is defined. */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
    &ip_mtu, 0, "Default MTU");
#endif

/* net.inet.ip.stealth: exists only in kernels built with IPSTEALTH. */
#ifdef IPSTEALTH
static int	ipstealth = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
    &ipstealth, 0, "");
#endif
1961b968362SDag-Erling Smørgrav 
197cfe8b629SGarrett Wollman 
19823bf9953SPoul-Henning Kamp /* Firewall hooks */
19923bf9953SPoul-Henning Kamp ip_fw_chk_t *ip_fw_chk_ptr;
2009fcc0795SLuigi Rizzo int fw_enable = 1 ;
20197850a5dSLuigi Rizzo int fw_one_pass = 1;
202e7319babSPoul-Henning Kamp 
203db69a05dSPaul Saab /* Dummynet hooks */
204db69a05dSPaul Saab ip_dn_io_t *ip_dn_io_ptr;
205b715f178SLuigi Rizzo 
206afed1b49SDarren Reed 
207e7319babSPoul-Henning Kamp /*
2084d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
2094d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
2104d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
2114d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
2124d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
2134d2e3692SLuigi Rizzo  *
214df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
215df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
216df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
217df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
218df8bae1dSRodney W. Grimes  * to us.
219df8bae1dSRodney W. Grimes  */
2200312fbe9SPoul-Henning Kamp static int	ip_nhops = 0;
221df8bae1dSRodney W. Grimes static	struct ip_srcrt {
222df8bae1dSRodney W. Grimes 	struct	in_addr dst;			/* final destination */
223df8bae1dSRodney W. Grimes 	char	nop;				/* one NOP to align */
224df8bae1dSRodney W. Grimes 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
225df8bae1dSRodney W. Grimes 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
226df8bae1dSRodney W. Grimes } ip_srcrt;
227df8bae1dSRodney W. Grimes 
2284d77a549SAlfred Perlstein static void	save_rte(u_char *, struct in_addr);
2292b25acc1SLuigi Rizzo static int	ip_dooptions(struct mbuf *m, int,
2302b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2312b25acc1SLuigi Rizzo static void	ip_forward(struct mbuf *m, int srcrt,
2322b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2334d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2342b25acc1SLuigi Rizzo static struct	mbuf *ip_reass(struct mbuf *, struct ipqhead *,
2352b25acc1SLuigi Rizzo 		struct ipq *, u_int32_t *, u_int16_t *);
2368948e4baSArchie Cobbs 
237df8bae1dSRodney W. Grimes /*
238df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
239df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
240df8bae1dSRodney W. Grimes  */
241df8bae1dSRodney W. Grimes void
242df8bae1dSRodney W. Grimes ip_init()
243df8bae1dSRodney W. Grimes {
244f0ffb944SJulian Elischer 	register struct protosw *pr;
245df8bae1dSRodney W. Grimes 	register int i;
246df8bae1dSRodney W. Grimes 
24759562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
248ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
249f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
250df8bae1dSRodney W. Grimes 	if (pr == 0)
251df8bae1dSRodney W. Grimes 		panic("ip_init");
252df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
253df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
254f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
255f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
256df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
257df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
258df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
259194a213eSAndrey A. Chernov 
260194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
261462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
262194a213eSAndrey A. Chernov 
263375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
264375386e2SMike Silbersack 	maxfragsperpacket = 16;
265194a213eSAndrey A. Chernov 
26664dddc18SKris Kennaway #ifndef RANDOM_IP_ID
267227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
26864dddc18SKris Kennaway #endif
269df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2706008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2711cafed39SJonathan Lemon 	netisr_register(NETISR_IP, ip_input, &ipintrq);
272df8bae1dSRodney W. Grimes }
273df8bae1dSRodney W. Grimes 
2744d2e3692SLuigi Rizzo /*
2754d2e3692SLuigi Rizzo  * XXX watch out this one. It is perhaps used as a cache for
2764d2e3692SLuigi Rizzo  * the most recently used route ? it is cleared in in_addroute()
2774d2e3692SLuigi Rizzo  * when a new route is successfully created.
2784d2e3692SLuigi Rizzo  */
2791e3d5af0SRuslan Ermilov struct	route ipforward_rt;
280df8bae1dSRodney W. Grimes 
281df8bae1dSRodney W. Grimes /*
282df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
283df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
284df8bae1dSRodney W. Grimes  */
285c67b1d17SGarrett Wollman void
286c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
287df8bae1dSRodney W. Grimes {
28823bf9953SPoul-Henning Kamp 	struct ip *ip;
28923bf9953SPoul-Henning Kamp 	struct ipq *fp;
2905da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
291ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
292823db0e9SDon Lewis 	int    i, hlen, checkif;
29347c861ecSBrian Somers 	u_short sum;
2947538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
2958948e4baSArchie Cobbs 	u_int32_t divert_info = 0;		/* packet divert/tee info */
2962b25acc1SLuigi Rizzo 	struct ip_fw_args args;
297c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
298c4ac87eaSDarren Reed 	struct packet_filter_hook *pfh;
299c4ac87eaSDarren Reed 	struct mbuf *m0;
300c4ac87eaSDarren Reed 	int rv;
301c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
302b9234fafSSam Leffler #ifdef FAST_IPSEC
303b9234fafSSam Leffler 	struct m_tag *mtag;
304b9234fafSSam Leffler 	struct tdb_ident *tdbi;
305b9234fafSSam Leffler 	struct secpolicy *sp;
306b9234fafSSam Leffler 	int s, error;
307b9234fafSSam Leffler #endif /* FAST_IPSEC */
308b715f178SLuigi Rizzo 
3092b25acc1SLuigi Rizzo 	args.eh = NULL;
3102b25acc1SLuigi Rizzo 	args.oif = NULL;
3112b25acc1SLuigi Rizzo 	args.rule = NULL;
3122b25acc1SLuigi Rizzo 	args.divert_rule = 0;			/* divert cookie */
3132b25acc1SLuigi Rizzo 	args.next_hop = NULL;
3148948e4baSArchie Cobbs 
3152b25acc1SLuigi Rizzo 	/* Grab info from MT_TAG mbufs prepended to the chain.	*/
3162b25acc1SLuigi Rizzo 	for (; m && m->m_type == MT_TAG; m = m->m_next) {
3175d846453SSam Leffler 		switch(m->_m_tag_id) {
3182b25acc1SLuigi Rizzo 		default:
3192b25acc1SLuigi Rizzo 			printf("ip_input: unrecognised MT_TAG tag %d\n",
3205d846453SSam Leffler 			    m->_m_tag_id);
3212b25acc1SLuigi Rizzo 			break;
3222b25acc1SLuigi Rizzo 
3232b25acc1SLuigi Rizzo 		case PACKET_TAG_DUMMYNET:
3242b25acc1SLuigi Rizzo 			args.rule = ((struct dn_pkt *)m)->rule;
3252b25acc1SLuigi Rizzo 			break;
3262b25acc1SLuigi Rizzo 
3272b25acc1SLuigi Rizzo 		case PACKET_TAG_DIVERT:
3287627c6cbSMaxime Henrion 			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
3292b25acc1SLuigi Rizzo 			break;
3302b25acc1SLuigi Rizzo 
3312b25acc1SLuigi Rizzo 		case PACKET_TAG_IPFORWARD:
3322b25acc1SLuigi Rizzo 			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
3332b25acc1SLuigi Rizzo 			break;
3342b25acc1SLuigi Rizzo 		}
3352b25acc1SLuigi Rizzo 	}
336df8bae1dSRodney W. Grimes 
337fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
338db40007dSAndrew R. Reiter 
3392b25acc1SLuigi Rizzo 	if (args.rule) {	/* dummynet already filtered us */
3402b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
34153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
3422b25acc1SLuigi Rizzo 		goto iphack ;
3432b25acc1SLuigi Rizzo 	}
3442b25acc1SLuigi Rizzo 
345df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
34658938916SGarrett Wollman 
34758938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
34858938916SGarrett Wollman 		goto tooshort;
34958938916SGarrett Wollman 
350df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
351df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
352df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
353c67b1d17SGarrett Wollman 		return;
354df8bae1dSRodney W. Grimes 	}
355df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
35658938916SGarrett Wollman 
35753be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
358df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
359df8bae1dSRodney W. Grimes 		goto bad;
360df8bae1dSRodney W. Grimes 	}
36158938916SGarrett Wollman 
36253be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
363df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
364df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
365df8bae1dSRodney W. Grimes 		goto bad;
366df8bae1dSRodney W. Grimes 	}
367df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
368df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
369df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
370c67b1d17SGarrett Wollman 			return;
371df8bae1dSRodney W. Grimes 		}
372df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
373df8bae1dSRodney W. Grimes 	}
37433841545SHajimu UMEMOTO 
37533841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
37633841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
37733841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
37833841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
37933841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
38033841545SHajimu UMEMOTO 			goto bad;
38133841545SHajimu UMEMOTO 		}
38233841545SHajimu UMEMOTO 	}
38333841545SHajimu UMEMOTO 
384db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
385db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
386db4f9cc7SJonathan Lemon 	} else {
38758938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
38847c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
38958938916SGarrett Wollman 		} else {
39047c861ecSBrian Somers 			sum = in_cksum(m, hlen);
39158938916SGarrett Wollman 		}
392db4f9cc7SJonathan Lemon 	}
39347c861ecSBrian Somers 	if (sum) {
394df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
395df8bae1dSRodney W. Grimes 		goto bad;
396df8bae1dSRodney W. Grimes 	}
397df8bae1dSRodney W. Grimes 
398df8bae1dSRodney W. Grimes 	/*
399df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
400df8bae1dSRodney W. Grimes 	 */
401fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
402df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
403df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
404df8bae1dSRodney W. Grimes 		goto bad;
405df8bae1dSRodney W. Grimes 	}
406fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
407df8bae1dSRodney W. Grimes 
408df8bae1dSRodney W. Grimes 	/*
409df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
410df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
411df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
412df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
413df8bae1dSRodney W. Grimes 	 */
414df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
41558938916SGarrett Wollman tooshort:
416df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
417df8bae1dSRodney W. Grimes 		goto bad;
418df8bae1dSRodney W. Grimes 	}
419df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
420df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
421df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
422df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
423df8bae1dSRodney W. Grimes 		} else
424df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
425df8bae1dSRodney W. Grimes 	}
42614dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
42714dd6717SSam Leffler 	/*
42814dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
42914dd6717SSam Leffler 	 */
43014dd6717SSam Leffler 	if (ipsec_gethist(m, NULL))
43114dd6717SSam Leffler 		goto pass;
43214dd6717SSam Leffler #endif
4333f67c834SDon Lewis 
4344dd1662bSUgen J.S. Antsilevich 	/*
4354dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
4364dd1662bSUgen J.S. Antsilevich 	 * Right now when no processing on packet has done
4374dd1662bSUgen J.S. Antsilevich 	 * and it is still fresh out of network we do our black
4384dd1662bSUgen J.S. Antsilevich 	 * deals with it.
43993e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
440fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
441b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4424dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4434dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4444dd1662bSUgen J.S. Antsilevich  	 */
445b715f178SLuigi Rizzo 
446b715f178SLuigi Rizzo iphack:
447df8bae1dSRodney W. Grimes 
448c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
449c4ac87eaSDarren Reed 	/*
450c4ac87eaSDarren Reed 	 * Run through list of hooks for input packets.  If there are any
451c4ac87eaSDarren Reed 	 * filters which require that additional packets in the flow are
452c4ac87eaSDarren Reed 	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
453c4ac87eaSDarren Reed 	 * Note that filters must _never_ set this flag, as another filter
454c4ac87eaSDarren Reed 	 * in the list may have previously cleared it.
455c4ac87eaSDarren Reed 	 */
456c4ac87eaSDarren Reed 	m0 = m;
457c4ac87eaSDarren Reed 	pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
458fc2ffbe6SPoul-Henning Kamp 	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
459c4ac87eaSDarren Reed 		if (pfh->pfil_func) {
460c4ac87eaSDarren Reed 			rv = pfh->pfil_func(ip, hlen,
461c4ac87eaSDarren Reed 					    m->m_pkthdr.rcvif, 0, &m0);
462c4ac87eaSDarren Reed 			if (rv)
463beec8214SDarren Reed 				return;
464c4ac87eaSDarren Reed 			m = m0;
465c4ac87eaSDarren Reed 			if (m == NULL)
466c4ac87eaSDarren Reed 				return;
467c4ac87eaSDarren Reed 			ip = mtod(m, struct ip *);
468beec8214SDarren Reed 		}
469c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
470c4ac87eaSDarren Reed 
4717b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
472f9e354dfSJulian Elischer 		/*
473f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
474f9e354dfSJulian Elischer 		 * skip the firewall a second time
475f9e354dfSJulian Elischer 		 */
4762b25acc1SLuigi Rizzo 		if (args.next_hop)
477f9e354dfSJulian Elischer 			goto ours;
4782b25acc1SLuigi Rizzo 
4792b25acc1SLuigi Rizzo 		args.m = m;
4802b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
4812b25acc1SLuigi Rizzo 		m = args.m;
4822b25acc1SLuigi Rizzo 
483d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
484507b4b54SLuigi Rizzo 			if (m)
485507b4b54SLuigi Rizzo 				m_freem(m);
486b715f178SLuigi Rizzo 			return;
487507b4b54SLuigi Rizzo 		}
488d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
4892b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
490b715f178SLuigi Rizzo 			goto pass;
4917b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
4928948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
4932b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
494e4676ba6SJulian Elischer 			return;
49593e0e116SJulian Elischer 		}
496b715f178SLuigi Rizzo #ifdef IPDIVERT
4978948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
4988948e4baSArchie Cobbs 			/* Divert or tee packet */
4998948e4baSArchie Cobbs 			divert_info = i;
500b715f178SLuigi Rizzo 			goto ours;
501b715f178SLuigi Rizzo 		}
502b715f178SLuigi Rizzo #endif
5032b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
504b715f178SLuigi Rizzo 			goto pass;
505b715f178SLuigi Rizzo 		/*
506b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
507b715f178SLuigi Rizzo 		 */
508b715f178SLuigi Rizzo 		m_freem(m);
509b715f178SLuigi Rizzo 		return;
510b715f178SLuigi Rizzo 	}
511b715f178SLuigi Rizzo pass:
512100ba1a6SJordan K. Hubbard 
513df8bae1dSRodney W. Grimes 	/*
514df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
515df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
516df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
517df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
518df8bae1dSRodney W. Grimes 	 */
519df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5202b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
521c67b1d17SGarrett Wollman 		return;
522df8bae1dSRodney W. Grimes 
523f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
524f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
525f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
526f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
527f0068c4aSGarrett Wollman 	 * grabbing the packet.
528f0068c4aSGarrett Wollman          */
5291c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
530f0068c4aSGarrett Wollman 		goto ours;
531f0068c4aSGarrett Wollman 
532df8bae1dSRodney W. Grimes 	/*
533df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
534cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
535cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
536cc766e04SGarrett Wollman 	 * with it).
537df8bae1dSRodney W. Grimes 	 */
538cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
539cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
540cc766e04SGarrett Wollman 		goto ours;
541cc766e04SGarrett Wollman 
5427538a9a0SJonathan Lemon 	/*
5437538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5447538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5457538a9a0SJonathan Lemon 	 */
5462b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5477538a9a0SJonathan Lemon 
548823db0e9SDon Lewis 	/*
549823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
550823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
551823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
552e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
553e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5543f67c834SDon Lewis 	 *
5553f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5563f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5573f67c834SDon Lewis 	 *
558a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5593f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5603f67c834SDon Lewis 	 * the packets are received.
561823db0e9SDon Lewis 	 */
562823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5639494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
564e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
5652b25acc1SLuigi Rizzo 	    (args.next_hop == NULL);
566823db0e9SDon Lewis 
567ca925d9cSJonathan Lemon 	/*
568ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
569ca925d9cSJonathan Lemon 	 */
570ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
571f9e354dfSJulian Elischer 		/*
572823db0e9SDon Lewis 		 * If the address matches, verify that the packet
573823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
574823db0e9SDon Lewis 		 * enabled.
575f9e354dfSJulian Elischer 		 */
576823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
577823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
578ed1ff184SJulian Elischer 			goto ours;
579ca925d9cSJonathan Lemon 	}
580823db0e9SDon Lewis 	/*
581ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
582ca925d9cSJonathan Lemon 	 *
583ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
584ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
585ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
586ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
587823db0e9SDon Lewis 	 */
588ca925d9cSJonathan Lemon 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
589ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
590ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
591ca925d9cSJonathan Lemon 				continue;
592ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
593df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
5947538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
595df8bae1dSRodney W. Grimes 				goto ours;
5967538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
597df8bae1dSRodney W. Grimes 				goto ours;
598ca925d9cSJonathan Lemon #ifdef BOOTP_COMPAT
599ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
600ca925d9cSJonathan Lemon 				goto ours;
601ca925d9cSJonathan Lemon #endif
602df8bae1dSRodney W. Grimes 		}
603df8bae1dSRodney W. Grimes 	}
604df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
605df8bae1dSRodney W. Grimes 		struct in_multi *inm;
606df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
607df8bae1dSRodney W. Grimes 			/*
608df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
609df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
610df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
611df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
612df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
613df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
614df8bae1dSRodney W. Grimes 			 */
615bbb4330bSLuigi Rizzo 			if (ip_mforward &&
616bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
617df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
618df8bae1dSRodney W. Grimes 				m_freem(m);
619c67b1d17SGarrett Wollman 				return;
620df8bae1dSRodney W. Grimes 			}
621df8bae1dSRodney W. Grimes 
622df8bae1dSRodney W. Grimes 			/*
62311612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
624df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
625df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
626df8bae1dSRodney W. Grimes 			 */
627df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
628df8bae1dSRodney W. Grimes 				goto ours;
629df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
630df8bae1dSRodney W. Grimes 		}
631df8bae1dSRodney W. Grimes 		/*
632df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
633df8bae1dSRodney W. Grimes 		 * arrival interface.
634df8bae1dSRodney W. Grimes 		 */
635df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
636df8bae1dSRodney W. Grimes 		if (inm == NULL) {
63782c39223SGarrett Wollman 			ipstat.ips_notmember++;
638df8bae1dSRodney W. Grimes 			m_freem(m);
639c67b1d17SGarrett Wollman 			return;
640df8bae1dSRodney W. Grimes 		}
641df8bae1dSRodney W. Grimes 		goto ours;
642df8bae1dSRodney W. Grimes 	}
643df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
644df8bae1dSRodney W. Grimes 		goto ours;
645df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
646df8bae1dSRodney W. Grimes 		goto ours;
647df8bae1dSRodney W. Grimes 
6486a800098SYoshinobu Inoue 	/*
6496a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6506a800098SYoshinobu Inoue 	 */
6516a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6526a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6536a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6546a800098SYoshinobu Inoue 				goto ours;
6556a800098SYoshinobu Inoue 		}
6566a800098SYoshinobu Inoue 		m_freem(m);
6576a800098SYoshinobu Inoue 		return;
6586a800098SYoshinobu Inoue 	}
6599494d596SBrooks Davis 
660df8bae1dSRodney W. Grimes 	/*
661df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
662df8bae1dSRodney W. Grimes 	 */
663df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
664df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
665df8bae1dSRodney W. Grimes 		m_freem(m);
666546f251bSChris D. Faulhaber 	} else {
667546f251bSChris D. Faulhaber #ifdef IPSEC
668546f251bSChris D. Faulhaber 		/*
669546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
670546f251bSChris D. Faulhaber 		 */
671546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
672546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
673546f251bSChris D. Faulhaber 			goto bad;
674546f251bSChris D. Faulhaber 		}
675546f251bSChris D. Faulhaber #endif /* IPSEC */
676b9234fafSSam Leffler #ifdef FAST_IPSEC
677b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
678b9234fafSSam Leffler 		s = splnet();
679b9234fafSSam Leffler 		if (mtag != NULL) {
680b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
681b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
682b9234fafSSam Leffler 		} else {
683b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
684b9234fafSSam Leffler 						   IP_FORWARDING, &error);
685b9234fafSSam Leffler 		}
686b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
687b9234fafSSam Leffler 			splx(s);
688b9234fafSSam Leffler 			/*XXX error stat???*/
689b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
690b9234fafSSam Leffler 			goto bad;
691b9234fafSSam Leffler 		}
692b9234fafSSam Leffler 
693b9234fafSSam Leffler 		/*
694b9234fafSSam Leffler 		 * Check security policy against packet attributes.
695b9234fafSSam Leffler 		 */
696b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
697b9234fafSSam Leffler 		KEY_FREESP(&sp);
698b9234fafSSam Leffler 		splx(s);
699b9234fafSSam Leffler 		if (error) {
700b9234fafSSam Leffler 			ipstat.ips_cantforward++;
701b9234fafSSam Leffler 			goto bad;
702b9234fafSSam Leffler 		}
703b9234fafSSam Leffler #endif /* FAST_IPSEC */
7042b25acc1SLuigi Rizzo 		ip_forward(m, 0, args.next_hop);
705546f251bSChris D. Faulhaber 	}
706c67b1d17SGarrett Wollman 	return;
707df8bae1dSRodney W. Grimes 
708df8bae1dSRodney W. Grimes ours:
709d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
710d0ebc0d2SYaroslav Tykhiy 	/*
711d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
712d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
713d0ebc0d2SYaroslav Tykhiy 	 */
7142b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
7152b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
716d0ebc0d2SYaroslav Tykhiy 		return;
717d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
718d0ebc0d2SYaroslav Tykhiy 
7195da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7205da9f8faSJosef Karthauser 	if (ia != NULL) {
7215da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7225da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7235da9f8faSJosef Karthauser 	}
724100ba1a6SJordan K. Hubbard 
72563f8d699SJordan K. Hubbard 	/*
726df8bae1dSRodney W. Grimes 	 * If offset or IP_MF are set, must reassemble.
727df8bae1dSRodney W. Grimes 	 * Otherwise, nothing need be done.
728df8bae1dSRodney W. Grimes 	 * (We could look in the reassembly queue to see
729df8bae1dSRodney W. Grimes 	 * if the packet was previously fragmented,
730df8bae1dSRodney W. Grimes 	 * but it's not worth the time; just let them time out.)
731df8bae1dSRodney W. Grimes 	 */
732b6ea1aa5SRuslan Ermilov 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
7336a800098SYoshinobu Inoue 
734ecf44c01SMike Silbersack 		/* If maxnipq is 0, never accept fragments. */
735ac64c866SMike Silbersack 		if (maxnipq == 0) {
736ac64c866SMike Silbersack                 	ipstat.ips_fragments++;
737ac64c866SMike Silbersack 			ipstat.ips_fragdropped++;
738ac64c866SMike Silbersack 			goto bad;
739ac64c866SMike Silbersack 		}
740ac64c866SMike Silbersack 
741194a213eSAndrey A. Chernov 		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
742df8bae1dSRodney W. Grimes 		/*
743df8bae1dSRodney W. Grimes 		 * Look for queue of fragments
744df8bae1dSRodney W. Grimes 		 * of this datagram.
745df8bae1dSRodney W. Grimes 		 */
746462b86feSPoul-Henning Kamp 		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
747df8bae1dSRodney W. Grimes 			if (ip->ip_id == fp->ipq_id &&
748df8bae1dSRodney W. Grimes 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
749df8bae1dSRodney W. Grimes 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
75036b0360bSRobert Watson #ifdef MAC
75136b0360bSRobert Watson 			    mac_fragment_match(m, fp) &&
75236b0360bSRobert Watson #endif
753df8bae1dSRodney W. Grimes 			    ip->ip_p == fp->ipq_p)
754df8bae1dSRodney W. Grimes 				goto found;
755df8bae1dSRodney W. Grimes 
756194a213eSAndrey A. Chernov 		fp = 0;
757194a213eSAndrey A. Chernov 
758ac64c866SMike Silbersack 		/*
759ac64c866SMike Silbersack 		 * Enforce upper bound on number of fragmented packets
760ac64c866SMike Silbersack 		 * for which we attempt reassembly;
761ac64c866SMike Silbersack 		 * If maxnipq is -1, accept all fragments without limitation.
762ac64c866SMike Silbersack 		 */
763ac64c866SMike Silbersack 		if ((nipq > maxnipq) && (maxnipq > 0)) {
764194a213eSAndrey A. Chernov 		    /*
765194a213eSAndrey A. Chernov 		     * drop something from the tail of the current queue
766194a213eSAndrey A. Chernov 		     * before proceeding further
767194a213eSAndrey A. Chernov 		     */
768462b86feSPoul-Henning Kamp 		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
769462b86feSPoul-Henning Kamp 		    if (q == NULL) {   /* gak */
770194a213eSAndrey A. Chernov 			for (i = 0; i < IPREASS_NHASH; i++) {
771462b86feSPoul-Henning Kamp 			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
772462b86feSPoul-Henning Kamp 			    if (r) {
77399e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += r->ipq_nfrags;
774462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], r);
775194a213eSAndrey A. Chernov 				break;
776194a213eSAndrey A. Chernov 			    }
777194a213eSAndrey A. Chernov 			}
778ac64c866SMike Silbersack 		    } else {
77999e8617dSMaxim Konovalov 			ipstat.ips_fragtimeout += q->ipq_nfrags;
780462b86feSPoul-Henning Kamp 			ip_freef(&ipq[sum], q);
781ac64c866SMike Silbersack 		    }
782194a213eSAndrey A. Chernov 		}
783194a213eSAndrey A. Chernov found:
784df8bae1dSRodney W. Grimes 		/*
785df8bae1dSRodney W. Grimes 		 * Adjust ip_len to not reflect header,
786df8bae1dSRodney W. Grimes 		 * convert offset of this to bytes.
787df8bae1dSRodney W. Grimes 		 */
788df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
789b6ea1aa5SRuslan Ermilov 		if (ip->ip_off & IP_MF) {
7906effc713SDoug Rabson 		        /*
7916effc713SDoug Rabson 		         * Make sure that fragments have a data length
7926effc713SDoug Rabson 			 * that's a non-zero multiple of 8 bytes.
7936effc713SDoug Rabson 		         */
7946effc713SDoug Rabson 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
7956effc713SDoug Rabson 				ipstat.ips_toosmall++; /* XXX */
7966effc713SDoug Rabson 				goto bad;
7976effc713SDoug Rabson 			}
7986effc713SDoug Rabson 			m->m_flags |= M_FRAG;
7991cf43499SMaxim Konovalov 		} else
8001cf43499SMaxim Konovalov 			m->m_flags &= ~M_FRAG;
801df8bae1dSRodney W. Grimes 		ip->ip_off <<= 3;
802df8bae1dSRodney W. Grimes 
803df8bae1dSRodney W. Grimes 		/*
804b6ea1aa5SRuslan Ermilov 		 * Attempt reassembly; if it succeeds, proceed.
8052b25acc1SLuigi Rizzo 		 * ip_reass() will return a different mbuf, and update
8062b25acc1SLuigi Rizzo 		 * the divert info in divert_info and args.divert_rule.
807df8bae1dSRodney W. Grimes 		 */
808df8bae1dSRodney W. Grimes 		ipstat.ips_fragments++;
809487bdb38SRuslan Ermilov 		m->m_pkthdr.header = ip;
8106a800098SYoshinobu Inoue 		m = ip_reass(m,
8112b25acc1SLuigi Rizzo 		    &ipq[sum], fp, &divert_info, &args.divert_rule);
8122b25acc1SLuigi Rizzo 		if (m == 0)
813c67b1d17SGarrett Wollman 			return;
814df8bae1dSRodney W. Grimes 		ipstat.ips_reassembled++;
8156a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8167e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
81753be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
818af782f1cSBrian Somers #ifdef IPDIVERT
8198948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
8208948e4baSArchie Cobbs 		if (divert_info != 0) {
821af782f1cSBrian Somers 			ip->ip_len += hlen;
822fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
823fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
824af782f1cSBrian Somers 			ip->ip_sum = 0;
82560123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
826af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
82760123168SRuslan Ermilov 			else
82860123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
829fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
830fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
831af782f1cSBrian Somers 			ip->ip_len -= hlen;
832af782f1cSBrian Somers 		}
833af782f1cSBrian Somers #endif
834df8bae1dSRodney W. Grimes 	} else
835df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
836df8bae1dSRodney W. Grimes 
83793e0e116SJulian Elischer #ifdef IPDIVERT
83893e0e116SJulian Elischer 	/*
8398948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
84093e0e116SJulian Elischer 	 */
8418948e4baSArchie Cobbs 	if (divert_info != 0) {
8428948e4baSArchie Cobbs 		struct mbuf *clone = NULL;
8438948e4baSArchie Cobbs 
8448948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
8458948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
846a163d034SWarner Losh 			clone = m_dup(m, M_DONTWAIT);
8478948e4baSArchie Cobbs 
8488948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
8498948e4baSArchie Cobbs 		ip->ip_len += hlen;
850fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
851fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
8528948e4baSArchie Cobbs 
8538948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
8542b25acc1SLuigi Rizzo 		divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
855e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
8568948e4baSArchie Cobbs 
8578948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
8588948e4baSArchie Cobbs 		if (clone == NULL)
85993e0e116SJulian Elischer 			return;
8608948e4baSArchie Cobbs 		m = clone;
8618948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
86256962689SCrist J. Clark 		ip->ip_len += hlen;
8632b25acc1SLuigi Rizzo 		/*
8642b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
8652b25acc1SLuigi Rizzo 		 * packet. But first clear divert_info to avoid
8662b25acc1SLuigi Rizzo 		 * entering this block again.
8672b25acc1SLuigi Rizzo 		 * We do not need to clear args.divert_rule
8682b25acc1SLuigi Rizzo 		 * or args.next_hop as they will not be used.
8692b25acc1SLuigi Rizzo 		 */
87056962689SCrist J. Clark 		divert_info = 0;
87156962689SCrist J. Clark 		goto pass;
87293e0e116SJulian Elischer 	}
87393e0e116SJulian Elischer #endif
87493e0e116SJulian Elischer 
87533841545SHajimu UMEMOTO #ifdef IPSEC
87633841545SHajimu UMEMOTO 	/*
87733841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
87833841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
87933841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
88033841545SHajimu UMEMOTO 	 */
88133841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
88233841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
88333841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
88433841545SHajimu UMEMOTO 		goto bad;
88533841545SHajimu UMEMOTO 	}
88633841545SHajimu UMEMOTO #endif
887b9234fafSSam Leffler #if FAST_IPSEC
888b9234fafSSam Leffler 	/*
889b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
890b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
891b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
892b9234fafSSam Leffler 	 */
893b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
894b9234fafSSam Leffler 		/*
895b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
896b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
897b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
898b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
899b9234fafSSam Leffler 		 */
900b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
901b9234fafSSam Leffler 		s = splnet();
902b9234fafSSam Leffler 		if (mtag != NULL) {
903b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
904b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
905b9234fafSSam Leffler 		} else {
906b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
907b9234fafSSam Leffler 						   IP_FORWARDING, &error);
908b9234fafSSam Leffler 		}
909b9234fafSSam Leffler 		if (sp != NULL) {
910b9234fafSSam Leffler 			/*
911b9234fafSSam Leffler 			 * Check security policy against packet attributes.
912b9234fafSSam Leffler 			 */
913b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
914b9234fafSSam Leffler 			KEY_FREESP(&sp);
915b9234fafSSam Leffler 		} else {
916b9234fafSSam Leffler 			/* XXX error stat??? */
917b9234fafSSam Leffler 			error = EINVAL;
918b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
919b9234fafSSam Leffler 			goto bad;
920b9234fafSSam Leffler 		}
921b9234fafSSam Leffler 		splx(s);
922b9234fafSSam Leffler 		if (error)
923b9234fafSSam Leffler 			goto bad;
924b9234fafSSam Leffler 	}
925b9234fafSSam Leffler #endif /* FAST_IPSEC */
92633841545SHajimu UMEMOTO 
927df8bae1dSRodney W. Grimes 	/*
928df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
929df8bae1dSRodney W. Grimes 	 */
930df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
9312b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
9322b25acc1SLuigi Rizzo 		/* TCP needs IPFORWARD info if available */
9332b25acc1SLuigi Rizzo 		struct m_hdr tag;
9346a800098SYoshinobu Inoue 
9352b25acc1SLuigi Rizzo 		tag.mh_type = MT_TAG;
9362b25acc1SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
9372b25acc1SLuigi Rizzo 		tag.mh_data = (caddr_t)args.next_hop;
9382b25acc1SLuigi Rizzo 		tag.mh_next = m;
9392b25acc1SLuigi Rizzo 
9402b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
9412b25acc1SLuigi Rizzo 			(struct mbuf *)&tag, hlen);
9422b25acc1SLuigi Rizzo 	} else
9432b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
944c67b1d17SGarrett Wollman 	return;
945df8bae1dSRodney W. Grimes bad:
946df8bae1dSRodney W. Grimes 	m_freem(m);
947c67b1d17SGarrett Wollman }
948c67b1d17SGarrett Wollman 
949c67b1d17SGarrett Wollman /*
9508948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
9518948e4baSArchie Cobbs  * whole datagram.  If a chain for reassembly of this datagram already
9528948e4baSArchie Cobbs  * exists, then it is given as fp; otherwise have to make a chain.
9538948e4baSArchie Cobbs  *
9548948e4baSArchie Cobbs  * When IPDIVERT enabled, keep additional state with each packet that
9558948e4baSArchie Cobbs  * tells us if we need to divert or tee the packet we're building.
9562b25acc1SLuigi Rizzo  * In particular, *divinfo includes the port and TEE flag,
9572b25acc1SLuigi Rizzo  * *divert_rule is the number of the matching rule.
958df8bae1dSRodney W. Grimes  */
9598948e4baSArchie Cobbs 
9606a800098SYoshinobu Inoue static struct mbuf *
9612b25acc1SLuigi Rizzo ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
9622b25acc1SLuigi Rizzo 	u_int32_t *divinfo, u_int16_t *divert_rule)
963df8bae1dSRodney W. Grimes {
9646effc713SDoug Rabson 	struct ip *ip = mtod(m, struct ip *);
965b6ea1aa5SRuslan Ermilov 	register struct mbuf *p, *q, *nq;
966df8bae1dSRodney W. Grimes 	struct mbuf *t;
96753be11f6SPoul-Henning Kamp 	int hlen = ip->ip_hl << 2;
968df8bae1dSRodney W. Grimes 	int i, next;
969df8bae1dSRodney W. Grimes 
970df8bae1dSRodney W. Grimes 	/*
971df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
972df8bae1dSRodney W. Grimes 	 * would confuse code below.
973df8bae1dSRodney W. Grimes 	 */
974df8bae1dSRodney W. Grimes 	m->m_data += hlen;
975df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
976df8bae1dSRodney W. Grimes 
977df8bae1dSRodney W. Grimes 	/*
978df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
979df8bae1dSRodney W. Grimes 	 */
980df8bae1dSRodney W. Grimes 	if (fp == 0) {
981a163d034SWarner Losh 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
982df8bae1dSRodney W. Grimes 			goto dropfrag;
983df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
98436b0360bSRobert Watson #ifdef MAC
9855e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
9865e7ce478SRobert Watson 			m_free(t);
9875e7ce478SRobert Watson 			goto dropfrag;
9885e7ce478SRobert Watson 		}
98936b0360bSRobert Watson 		mac_create_ipq(m, fp);
99036b0360bSRobert Watson #endif
991462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
992194a213eSAndrey A. Chernov 		nipq++;
993375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
994df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
995df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
996df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
9976effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
9986effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
999af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1000af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
100193e0e116SJulian Elischer #ifdef IPDIVERT
10028948e4baSArchie Cobbs 		fp->ipq_div_info = 0;
1003bb60f459SJulian Elischer 		fp->ipq_div_cookie = 0;
100493e0e116SJulian Elischer #endif
1005af38c68cSLuigi Rizzo 		goto inserted;
100636b0360bSRobert Watson 	} else {
1007375386e2SMike Silbersack 		fp->ipq_nfrags++;
100836b0360bSRobert Watson #ifdef MAC
100936b0360bSRobert Watson 		mac_update_ipq(m, fp);
101036b0360bSRobert Watson #endif
1011df8bae1dSRodney W. Grimes 	}
1012df8bae1dSRodney W. Grimes 
10136effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
10146effc713SDoug Rabson 
1015df8bae1dSRodney W. Grimes 	/*
1016df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1017df8bae1dSRodney W. Grimes 	 */
10186effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10196effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1020df8bae1dSRodney W. Grimes 			break;
1021df8bae1dSRodney W. Grimes 
1022df8bae1dSRodney W. Grimes 	/*
1023df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1024df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1025af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1026af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1027db4f9cc7SJonathan Lemon 	 *
1028db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
1029db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
1030df8bae1dSRodney W. Grimes 	 */
10316effc713SDoug Rabson 	if (p) {
10326effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1033df8bae1dSRodney W. Grimes 		if (i > 0) {
1034df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1035df8bae1dSRodney W. Grimes 				goto dropfrag;
10366a800098SYoshinobu Inoue 			m_adj(m, i);
1037db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1038df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1039df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1040df8bae1dSRodney W. Grimes 		}
1041af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1042af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1043af38c68cSLuigi Rizzo 	} else {
1044af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1045af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1046df8bae1dSRodney W. Grimes 	}
1047df8bae1dSRodney W. Grimes 
1048df8bae1dSRodney W. Grimes 	/*
1049df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1050df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1051df8bae1dSRodney W. Grimes 	 */
10526effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1053af38c68cSLuigi Rizzo 	     q = nq) {
1054b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10556effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10566effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10576effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10586effc713SDoug Rabson 			m_adj(q, i);
1059db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1060df8bae1dSRodney W. Grimes 			break;
1061df8bae1dSRodney W. Grimes 		}
10626effc713SDoug Rabson 		nq = q->m_nextpkt;
1063af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
106499e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1065375386e2SMike Silbersack 		fp->ipq_nfrags--;
10666effc713SDoug Rabson 		m_freem(q);
1067df8bae1dSRodney W. Grimes 	}
1068df8bae1dSRodney W. Grimes 
1069af38c68cSLuigi Rizzo inserted:
107093e0e116SJulian Elischer 
107193e0e116SJulian Elischer #ifdef IPDIVERT
107293e0e116SJulian Elischer 	/*
10738948e4baSArchie Cobbs 	 * Transfer firewall instructions to the fragment structure.
10742b25acc1SLuigi Rizzo 	 * Only trust info in the fragment at offset 0.
107593e0e116SJulian Elischer 	 */
10762b25acc1SLuigi Rizzo 	if (ip->ip_off == 0) {
10778948e4baSArchie Cobbs 		fp->ipq_div_info = *divinfo;
10782b25acc1SLuigi Rizzo 		fp->ipq_div_cookie = *divert_rule;
10792b25acc1SLuigi Rizzo 	}
10808948e4baSArchie Cobbs 	*divinfo = 0;
10812b25acc1SLuigi Rizzo 	*divert_rule = 0;
108293e0e116SJulian Elischer #endif
108393e0e116SJulian Elischer 
1084df8bae1dSRodney W. Grimes 	/*
1085375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1086375386e2SMike Silbersack 	 * limiting.
1087375386e2SMike Silbersack 	 *
1088375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1089375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1090375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1091375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1092375386e2SMike Silbersack 	 *
1093df8bae1dSRodney W. Grimes 	 */
10946effc713SDoug Rabson 	next = 0;
10956effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1096375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
109799e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
109899e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1099375386e2SMike Silbersack 				ip_freef(head, fp);
110099e8617dSMaxim Konovalov 			}
11016effc713SDoug Rabson 			return (0);
1102375386e2SMike Silbersack 		}
11036effc713SDoug Rabson 		next += GETIP(q)->ip_len;
11046effc713SDoug Rabson 	}
11056effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1106375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
110799e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
110899e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1109375386e2SMike Silbersack 			ip_freef(head, fp);
111099e8617dSMaxim Konovalov 		}
1111df8bae1dSRodney W. Grimes 		return (0);
1112375386e2SMike Silbersack 	}
1113df8bae1dSRodney W. Grimes 
1114df8bae1dSRodney W. Grimes 	/*
1115430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1116430d30d8SBill Fenner 	 */
11176effc713SDoug Rabson 	q = fp->ipq_frags;
11186effc713SDoug Rabson 	ip = GETIP(q);
111953be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1120430d30d8SBill Fenner 		ipstat.ips_toolong++;
112199e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1122462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1123430d30d8SBill Fenner 		return (0);
1124430d30d8SBill Fenner 	}
1125430d30d8SBill Fenner 
1126430d30d8SBill Fenner 	/*
1127430d30d8SBill Fenner 	 * Concatenate fragments.
1128df8bae1dSRodney W. Grimes 	 */
11296effc713SDoug Rabson 	m = q;
1130df8bae1dSRodney W. Grimes 	t = m->m_next;
1131df8bae1dSRodney W. Grimes 	m->m_next = 0;
1132df8bae1dSRodney W. Grimes 	m_cat(m, t);
11336effc713SDoug Rabson 	nq = q->m_nextpkt;
1134945aa40dSDoug Rabson 	q->m_nextpkt = 0;
11356effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11366effc713SDoug Rabson 		nq = q->m_nextpkt;
1137945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1138db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1139db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1140a8db1d93SJonathan Lemon 		m_cat(m, q);
1141df8bae1dSRodney W. Grimes 	}
114236b0360bSRobert Watson #ifdef MAC
114336b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
114436b0360bSRobert Watson 	mac_destroy_ipq(fp);
114536b0360bSRobert Watson #endif
1146df8bae1dSRodney W. Grimes 
114793e0e116SJulian Elischer #ifdef IPDIVERT
114893e0e116SJulian Elischer 	/*
11498948e4baSArchie Cobbs 	 * Extract firewall instructions from the fragment structure.
115093e0e116SJulian Elischer 	 */
11518948e4baSArchie Cobbs 	*divinfo = fp->ipq_div_info;
11522b25acc1SLuigi Rizzo 	*divert_rule = fp->ipq_div_cookie;
115393e0e116SJulian Elischer #endif
115493e0e116SJulian Elischer 
1155df8bae1dSRodney W. Grimes 	/*
1156df8bae1dSRodney W. Grimes 	 * Create header for new ip packet by
1157df8bae1dSRodney W. Grimes 	 * modifying header of first packet;
1158df8bae1dSRodney W. Grimes 	 * dequeue and discard fragment reassembly header.
1159df8bae1dSRodney W. Grimes 	 * Make header visible.
1160df8bae1dSRodney W. Grimes 	 */
1161df8bae1dSRodney W. Grimes 	ip->ip_len = next;
11626effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
11636effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1164462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1165194a213eSAndrey A. Chernov 	nipq--;
1166df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
116753be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
116853be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1169df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1170a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1171a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
11726a800098SYoshinobu Inoue 	return (m);
1173df8bae1dSRodney W. Grimes 
1174df8bae1dSRodney W. Grimes dropfrag:
1175efe39c6aSJulian Elischer #ifdef IPDIVERT
11768948e4baSArchie Cobbs 	*divinfo = 0;
11772b25acc1SLuigi Rizzo 	*divert_rule = 0;
1178efe39c6aSJulian Elischer #endif
1179df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1180375386e2SMike Silbersack 	if (fp != 0)
1181375386e2SMike Silbersack 		fp->ipq_nfrags--;
1182df8bae1dSRodney W. Grimes 	m_freem(m);
1183df8bae1dSRodney W. Grimes 	return (0);
11846effc713SDoug Rabson 
11856effc713SDoug Rabson #undef GETIP
1186df8bae1dSRodney W. Grimes }
1187df8bae1dSRodney W. Grimes 
1188df8bae1dSRodney W. Grimes /*
1189df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1190df8bae1dSRodney W. Grimes  * associated datagrams.
1191df8bae1dSRodney W. Grimes  */
11920312fbe9SPoul-Henning Kamp static void
1193462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1194462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1195df8bae1dSRodney W. Grimes 	struct ipq *fp;
1196df8bae1dSRodney W. Grimes {
11976effc713SDoug Rabson 	register struct mbuf *q;
1198df8bae1dSRodney W. Grimes 
11996effc713SDoug Rabson 	while (fp->ipq_frags) {
12006effc713SDoug Rabson 		q = fp->ipq_frags;
12016effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
12026effc713SDoug Rabson 		m_freem(q);
1203df8bae1dSRodney W. Grimes 	}
1204462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1205df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1206194a213eSAndrey A. Chernov 	nipq--;
1207df8bae1dSRodney W. Grimes }
1208df8bae1dSRodney W. Grimes 
1209df8bae1dSRodney W. Grimes /*
1210df8bae1dSRodney W. Grimes  * IP timer processing;
1211df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1212df8bae1dSRodney W. Grimes  * queue, discard it.
1213df8bae1dSRodney W. Grimes  */
1214df8bae1dSRodney W. Grimes void
1215df8bae1dSRodney W. Grimes ip_slowtimo()
1216df8bae1dSRodney W. Grimes {
1217df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1218df8bae1dSRodney W. Grimes 	int s = splnet();
1219194a213eSAndrey A. Chernov 	int i;
1220df8bae1dSRodney W. Grimes 
1221194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1222462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1223462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1224462b86feSPoul-Henning Kamp 
1225462b86feSPoul-Henning Kamp 			fpp = fp;
1226462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1227462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
122899e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1229462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1230df8bae1dSRodney W. Grimes 			}
1231df8bae1dSRodney W. Grimes 		}
1232194a213eSAndrey A. Chernov 	}
1233690a6055SJesper Skriver 	/*
1234690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1235690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1236690a6055SJesper Skriver 	 * enough to get down to the new limit.
1237690a6055SJesper Skriver 	 */
1238a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1239690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1240b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
124199e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
124299e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1243690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1244690a6055SJesper Skriver 			}
1245690a6055SJesper Skriver 		}
1246690a6055SJesper Skriver 	}
12471f91d8c5SDavid Greenman 	ipflow_slowtimo();
1248df8bae1dSRodney W. Grimes 	splx(s);
1249df8bae1dSRodney W. Grimes }
1250df8bae1dSRodney W. Grimes 
1251df8bae1dSRodney W. Grimes /*
1252df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1253df8bae1dSRodney W. Grimes  */
1254df8bae1dSRodney W. Grimes void
1255df8bae1dSRodney W. Grimes ip_drain()
1256df8bae1dSRodney W. Grimes {
1257194a213eSAndrey A. Chernov 	int     i;
1258ce29ab3aSGarrett Wollman 
1259194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1260462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
126199e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
126299e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1263462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1264194a213eSAndrey A. Chernov 		}
1265194a213eSAndrey A. Chernov 	}
1266ce29ab3aSGarrett Wollman 	in_rtqdrain();
1267df8bae1dSRodney W. Grimes }
1268df8bae1dSRodney W. Grimes 
1269df8bae1dSRodney W. Grimes /*
1270df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1271df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1272df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1273d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1274d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1275d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1276d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1277d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1278df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1279df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1280df8bae1dSRodney W. Grimes  */
/*
 * Additional notes:
 *  - The packet in m is edited in place: source-route handling rewrites
 *    ip->ip_dst and advances the option's pointer byte; the RR and TS
 *    cases store an address / timestamp into the option and advance its
 *    pointer byte.
 *  - next_hop is not examined here; it is only handed on to ip_forward().
 *  - Every "goto bad" ends in icmp_error(m, type, code, 0, 0) and bumps
 *    ipstat.ips_badoptions.  type starts out as ICMP_PARAMPROB; on those
 *    paths code is the byte offset, from the start of the IP header, of
 *    the option byte being complained about.
 */
12810312fbe9SPoul-Henning Kamp static int
12822b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1283df8bae1dSRodney W. Grimes {
12842b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
12852b25acc1SLuigi Rizzo 	u_char *cp;
12862b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1287df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1288df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1289df8bae1dSRodney W. Grimes 	n_time ntime;
12904d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1291df8bae1dSRodney W. Grimes 
	/* dst keeps the destination as received; ip->ip_dst may be rewritten below */
1292df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1293df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
129453be11f6SPoul-Henning Kamp 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	/* cnt = option bytes not yet parsed; optlen = size of the current option */
1295df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1296df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1297df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1298df8bae1dSRodney W. Grimes 			break;
1299df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1300df8bae1dSRodney W. Grimes 			optlen = 1;
1301df8bae1dSRodney W. Grimes 		else {
			/* too few bytes remain to hold this option's length byte */
1302fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1303fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1304fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1305fdcb8debSJun-ichiro itojun Hagino 			}
1306df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
			/* the stated length must cover the length byte and fit in what remains */
1307707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1308df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1309df8bae1dSRodney W. Grimes 				goto bad;
1310df8bae1dSRodney W. Grimes 			}
1311df8bae1dSRodney W. Grimes 		}
1312df8bae1dSRodney W. Grimes 		switch (opt) {
1313df8bae1dSRodney W. Grimes 
1314df8bae1dSRodney W. Grimes 		default:
1315df8bae1dSRodney W. Grimes 			break;
1316df8bae1dSRodney W. Grimes 
1317df8bae1dSRodney W. Grimes 		/*
1318df8bae1dSRodney W. Grimes 		 * Source routing with record.
1319df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1320df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1321df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1322df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1323df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1324df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1325df8bae1dSRodney W. Grimes 		 */
1326df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1327df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1328d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1329d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1330d0ebc0d2SYaroslav Tykhiy 				break;
1331d0ebc0d2SYaroslav Tykhiy #endif
			/* option must be long enough to contain its pointer byte */
133233841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
133333841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
133433841545SHajimu UMEMOTO 				goto bad;
133533841545SHajimu UMEMOTO 			}
1336df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1337df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1338df8bae1dSRodney W. Grimes 				goto bad;
1339df8bae1dSRodney W. Grimes 			}
1340df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1341df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1342df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1343df8bae1dSRodney W. Grimes 			if (ia == 0) {
1344df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1345df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1346df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1347df8bae1dSRodney W. Grimes 					goto bad;
1348df8bae1dSRodney W. Grimes 				}
1349bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1350bc189bf8SGuido van Rooij 					goto nosourcerouting;
1351df8bae1dSRodney W. Grimes 				/*
1352df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1353df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1354df8bae1dSRodney W. Grimes 				 */
1355df8bae1dSRodney W. Grimes 				break;
1356df8bae1dSRodney W. Grimes 			}
1357df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
13585d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1359df8bae1dSRodney W. Grimes 				/*
1360df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1361df8bae1dSRodney W. Grimes 				 */
13624fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
13634fce5804SGuido van Rooij 					goto nosourcerouting;
1364df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1365df8bae1dSRodney W. Grimes 				break;
1366df8bae1dSRodney W. Grimes 			}
1367d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1368d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1369d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1370d0ebc0d2SYaroslav Tykhiy #endif
13711025071fSGarrett Wollman 			if (!ip_dosourceroute) {
13720af8d3ecSDavid Greenman 				if (ipforwarding) {
13730af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
13740af8d3ecSDavid Greenman 					/*
13750af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
13760af8d3ecSDavid Greenman 					 */
					/*
					 * The label below is also entered by goto from the two
					 * "goto nosourcerouting" sites above, bypassing both
					 * enclosing if tests.
					 * NOTE(review): buf presumably exists because inet_ntoa()
					 * returns a shared static buffer and is called twice for
					 * one log() -- confirm.
					 */
1377efa48587SGuido van Rooij nosourcerouting:
1378bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
13791025071fSGarrett Wollman 					log(LOG_WARNING,
13801025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
13811025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
13821025071fSGarrett Wollman 					type = ICMP_UNREACH;
13831025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
13841025071fSGarrett Wollman 					goto bad;
13850af8d3ecSDavid Greenman 				} else {
13860af8d3ecSDavid Greenman 					/*
13870af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
13880af8d3ecSDavid Greenman 					 */
					/* In IPSTEALTH builds this is also the target of "goto dropit". */
1389d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1390d0ebc0d2SYaroslav Tykhiy dropit:
1391d0ebc0d2SYaroslav Tykhiy #endif
13920af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
13930af8d3ecSDavid Greenman 					m_freem(m);
13940af8d3ecSDavid Greenman 					return (1);
13950af8d3ecSDavid Greenman 				}
13961025071fSGarrett Wollman 			}
13971025071fSGarrett Wollman 
1398df8bae1dSRodney W. Grimes 			/*
1399df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1400df8bae1dSRodney W. Grimes 			 */
140194a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1402df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
14031025071fSGarrett Wollman 
1404df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1405df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1406df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1407df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1408df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1409df8bae1dSRodney W. Grimes 			} else
1410bd714208SRuslan Ermilov 				ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
1411df8bae1dSRodney W. Grimes 			if (ia == 0) {
1412df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1413df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1414df8bae1dSRodney W. Grimes 				goto bad;
1415df8bae1dSRodney W. Grimes 			}
			/*
			 * Swap: the next hop becomes the packet destination, the
			 * outgoing interface address is recorded in its slot, and
			 * the option pointer moves past that slot.
			 */
1416df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
141794a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
141894a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1419df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1420df8bae1dSRodney W. Grimes 			/*
1421df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1422df8bae1dSRodney W. Grimes 			 */
1423df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1424df8bae1dSRodney W. Grimes 			break;
1425df8bae1dSRodney W. Grimes 
1426df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1427d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1428d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1429d0ebc0d2SYaroslav Tykhiy 				break;
1430d0ebc0d2SYaroslav Tykhiy #endif
1431707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1432707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1433707d00a3SJonathan Lemon 				goto bad;
1434707d00a3SJonathan Lemon 			}
1435df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1436df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1437df8bae1dSRodney W. Grimes 				goto bad;
1438df8bae1dSRodney W. Grimes 			}
1439df8bae1dSRodney W. Grimes 			/*
1440df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1441df8bae1dSRodney W. Grimes 			 */
1442df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
14435d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1444df8bae1dSRodney W. Grimes 				break;
144594a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1446df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1447df8bae1dSRodney W. Grimes 			/*
1448df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1449df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1450df8bae1dSRodney W. Grimes 			 */
1451df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1452bd714208SRuslan Ermilov 			    (ia = ip_rtaddr(ipaddr.sin_addr,
1453bd714208SRuslan Ermilov 			    &ipforward_rt)) == 0) {
1454df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1455df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1456df8bae1dSRodney W. Grimes 				goto bad;
1457df8bae1dSRodney W. Grimes 			}
145894a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
145994a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1460df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1461df8bae1dSRodney W. Grimes 			break;
1462df8bae1dSRodney W. Grimes 
1463df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1464d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1465d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1466d0ebc0d2SYaroslav Tykhiy 				break;
1467d0ebc0d2SYaroslav Tykhiy #endif
1468df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
146907514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
147007514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1471df8bae1dSRodney W. Grimes 				goto bad;
147233841545SHajimu UMEMOTO 			}
147307514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
147407514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
147533841545SHajimu UMEMOTO 				goto bad;
147633841545SHajimu UMEMOTO 			}
			/*
			 * Treated as "no room for another timestamp": add one to
			 * the counter held in the high nibble of the byte after
			 * the pointer; if that nibble wraps to 0, report an error,
			 * otherwise leave the option alone.
			 * NOTE(review): off is still 1-origin in this comparison,
			 * unlike the RR/SRR checks above -- confirm the boundary
			 * is intended.
			 */
147707514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
147807514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
147907514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
148007514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1481df8bae1dSRodney W. Grimes 					goto bad;
148233841545SHajimu UMEMOTO 				}
1483df8bae1dSRodney W. Grimes 				break;
1484df8bae1dSRodney W. Grimes 			}
148507514071SJonathan Lemon 			off--;				/* 0 origin */
148607514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
			/* low nibble of the byte after the pointer selects the timestamp format */
148707514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1488df8bae1dSRodney W. Grimes 
1489df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1490df8bae1dSRodney W. Grimes 				break;
1491df8bae1dSRodney W. Grimes 
1492df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
149307514071SJonathan Lemon 				if (off + sizeof(n_time) +
149407514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
149507514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1496df8bae1dSRodney W. Grimes 					goto bad;
149733841545SHajimu UMEMOTO 				}
1498df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1499df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1500df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
				/*
				 * continue (not break) applies to the enclosing for
				 * loop: the timestamp store after this switch is
				 * skipped and parsing moves on to the next option.
				 */
1501df8bae1dSRodney W. Grimes 				if (ia == 0)
1502df8bae1dSRodney W. Grimes 					continue;
150394a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
150494a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
150507514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1506a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1507df8bae1dSRodney W. Grimes 				break;
1508df8bae1dSRodney W. Grimes 
1509df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
151007514071SJonathan Lemon 				if (off + sizeof(n_time) +
151107514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
151207514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1513df8bae1dSRodney W. Grimes 					goto bad;
151433841545SHajimu UMEMOTO 				}
151594a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1516df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
				/* address in the slot is not one of ours: skip to the next option */
1517df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1518df8bae1dSRodney W. Grimes 					continue;
151907514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1520a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1521df8bae1dSRodney W. Grimes 				break;
1522df8bae1dSRodney W. Grimes 
1523df8bae1dSRodney W. Grimes 			default:
152407514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1525df8bae1dSRodney W. Grimes 				goto bad;
1526df8bae1dSRodney W. Grimes 			}
			/* common tail for accepted formats: store the time, advance the pointer */
1527df8bae1dSRodney W. Grimes 			ntime = iptime();
152807514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
152907514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1530df8bae1dSRodney W. Grimes 		}
1531df8bae1dSRodney W. Grimes 	}
	/* forward is only ever set by the LSRR/SSRR case above */
153247174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
15332b25acc1SLuigi Rizzo 		ip_forward(m, 1, next_hop);
1534df8bae1dSRodney W. Grimes 		return (1);
1535df8bae1dSRodney W. Grimes 	}
1536df8bae1dSRodney W. Grimes 	return (0);
1537df8bae1dSRodney W. Grimes bad:
1538df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1539df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1540df8bae1dSRodney W. Grimes 	return (1);
1541df8bae1dSRodney W. Grimes }
1542df8bae1dSRodney W. Grimes 
1543df8bae1dSRodney W. Grimes /*
1544df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1545df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1546df8bae1dSRodney W. Grimes  */
1547bd714208SRuslan Ermilov struct in_ifaddr *
1548bd714208SRuslan Ermilov ip_rtaddr(dst, rt)
1549df8bae1dSRodney W. Grimes 	struct in_addr dst;
1550bd714208SRuslan Ermilov 	struct route *rt;
1551df8bae1dSRodney W. Grimes {
1552df8bae1dSRodney W. Grimes 	register struct sockaddr_in *sin;
1553df8bae1dSRodney W. Grimes 
1554bd714208SRuslan Ermilov 	sin = (struct sockaddr_in *)&rt->ro_dst;
1555df8bae1dSRodney W. Grimes 
1556bd714208SRuslan Ermilov 	if (rt->ro_rt == 0 ||
1557bd714208SRuslan Ermilov 	    !(rt->ro_rt->rt_flags & RTF_UP) ||
15584078ffb1SRuslan Ermilov 	    dst.s_addr != sin->sin_addr.s_addr) {
1559bd714208SRuslan Ermilov 		if (rt->ro_rt) {
1560bd714208SRuslan Ermilov 			RTFREE(rt->ro_rt);
1561bd714208SRuslan Ermilov 			rt->ro_rt = 0;
1562df8bae1dSRodney W. Grimes 		}
1563df8bae1dSRodney W. Grimes 		sin->sin_family = AF_INET;
1564df8bae1dSRodney W. Grimes 		sin->sin_len = sizeof(*sin);
1565df8bae1dSRodney W. Grimes 		sin->sin_addr = dst;
1566df8bae1dSRodney W. Grimes 
1567bd714208SRuslan Ermilov 		rtalloc_ign(rt, RTF_PRCLONING);
1568df8bae1dSRodney W. Grimes 	}
1569bd714208SRuslan Ermilov 	if (rt->ro_rt == 0)
1570df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
1571bd714208SRuslan Ermilov 	return (ifatoia(rt->ro_rt->rt_ifa));
1572df8bae1dSRodney W. Grimes }
1573df8bae1dSRodney W. Grimes 
1574df8bae1dSRodney W. Grimes /*
1575df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1576df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1577df8bae1dSRodney W. Grimes  */
157837c84183SPoul-Henning Kamp static void
1579df8bae1dSRodney W. Grimes save_rte(option, dst)
1580df8bae1dSRodney W. Grimes 	u_char *option;
1581df8bae1dSRodney W. Grimes 	struct in_addr dst;
1582df8bae1dSRodney W. Grimes {
1583df8bae1dSRodney W. Grimes 	unsigned olen;
1584df8bae1dSRodney W. Grimes 
1585df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1586df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1587df8bae1dSRodney W. Grimes 	if (ipprintfs)
1588df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1589df8bae1dSRodney W. Grimes #endif
1590df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1591df8bae1dSRodney W. Grimes 		return;
15920453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1593df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1594df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1595df8bae1dSRodney W. Grimes }
1596df8bae1dSRodney W. Grimes 
1597df8bae1dSRodney W. Grimes /*
1598df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1599df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1600df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1601df8bae1dSRodney W. Grimes  */
1602df8bae1dSRodney W. Grimes struct mbuf *
1603df8bae1dSRodney W. Grimes ip_srcroute()
1604df8bae1dSRodney W. Grimes {
1605df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1606df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1607df8bae1dSRodney W. Grimes 
1608df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1609df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1610a163d034SWarner Losh 	m = m_get(M_DONTWAIT, MT_HEADER);
1611df8bae1dSRodney W. Grimes 	if (m == 0)
1612df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1613df8bae1dSRodney W. Grimes 
1614df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1615df8bae1dSRodney W. Grimes 
1616df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1617df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1618df8bae1dSRodney W. Grimes 	    OPTSIZ;
1619df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1620df8bae1dSRodney W. Grimes 	if (ipprintfs)
1621df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1622df8bae1dSRodney W. Grimes #endif
1623df8bae1dSRodney W. Grimes 
1624df8bae1dSRodney W. Grimes 	/*
1625df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1626df8bae1dSRodney W. Grimes 	 */
1627df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1628df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1629df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1630df8bae1dSRodney W. Grimes 	if (ipprintfs)
1631af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1632df8bae1dSRodney W. Grimes #endif
1633df8bae1dSRodney W. Grimes 
1634df8bae1dSRodney W. Grimes 	/*
1635df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1636df8bae1dSRodney W. Grimes 	 */
1637df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1638df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
163994a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
164094a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1641df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1642df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1643df8bae1dSRodney W. Grimes #undef OPTSIZ
1644df8bae1dSRodney W. Grimes 	/*
1645df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1646df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1647df8bae1dSRodney W. Grimes 	 */
1648df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1649df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1650df8bae1dSRodney W. Grimes 		if (ipprintfs)
1651af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1652df8bae1dSRodney W. Grimes #endif
1653df8bae1dSRodney W. Grimes 		*q++ = *p--;
1654df8bae1dSRodney W. Grimes 	}
1655df8bae1dSRodney W. Grimes 	/*
1656df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1657df8bae1dSRodney W. Grimes 	 */
1658df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1659df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1660df8bae1dSRodney W. Grimes 	if (ipprintfs)
1661af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1662df8bae1dSRodney W. Grimes #endif
1663df8bae1dSRodney W. Grimes 	return (m);
1664df8bae1dSRodney W. Grimes }
1665df8bae1dSRodney W. Grimes 
1666df8bae1dSRodney W. Grimes /*
1667df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1668df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1669df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1670df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1671df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1672df8bae1dSRodney W. Grimes  */
1673df8bae1dSRodney W. Grimes void
1674df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1675df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1676df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1677df8bae1dSRodney W. Grimes {
1678df8bae1dSRodney W. Grimes 	register int i;
1679df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1680df8bae1dSRodney W. Grimes 	register caddr_t opts;
1681df8bae1dSRodney W. Grimes 	int olen;
1682df8bae1dSRodney W. Grimes 
168353be11f6SPoul-Henning Kamp 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
1684df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1685df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1686df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1687df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1688df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1689df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
169053be11f6SPoul-Henning Kamp 	ip->ip_v = IPVERSION;
169153be11f6SPoul-Henning Kamp 	ip->ip_hl = sizeof(struct ip) >> 2;
1692df8bae1dSRodney W. Grimes }
1693df8bae1dSRodney W. Grimes 
/*
 * Table of PRC_NCMDS errno values; 0 means no error is mapped for that
 * slot.  The number in each comment is the array index.
 * NOTE(review): presumably indexed by PRC_* command code -- confirm
 * against the PRC_* definitions.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	0,			/* 18 */
	0,			/* 19 */
	ENOPROTOOPT,		/* 20 */
	ECONNREFUSED		/* 21 */
};
1702df8bae1dSRodney W. Grimes 
1703df8bae1dSRodney W. Grimes /*
1704df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1705df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1706df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1707df8bae1dSRodney W. Grimes  * of codes and types.
1708df8bae1dSRodney W. Grimes  *
1709df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1710df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advertising
1711df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1712df8bae1dSRodney W. Grimes  * protocol deal with that.
1713df8bae1dSRodney W. Grimes  *
1714df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1715df8bae1dSRodney W. Grimes  * via a source route.
1716df8bae1dSRodney W. Grimes  */
17170312fbe9SPoul-Henning Kamp static void
17182b25acc1SLuigi Rizzo ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1719df8bae1dSRodney W. Grimes {
17202b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
17212b25acc1SLuigi Rizzo 	struct rtentry *rt;
172226f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1723df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1724df8bae1dSRodney W. Grimes 	n_long dest;
17253efc3014SJulian Elischer 	struct in_addr pkt_dst;
1726df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
1727b9234fafSSam Leffler #if defined(IPSEC) || defined(FAST_IPSEC)
17286a800098SYoshinobu Inoue 	struct ifnet dummyifp;
17296a800098SYoshinobu Inoue #endif
1730df8bae1dSRodney W. Grimes 
1731df8bae1dSRodney W. Grimes 	dest = 0;
17323efc3014SJulian Elischer 	/*
17333efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
17343efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
17353efc3014SJulian Elischer 	 */
17362b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
17373efc3014SJulian Elischer 
1738df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1739df8bae1dSRodney W. Grimes 	if (ipprintfs)
174061ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
17413efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1742162886e2SBruce Evans 		    ip->ip_ttl);
1743df8bae1dSRodney W. Grimes #endif
1744100ba1a6SJordan K. Hubbard 
1745100ba1a6SJordan K. Hubbard 
17463efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1747df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1748df8bae1dSRodney W. Grimes 		m_freem(m);
1749df8bae1dSRodney W. Grimes 		return;
1750df8bae1dSRodney W. Grimes 	}
17511b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17521b968362SDag-Erling Smørgrav 	if (!ipstealth) {
17531b968362SDag-Erling Smørgrav #endif
1754df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
17551b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
17561b968362SDag-Erling Smørgrav 			    dest, 0);
1757df8bae1dSRodney W. Grimes 			return;
1758df8bae1dSRodney W. Grimes 		}
17591b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17601b968362SDag-Erling Smørgrav 	}
17611b968362SDag-Erling Smørgrav #endif
1762df8bae1dSRodney W. Grimes 
17633efc3014SJulian Elischer 	if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) {
1764df8bae1dSRodney W. Grimes 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1765df8bae1dSRodney W. Grimes 		return;
17664078ffb1SRuslan Ermilov 	} else
1767df8bae1dSRodney W. Grimes 		rt = ipforward_rt.ro_rt;
1768df8bae1dSRodney W. Grimes 
1769df8bae1dSRodney W. Grimes 	/*
1770bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1771bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1772bfef7ed4SIan Dowse 	 *
17734d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
17744d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
17754d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
17764d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
17794d2e3692SLuigi Rizzo 	 *
1780bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1781bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1782bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1783bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1784df8bae1dSRodney W. Grimes 	 */
1785a163d034SWarner Losh 	MGET(mcopy, M_DONTWAIT, m->m_type);
1786a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
17879967cafcSSam Leffler 		/*
17889967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
17899967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
17909967cafcSSam Leffler 		 * be conservative and just discard the copy since
17919967cafcSSam Leffler 		 * code below may some day want the tags.
17929967cafcSSam Leffler 		 */
17939967cafcSSam Leffler 		m_free(mcopy);
17949967cafcSSam Leffler 		mcopy = NULL;
17959967cafcSSam Leffler 	}
1796bfef7ed4SIan Dowse 	if (mcopy != NULL) {
179753be11f6SPoul-Henning Kamp 		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1798bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1799bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1800e316463aSRobert Watson #ifdef MAC
1801e316463aSRobert Watson 		/*
1802e316463aSRobert Watson 		 * XXXMAC: This will eventually become an explicit
1803e316463aSRobert Watson 		 * labeling point.
1804e316463aSRobert Watson 		 */
1805e316463aSRobert Watson 		mac_create_mbuf_from_mbuf(m, mcopy);
1806e316463aSRobert Watson #endif
1807bfef7ed4SIan Dowse 	}
180804287599SRuslan Ermilov 
180904287599SRuslan Ermilov #ifdef IPSTEALTH
181004287599SRuslan Ermilov 	if (!ipstealth) {
181104287599SRuslan Ermilov #endif
181204287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
181304287599SRuslan Ermilov #ifdef IPSTEALTH
181404287599SRuslan Ermilov 	}
181504287599SRuslan Ermilov #endif
1816df8bae1dSRodney W. Grimes 
1817df8bae1dSRodney W. Grimes 	/*
1818df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1819df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1820df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1821df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1822df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1823df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1824df8bae1dSRodney W. Grimes 	 */
1825df8bae1dSRodney W. Grimes 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1826df8bae1dSRodney W. Grimes 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1827df8bae1dSRodney W. Grimes 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
18282b25acc1SLuigi Rizzo 	    ipsendredirects && !srcrt && !next_hop) {
1829df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1830df8bae1dSRodney W. Grimes 		u_long src = ntohl(ip->ip_src.s_addr);
1831df8bae1dSRodney W. Grimes 
1832df8bae1dSRodney W. Grimes 		if (RTA(rt) &&
1833df8bae1dSRodney W. Grimes 		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1834df8bae1dSRodney W. Grimes 		    if (rt->rt_flags & RTF_GATEWAY)
1835df8bae1dSRodney W. Grimes 			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1836df8bae1dSRodney W. Grimes 		    else
18373efc3014SJulian Elischer 			dest = pkt_dst.s_addr;
1838df8bae1dSRodney W. Grimes 		    /* Router requirements says to only send host redirects */
1839df8bae1dSRodney W. Grimes 		    type = ICMP_REDIRECT;
1840df8bae1dSRodney W. Grimes 		    code = ICMP_REDIRECT_HOST;
1841df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1842df8bae1dSRodney W. Grimes 		    if (ipprintfs)
1843df8bae1dSRodney W. Grimes 		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1844df8bae1dSRodney W. Grimes #endif
1845df8bae1dSRodney W. Grimes 		}
1846df8bae1dSRodney W. Grimes 	}
1847df8bae1dSRodney W. Grimes 
1848ea779ff3SLuigi Rizzo     {
1849ea779ff3SLuigi Rizzo 	struct m_hdr tag;
1850ea779ff3SLuigi Rizzo 
1851ea779ff3SLuigi Rizzo 	if (next_hop) {
1852ea779ff3SLuigi Rizzo 		/* Pass IPFORWARD info if available */
1853ea779ff3SLuigi Rizzo 
1854ea779ff3SLuigi Rizzo 		tag.mh_type = MT_TAG;
1855ea779ff3SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
1856ea779ff3SLuigi Rizzo 		tag.mh_data = (caddr_t)next_hop;
1857ea779ff3SLuigi Rizzo 		tag.mh_next = m;
1858ea779ff3SLuigi Rizzo 		m = (struct mbuf *)&tag;
1859ea779ff3SLuigi Rizzo 	}
1860b97d15cbSGarrett Wollman 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
18615d846453SSam Leffler 			  IP_FORWARDING, 0, NULL);
1862ea779ff3SLuigi Rizzo     }
1863df8bae1dSRodney W. Grimes 	if (error)
1864df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1865df8bae1dSRodney W. Grimes 	else {
1866df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1867df8bae1dSRodney W. Grimes 		if (type)
1868df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1869df8bae1dSRodney W. Grimes 		else {
18701f91d8c5SDavid Greenman 			if (mcopy) {
18711f91d8c5SDavid Greenman 				ipflow_create(&ipforward_rt, mcopy);
1872df8bae1dSRodney W. Grimes 				m_freem(mcopy);
18731f91d8c5SDavid Greenman 			}
1874df8bae1dSRodney W. Grimes 			return;
1875df8bae1dSRodney W. Grimes 		}
1876df8bae1dSRodney W. Grimes 	}
1877df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1878df8bae1dSRodney W. Grimes 		return;
1879df8bae1dSRodney W. Grimes 	destifp = NULL;
1880df8bae1dSRodney W. Grimes 
1881df8bae1dSRodney W. Grimes 	switch (error) {
1882df8bae1dSRodney W. Grimes 
1883df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1884df8bae1dSRodney W. Grimes 		/* type, code set above */
1885df8bae1dSRodney W. Grimes 		break;
1886df8bae1dSRodney W. Grimes 
1887df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1888df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1889df8bae1dSRodney W. Grimes 	case ENETDOWN:
1890df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1891df8bae1dSRodney W. Grimes 	default:
1892df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1893df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1894df8bae1dSRodney W. Grimes 		break;
1895df8bae1dSRodney W. Grimes 
1896df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1897df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1898df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
18995d846453SSam Leffler #ifdef IPSEC
19006a800098SYoshinobu Inoue 		/*
19016a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
19026a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
19036a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
19046a800098SYoshinobu Inoue 		 * XXX quickhack!!!
19056a800098SYoshinobu Inoue 		 */
19066a800098SYoshinobu Inoue 		if (ipforward_rt.ro_rt) {
19076a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
19086a800098SYoshinobu Inoue 			int ipsecerror;
19096a800098SYoshinobu Inoue 			int ipsechdr;
19106a800098SYoshinobu Inoue 			struct route *ro;
19116a800098SYoshinobu Inoue 
19126a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
19136a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
19146a800098SYoshinobu Inoue 			                            IP_FORWARDING,
19156a800098SYoshinobu Inoue 			                            &ipsecerror);
19166a800098SYoshinobu Inoue 
19176a800098SYoshinobu Inoue 			if (sp == NULL)
19186a800098SYoshinobu Inoue 				destifp = ipforward_rt.ro_rt->rt_ifp;
19196a800098SYoshinobu Inoue 			else {
19206a800098SYoshinobu Inoue 				/* count IPsec header size */
19216a800098SYoshinobu Inoue 				ipsechdr = ipsec4_hdrsiz(mcopy,
19226a800098SYoshinobu Inoue 							 IPSEC_DIR_OUTBOUND,
19236a800098SYoshinobu Inoue 							 NULL);
19246a800098SYoshinobu Inoue 
19256a800098SYoshinobu Inoue 				/*
19266a800098SYoshinobu Inoue 				 * find the correct route for outer IPv4
19276a800098SYoshinobu Inoue 				 * header, compute tunnel MTU.
19286a800098SYoshinobu Inoue 				 *
19296a800098SYoshinobu Inoue 				 * XXX BUG ALERT
19306a800098SYoshinobu Inoue 				 * The "dummyifp" code relies upon the fact
19316a800098SYoshinobu Inoue 				 * that icmp_error() touches only ifp->if_mtu.
19326a800098SYoshinobu Inoue 				 */
19336a800098SYoshinobu Inoue 				/*XXX*/
19346a800098SYoshinobu Inoue 				destifp = NULL;
19356a800098SYoshinobu Inoue 				if (sp->req != NULL
19366a800098SYoshinobu Inoue 				 && sp->req->sav != NULL
19376a800098SYoshinobu Inoue 				 && sp->req->sav->sah != NULL) {
19386a800098SYoshinobu Inoue 					ro = &sp->req->sav->sah->sa_route;
19396a800098SYoshinobu Inoue 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
19406a800098SYoshinobu Inoue 						dummyifp.if_mtu =
19416a800098SYoshinobu Inoue 						    ro->ro_rt->rt_ifp->if_mtu;
19426a800098SYoshinobu Inoue 						dummyifp.if_mtu -= ipsechdr;
19436a800098SYoshinobu Inoue 						destifp = &dummyifp;
19446a800098SYoshinobu Inoue 					}
19456a800098SYoshinobu Inoue 				}
19466a800098SYoshinobu Inoue 
19476a800098SYoshinobu Inoue 				key_freesp(sp);
19486a800098SYoshinobu Inoue 			}
19496a800098SYoshinobu Inoue 		}
1950b9234fafSSam Leffler #elif FAST_IPSEC
1951b9234fafSSam Leffler 		/*
1952b9234fafSSam Leffler 		 * If the packet is routed over IPsec tunnel, tell the
1953b9234fafSSam Leffler 		 * originator the tunnel MTU.
1954b9234fafSSam Leffler 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1955b9234fafSSam Leffler 		 * XXX quickhack!!!
1956b9234fafSSam Leffler 		 */
1957b9234fafSSam Leffler 		if (ipforward_rt.ro_rt) {
1958b9234fafSSam Leffler 			struct secpolicy *sp = NULL;
1959b9234fafSSam Leffler 			int ipsecerror;
1960b9234fafSSam Leffler 			int ipsechdr;
1961b9234fafSSam Leffler 			struct route *ro;
1962b9234fafSSam Leffler 
1963b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1964b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1965b9234fafSSam Leffler 			                           IP_FORWARDING,
1966b9234fafSSam Leffler 			                           &ipsecerror);
1967b9234fafSSam Leffler 
1968b9234fafSSam Leffler 			if (sp == NULL)
1969b9234fafSSam Leffler 				destifp = ipforward_rt.ro_rt->rt_ifp;
1970b9234fafSSam Leffler 			else {
1971b9234fafSSam Leffler 				/* count IPsec header size */
1972b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1973b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1974b9234fafSSam Leffler 							 NULL);
1975b9234fafSSam Leffler 
1976b9234fafSSam Leffler 				/*
1977b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1978b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1979b9234fafSSam Leffler 				 *
1980b9234fafSSam Leffler 				 * XXX BUG ALERT
1981b9234fafSSam Leffler 				 * The "dummyifp" code relies upon the fact
1982b9234fafSSam Leffler 				 * that icmp_error() touches only ifp->if_mtu.
1983b9234fafSSam Leffler 				 */
1984b9234fafSSam Leffler 				/*XXX*/
1985b9234fafSSam Leffler 				destifp = NULL;
1986b9234fafSSam Leffler 				if (sp->req != NULL
1987b9234fafSSam Leffler 				 && sp->req->sav != NULL
1988b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
1989b9234fafSSam Leffler 					ro = &sp->req->sav->sah->sa_route;
1990b9234fafSSam Leffler 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1991b9234fafSSam Leffler 						dummyifp.if_mtu =
1992b9234fafSSam Leffler 						    ro->ro_rt->rt_ifp->if_mtu;
1993b9234fafSSam Leffler 						dummyifp.if_mtu -= ipsechdr;
1994b9234fafSSam Leffler 						destifp = &dummyifp;
1995b9234fafSSam Leffler 					}
1996b9234fafSSam Leffler 				}
1997b9234fafSSam Leffler 
1998b9234fafSSam Leffler 				KEY_FREESP(&sp);
1999b9234fafSSam Leffler 			}
2000b9234fafSSam Leffler 		}
2001b9234fafSSam Leffler #else /* !IPSEC && !FAST_IPSEC */
20025d846453SSam Leffler 		if (ipforward_rt.ro_rt)
20035d846453SSam Leffler 			destifp = ipforward_rt.ro_rt->rt_ifp;
20046a800098SYoshinobu Inoue #endif /*IPSEC*/
2005df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
2006df8bae1dSRodney W. Grimes 		break;
2007df8bae1dSRodney W. Grimes 
2008df8bae1dSRodney W. Grimes 	case ENOBUFS:
2009df285b3dSMike Silbersack 		/*
2010df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
2011df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
2012df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
2013df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
2014df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
2015df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
2016df285b3dSMike Silbersack 		 */
2017df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
2018df285b3dSMike Silbersack 			m_freem(mcopy);
2019df285b3dSMike Silbersack 			return;
2020df285b3dSMike Silbersack 		} else {
2021df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
2022df8bae1dSRodney W. Grimes 			code = 0;
2023df285b3dSMike Silbersack 		}
2024df8bae1dSRodney W. Grimes 		break;
20253a06e3e0SRuslan Ermilov 
20263a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
20273a06e3e0SRuslan Ermilov 		m_freem(mcopy);
20283a06e3e0SRuslan Ermilov 		return;
2029df8bae1dSRodney W. Grimes 	}
2030df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
2031df8bae1dSRodney W. Grimes }
2032df8bae1dSRodney W. Grimes 
203382c23ebaSBill Fenner void
203482c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
203582c23ebaSBill Fenner 	register struct inpcb *inp;
203682c23ebaSBill Fenner 	register struct mbuf **mp;
203782c23ebaSBill Fenner 	register struct ip *ip;
203882c23ebaSBill Fenner 	register struct mbuf *m;
203982c23ebaSBill Fenner {
204082c23ebaSBill Fenner 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
204182c23ebaSBill Fenner 		struct timeval tv;
204282c23ebaSBill Fenner 
204382c23ebaSBill Fenner 		microtime(&tv);
204482c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
204582c23ebaSBill Fenner 			SCM_TIMESTAMP, SOL_SOCKET);
204682c23ebaSBill Fenner 		if (*mp)
204782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
20484cc20ab1SSeigo Tanimura 	}
204982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
205082c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
205182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
205282c23ebaSBill Fenner 		if (*mp)
205382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
205482c23ebaSBill Fenner 	}
20554957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
20564957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
20574957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
20584957466bSMatthew N. Dodd 		if (*mp)
20594957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
20604957466bSMatthew N. Dodd 	}
206182c23ebaSBill Fenner #ifdef notyet
206282c23ebaSBill Fenner 	/* XXX
206382c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
206482c23ebaSBill Fenner 	 * than they already were.
206582c23ebaSBill Fenner 	 */
206682c23ebaSBill Fenner 	/* options were tossed already */
206782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
206882c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
206982c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
207082c23ebaSBill Fenner 		if (*mp)
207182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
207282c23ebaSBill Fenner 	}
207382c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
207482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
207582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
207682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
207782c23ebaSBill Fenner 		if (*mp)
207882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
207982c23ebaSBill Fenner 	}
208082c23ebaSBill Fenner #endif
208182c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
2082d314ad7bSJulian Elischer 		struct ifnet *ifp;
2083d314ad7bSJulian Elischer 		struct sdlbuf {
208482c23ebaSBill Fenner 			struct sockaddr_dl sdl;
2085d314ad7bSJulian Elischer 			u_char	pad[32];
2086d314ad7bSJulian Elischer 		} sdlbuf;
2087d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
2088d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
208982c23ebaSBill Fenner 
2090d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
2091d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
2092f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
2093f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
2094d314ad7bSJulian Elischer 			/*
2095d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
2096d314ad7bSJulian Elischer 			 */
2097d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
2098d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
2099d314ad7bSJulian Elischer 				goto makedummy;
2100d314ad7bSJulian Elischer 			}
2101d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
2102d314ad7bSJulian Elischer 		} else {
2103d314ad7bSJulian Elischer makedummy:
2104d314ad7bSJulian Elischer 			sdl2->sdl_len
2105d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
2106d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
2107d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
2108d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2109d314ad7bSJulian Elischer 		}
2110d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
211182c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
211282c23ebaSBill Fenner 		if (*mp)
211382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
211482c23ebaSBill Fenner 	}
211582c23ebaSBill Fenner }
211682c23ebaSBill Fenner 
21174d2e3692SLuigi Rizzo /*
21184d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
21194d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
21204d2e3692SLuigi Rizzo  *
21214d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
21224d2e3692SLuigi Rizzo  *  handling is done there already.
21234d2e3692SLuigi Rizzo  */
21244d2e3692SLuigi Rizzo static int ip_rsvp_on;
21254d2e3692SLuigi Rizzo struct socket *ip_rsvpd;
2126df8bae1dSRodney W. Grimes int
2127f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
2128f0068c4aSGarrett Wollman {
2129f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
2130f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2131f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
2132f0068c4aSGarrett Wollman 
2133f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
2134f0068c4aSGarrett Wollman 		return EADDRINUSE;
2135f0068c4aSGarrett Wollman 
2136f0068c4aSGarrett Wollman 	ip_rsvpd = so;
21371c5de19aSGarrett Wollman 	/*
21381c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
21391c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21401c5de19aSGarrett Wollman 	 */
21411c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
21421c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
21431c5de19aSGarrett Wollman 		rsvp_on++;
21441c5de19aSGarrett Wollman 	}
2145f0068c4aSGarrett Wollman 
2146f0068c4aSGarrett Wollman 	return 0;
2147f0068c4aSGarrett Wollman }
2148f0068c4aSGarrett Wollman 
2149f0068c4aSGarrett Wollman int
2150f0068c4aSGarrett Wollman ip_rsvp_done(void)
2151f0068c4aSGarrett Wollman {
2152f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
21531c5de19aSGarrett Wollman 	/*
21541c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
21551c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21561c5de19aSGarrett Wollman 	 */
21571c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
21581c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
21591c5de19aSGarrett Wollman 		rsvp_on--;
21601c5de19aSGarrett Wollman 	}
2161f0068c4aSGarrett Wollman 	return 0;
2162f0068c4aSGarrett Wollman }
2163bbb4330bSLuigi Rizzo 
2164bbb4330bSLuigi Rizzo void
2165bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
2166bbb4330bSLuigi Rizzo {
2167bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
2168bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
2169bbb4330bSLuigi Rizzo 		return;
2170bbb4330bSLuigi Rizzo 	}
2171bbb4330bSLuigi Rizzo 
2172bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
2173bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
2174bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
2175bbb4330bSLuigi Rizzo 	 */
2176bbb4330bSLuigi Rizzo 
2177bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
2178bbb4330bSLuigi Rizzo 		m_freem(m);
2179bbb4330bSLuigi Rizzo 		return;
2180bbb4330bSLuigi Rizzo 	}
2181bbb4330bSLuigi Rizzo 
2182bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
2183bbb4330bSLuigi Rizzo 		rip_input(m, off);
2184bbb4330bSLuigi Rizzo 		return;
2185bbb4330bSLuigi Rizzo 	}
2186bbb4330bSLuigi Rizzo 	/* Drop the packet */
2187bbb4330bSLuigi Rizzo 	m_freem(m);
2188bbb4330bSLuigi Rizzo }
2189