xref: /freebsd/sys/netinet/ip_input.c (revision 134ea22494d53dc5228a4b2520fd7b28c17297d4)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
14df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
15df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
16df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
17df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
18df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
19df8bae1dSRodney W. Grimes  *    without specific prior written permission.
20df8bae1dSRodney W. Grimes  *
21df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
32df8bae1dSRodney W. Grimes  *
33df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34c3aac50fSPeter Wemm  * $FreeBSD$
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37e4f4247aSEivind Eklund #include "opt_bootp.h"
3874a9466cSGary Palmer #include "opt_ipfw.h"
39b715f178SLuigi Rizzo #include "opt_ipdn.h"
40fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
411ee25934SPeter Wemm #include "opt_ipfilter.h"
4227108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
436a800098SYoshinobu Inoue #include "opt_ipsec.h"
4436b0360bSRobert Watson #include "opt_mac.h"
45c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4664dddc18SKris Kennaway #include "opt_random_ip_id.h"
4774a9466cSGary Palmer 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
5036b0360bSRobert Watson #include <sys/mac.h>
51df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
52b715f178SLuigi Rizzo #include <sys/malloc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54df8bae1dSRodney W. Grimes #include <sys/protosw.h>
55df8bae1dSRodney W. Grimes #include <sys/socket.h>
56df8bae1dSRodney W. Grimes #include <sys/time.h>
57df8bae1dSRodney W. Grimes #include <sys/kernel.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61c85540ddSAndrey A. Chernov #include <net/pfil.h>
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7658938916SGarrett Wollman #include <machine/in_cksum.h>
77df8bae1dSRodney W. Grimes 
78f0068c4aSGarrett Wollman #include <sys/socketvar.h>
796ddbf1e2SGary Palmer 
806ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
81db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
82db69a05dSPaul Saab 
836a800098SYoshinobu Inoue #ifdef IPSEC
846a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
856a800098SYoshinobu Inoue #include <netkey/key.h>
866a800098SYoshinobu Inoue #endif
876a800098SYoshinobu Inoue 
88b9234fafSSam Leffler #ifdef FAST_IPSEC
89b9234fafSSam Leffler #include <netipsec/ipsec.h>
90b9234fafSSam Leffler #include <netipsec/key.h>
91b9234fafSSam Leffler #endif
92b9234fafSSam Leffler 
931c5de19aSGarrett Wollman int rsvp_on = 0;
94f0068c4aSGarrett Wollman 
951f91d8c5SDavid Greenman int	ipforwarding = 0;
960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
973d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
980312fbe9SPoul-Henning Kamp 
99d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
1000312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
1013d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
1020312fbe9SPoul-Henning Kamp 
103df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1040312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1053d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1060312fbe9SPoul-Henning Kamp 
1070312fbe9SPoul-Henning Kamp static int	ip_dosourceroute = 0;
1080312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
1093d177f46SBill Fumerola     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
1104fce5804SGuido van Rooij 
1114fce5804SGuido van Rooij static int	ip_acceptsourceroute = 0;
1124fce5804SGuido van Rooij SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
1133d177f46SBill Fumerola     CTLFLAG_RW, &ip_acceptsourceroute, 0,
1143d177f46SBill Fumerola     "Enable accepting source routed IP packets");
1156a800098SYoshinobu Inoue 
1166a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1176a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1186a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1196a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1206a800098SYoshinobu Inoue 
121402062e8SMike Silbersack static int    nipq = 0;         /* total # of reass queues */
122402062e8SMike Silbersack static int    maxnipq;
123690a6055SJesper Skriver SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
124402062e8SMike Silbersack 	&maxnipq, 0,
125690a6055SJesper Skriver 	"Maximum number of IPv4 fragment reassembly queue entries");
126690a6055SJesper Skriver 
127375386e2SMike Silbersack static int    maxfragsperpacket;
128375386e2SMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
129375386e2SMike Silbersack 	&maxfragsperpacket, 0,
130375386e2SMike Silbersack 	"Maximum number of IPv4 fragments allowed per packet");
131375386e2SMike Silbersack 
132df285b3dSMike Silbersack static int	ip_sendsourcequench = 0;
133df285b3dSMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
134df285b3dSMike Silbersack 	&ip_sendsourcequench, 0,
135df285b3dSMike Silbersack 	"Enable the transmission of source quench packets");
136df285b3dSMike Silbersack 
137823db0e9SDon Lewis /*
138823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
139823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
140a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
141823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1423f67c834SDon Lewis  *
143a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
144a8f12100SDon Lewis  * to translate the destination address to another local interface.
1453f67c834SDon Lewis  *
1463f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1473f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1483f67c834SDon Lewis  * packets for those addresses are received.
149823db0e9SDon Lewis  */
150b3e95d4eSJonathan Lemon static int	ip_checkinterface = 1;
151b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
152b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
153b3e95d4eSJonathan Lemon 
154df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1550312fbe9SPoul-Henning Kamp static int	ipprintfs = 0;
156df8bae1dSRodney W. Grimes #endif
157134ea224SSam Leffler #ifdef PFIL_HOOKS
158134ea224SSam Leffler struct pfil_head inet_pfil_hook;
159134ea224SSam Leffler #endif
160df8bae1dSRodney W. Grimes 
1611cafed39SJonathan Lemon static struct	ifqueue ipintrq;
162ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
163ca925d9cSJonathan Lemon 
164df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
165f0ffb944SJulian Elischer extern	struct protosw inetsw[];
166df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16759562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
168ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
169ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
170ca925d9cSJonathan Lemon 
171afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1723d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1730312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1743d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
175df8bae1dSRodney W. Grimes 
176f23b4c91SGarrett Wollman struct ipstat ipstat;
177c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1783d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
179194a213eSAndrey A. Chernov 
/*
 * Fragment reassembly hash.
 *
 * The table has IPREASS_NHASH (2^IPREASS_NHASH_LOG2) buckets.
 * IPREASS_HASH(x, y) packs bits 0-3 and bits 8-11 of x into one byte,
 * XORs that with y and keeps the low IPREASS_NHASH_LOG2 bits.
 * Note that x is expanded twice.
 */
#define	IPREASS_NHASH_LOG2	6
#define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define	IPREASS_HMASK		(IPREASS_NHASH - 1)
#define	IPREASS_HASH(x,y)						\
	((((((((x) >> 8) & 0xF) << 4)) | ((x) & 0xF)) ^ (y)) & IPREASS_HMASK)
186194a213eSAndrey A. Chernov 
187462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
1882fad1e93SSam Leffler struct mtx ipqlock;
1892fad1e93SSam Leffler 
1902fad1e93SSam Leffler #define	IPQ_LOCK()	mtx_lock(&ipqlock)
1912fad1e93SSam Leffler #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
1922fad1e93SSam Leffler #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF);
1932fad1e93SSam Leffler #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED);
194f23b4c91SGarrett Wollman 
1950312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1973d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1980312fbe9SPoul-Henning Kamp #endif
1990312fbe9SPoul-Henning Kamp 
2001b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
2011b968362SDag-Erling Smørgrav static int	ipstealth = 0;
2021b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
2031b968362SDag-Erling Smørgrav     &ipstealth, 0, "");
2041b968362SDag-Erling Smørgrav #endif
2051b968362SDag-Erling Smørgrav 
206cfe8b629SGarrett Wollman 
20723bf9953SPoul-Henning Kamp /* Firewall hooks */
20823bf9953SPoul-Henning Kamp ip_fw_chk_t *ip_fw_chk_ptr;
2099fcc0795SLuigi Rizzo int fw_enable = 1 ;
21097850a5dSLuigi Rizzo int fw_one_pass = 1;
211e7319babSPoul-Henning Kamp 
212db69a05dSPaul Saab /* Dummynet hooks */
213db69a05dSPaul Saab ip_dn_io_t *ip_dn_io_ptr;
214b715f178SLuigi Rizzo 
215afed1b49SDarren Reed 
216e7319babSPoul-Henning Kamp /*
2174d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
2184d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
2194d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
2204d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
2214d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
2224d2e3692SLuigi Rizzo  *
223df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
224df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
225df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
226df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
227df8bae1dSRodney W. Grimes  * to us.
228df8bae1dSRodney W. Grimes  */
2290312fbe9SPoul-Henning Kamp static int	ip_nhops = 0;
230df8bae1dSRodney W. Grimes static	struct ip_srcrt {
231df8bae1dSRodney W. Grimes 	struct	in_addr dst;			/* final destination */
232df8bae1dSRodney W. Grimes 	char	nop;				/* one NOP to align */
233df8bae1dSRodney W. Grimes 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
234df8bae1dSRodney W. Grimes 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
235df8bae1dSRodney W. Grimes } ip_srcrt;
236df8bae1dSRodney W. Grimes 
2374d77a549SAlfred Perlstein static void	save_rte(u_char *, struct in_addr);
2382b25acc1SLuigi Rizzo static int	ip_dooptions(struct mbuf *m, int,
2392b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2402b25acc1SLuigi Rizzo static void	ip_forward(struct mbuf *m, int srcrt,
2412b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2424d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2432b25acc1SLuigi Rizzo static struct	mbuf *ip_reass(struct mbuf *, struct ipqhead *,
2442b25acc1SLuigi Rizzo 		struct ipq *, u_int32_t *, u_int16_t *);
2458948e4baSArchie Cobbs 
246df8bae1dSRodney W. Grimes /*
247df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
248df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
249df8bae1dSRodney W. Grimes  */
250df8bae1dSRodney W. Grimes void
251df8bae1dSRodney W. Grimes ip_init()
252df8bae1dSRodney W. Grimes {
253f0ffb944SJulian Elischer 	register struct protosw *pr;
254df8bae1dSRodney W. Grimes 	register int i;
255df8bae1dSRodney W. Grimes 
25659562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
257ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
258f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
259df8bae1dSRodney W. Grimes 	if (pr == 0)
260df8bae1dSRodney W. Grimes 		panic("ip_init");
261df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
262df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
263f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
264f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
265df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
266df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
267df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
268194a213eSAndrey A. Chernov 
269134ea224SSam Leffler #ifdef PFIL_HOOKS
270134ea224SSam Leffler 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
271134ea224SSam Leffler 	inet_pfil_hook.ph_af = AF_INET;
272134ea224SSam Leffler 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
273134ea224SSam Leffler 		printf("%s: WARNING: unable to register pfil hook, "
274134ea224SSam Leffler 			"error %d\n", __func__, i);
275134ea224SSam Leffler #endif /* PFIL_HOOKS */
276134ea224SSam Leffler 
2772fad1e93SSam Leffler 	IPQ_LOCK_INIT();
278194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
279462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
280194a213eSAndrey A. Chernov 
281375386e2SMike Silbersack 	maxnipq = nmbclusters / 32;
282375386e2SMike Silbersack 	maxfragsperpacket = 16;
283194a213eSAndrey A. Chernov 
28464dddc18SKris Kennaway #ifndef RANDOM_IP_ID
285227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
28664dddc18SKris Kennaway #endif
287df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2886008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
2891cafed39SJonathan Lemon 	netisr_register(NETISR_IP, ip_input, &ipintrq);
290df8bae1dSRodney W. Grimes }
291df8bae1dSRodney W. Grimes 
2924d2e3692SLuigi Rizzo /*
2934d2e3692SLuigi Rizzo  * XXX watch out this one. It is perhaps used as a cache for
2944d2e3692SLuigi Rizzo  * the most recently used route ? it is cleared in in_addroute()
2954d2e3692SLuigi Rizzo  * when a new route is successfully created.
2964d2e3692SLuigi Rizzo  */
2971e3d5af0SRuslan Ermilov struct	route ipforward_rt;
298df8bae1dSRodney W. Grimes 
299df8bae1dSRodney W. Grimes /*
300df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
301df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
302df8bae1dSRodney W. Grimes  */
303c67b1d17SGarrett Wollman void
304c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
305df8bae1dSRodney W. Grimes {
30623bf9953SPoul-Henning Kamp 	struct ip *ip;
30723bf9953SPoul-Henning Kamp 	struct ipq *fp;
3085da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
309ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
310823db0e9SDon Lewis 	int    i, hlen, checkif;
31147c861ecSBrian Somers 	u_short sum;
3127538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
3138948e4baSArchie Cobbs 	u_int32_t divert_info = 0;		/* packet divert/tee info */
3142b25acc1SLuigi Rizzo 	struct ip_fw_args args;
315b9234fafSSam Leffler #ifdef FAST_IPSEC
316b9234fafSSam Leffler 	struct m_tag *mtag;
317b9234fafSSam Leffler 	struct tdb_ident *tdbi;
318b9234fafSSam Leffler 	struct secpolicy *sp;
319b9234fafSSam Leffler 	int s, error;
320b9234fafSSam Leffler #endif /* FAST_IPSEC */
321b715f178SLuigi Rizzo 
3222b25acc1SLuigi Rizzo 	args.eh = NULL;
3232b25acc1SLuigi Rizzo 	args.oif = NULL;
3242b25acc1SLuigi Rizzo 	args.rule = NULL;
3252b25acc1SLuigi Rizzo 	args.divert_rule = 0;			/* divert cookie */
3262b25acc1SLuigi Rizzo 	args.next_hop = NULL;
3278948e4baSArchie Cobbs 
3282b25acc1SLuigi Rizzo 	/* Grab info from MT_TAG mbufs prepended to the chain.	*/
3292b25acc1SLuigi Rizzo 	for (; m && m->m_type == MT_TAG; m = m->m_next) {
3305d846453SSam Leffler 		switch(m->_m_tag_id) {
3312b25acc1SLuigi Rizzo 		default:
3322b25acc1SLuigi Rizzo 			printf("ip_input: unrecognised MT_TAG tag %d\n",
3335d846453SSam Leffler 			    m->_m_tag_id);
3342b25acc1SLuigi Rizzo 			break;
3352b25acc1SLuigi Rizzo 
3362b25acc1SLuigi Rizzo 		case PACKET_TAG_DUMMYNET:
3372b25acc1SLuigi Rizzo 			args.rule = ((struct dn_pkt *)m)->rule;
3382b25acc1SLuigi Rizzo 			break;
3392b25acc1SLuigi Rizzo 
3402b25acc1SLuigi Rizzo 		case PACKET_TAG_DIVERT:
3417627c6cbSMaxime Henrion 			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
3422b25acc1SLuigi Rizzo 			break;
3432b25acc1SLuigi Rizzo 
3442b25acc1SLuigi Rizzo 		case PACKET_TAG_IPFORWARD:
3452b25acc1SLuigi Rizzo 			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
3462b25acc1SLuigi Rizzo 			break;
3472b25acc1SLuigi Rizzo 		}
3482b25acc1SLuigi Rizzo 	}
349df8bae1dSRodney W. Grimes 
350fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
351db40007dSAndrew R. Reiter 
3522b25acc1SLuigi Rizzo 	if (args.rule) {	/* dummynet already filtered us */
3532b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
35453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
3552b25acc1SLuigi Rizzo 		goto iphack ;
3562b25acc1SLuigi Rizzo 	}
3572b25acc1SLuigi Rizzo 
358df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
35958938916SGarrett Wollman 
36058938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
36158938916SGarrett Wollman 		goto tooshort;
36258938916SGarrett Wollman 
363df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
364df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
365df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
366c67b1d17SGarrett Wollman 		return;
367df8bae1dSRodney W. Grimes 	}
368df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
36958938916SGarrett Wollman 
37053be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
371df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
372df8bae1dSRodney W. Grimes 		goto bad;
373df8bae1dSRodney W. Grimes 	}
37458938916SGarrett Wollman 
37553be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
376df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
377df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
378df8bae1dSRodney W. Grimes 		goto bad;
379df8bae1dSRodney W. Grimes 	}
380df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
381df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
382df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
383c67b1d17SGarrett Wollman 			return;
384df8bae1dSRodney W. Grimes 		}
385df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
386df8bae1dSRodney W. Grimes 	}
38733841545SHajimu UMEMOTO 
38833841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
38933841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
39033841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
39133841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
39233841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
39333841545SHajimu UMEMOTO 			goto bad;
39433841545SHajimu UMEMOTO 		}
39533841545SHajimu UMEMOTO 	}
39633841545SHajimu UMEMOTO 
397db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
398db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
399db4f9cc7SJonathan Lemon 	} else {
40058938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
40147c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
40258938916SGarrett Wollman 		} else {
40347c861ecSBrian Somers 			sum = in_cksum(m, hlen);
40458938916SGarrett Wollman 		}
405db4f9cc7SJonathan Lemon 	}
40647c861ecSBrian Somers 	if (sum) {
407df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
408df8bae1dSRodney W. Grimes 		goto bad;
409df8bae1dSRodney W. Grimes 	}
410df8bae1dSRodney W. Grimes 
411df8bae1dSRodney W. Grimes 	/*
412df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
413df8bae1dSRodney W. Grimes 	 */
414fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
415df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
416df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
417df8bae1dSRodney W. Grimes 		goto bad;
418df8bae1dSRodney W. Grimes 	}
419fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
420df8bae1dSRodney W. Grimes 
421df8bae1dSRodney W. Grimes 	/*
422df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
	 * is at least as much as the IP header would have us expect.
424df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
425df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
426df8bae1dSRodney W. Grimes 	 */
427df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
42858938916SGarrett Wollman tooshort:
429df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
430df8bae1dSRodney W. Grimes 		goto bad;
431df8bae1dSRodney W. Grimes 	}
432df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
433df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
434df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
435df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
436df8bae1dSRodney W. Grimes 		} else
437df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
438df8bae1dSRodney W. Grimes 	}
43914dd6717SSam Leffler #if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
44014dd6717SSam Leffler 	/*
44114dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
44214dd6717SSam Leffler 	 */
44314dd6717SSam Leffler 	if (ipsec_gethist(m, NULL))
44414dd6717SSam Leffler 		goto pass;
44514dd6717SSam Leffler #endif
4461f76a5e2SSam Leffler #if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
4471f76a5e2SSam Leffler 	/*
4481f76a5e2SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
4491f76a5e2SSam Leffler 	 */
4501f76a5e2SSam Leffler 	if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
4511f76a5e2SSam Leffler 		goto pass;
4521f76a5e2SSam Leffler #endif
4533f67c834SDon Lewis 
4544dd1662bSUgen J.S. Antsilevich 	/*
4554dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
	 * At this point no processing has been done on the packet yet,
	 * and while it is still fresh off the network we do our black
	 * deals with it.
45993e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
460fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
461b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4624dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4634dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4644dd1662bSUgen J.S. Antsilevich  	 */
465b715f178SLuigi Rizzo 
466b715f178SLuigi Rizzo iphack:
467df8bae1dSRodney W. Grimes 
468c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
469c4ac87eaSDarren Reed 	/*
470134ea224SSam Leffler 	 * Run through list of hooks for input packets.
471c4ac87eaSDarren Reed 	 */
472134ea224SSam Leffler 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
473134ea224SSam Leffler 	    PFIL_IN) != 0)
474beec8214SDarren Reed 		return;
475134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
476c4ac87eaSDarren Reed 		return;
477c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
478c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
479c4ac87eaSDarren Reed 
4807b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
481f9e354dfSJulian Elischer 		/*
482f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
483f9e354dfSJulian Elischer 		 * skip the firewall a second time
484f9e354dfSJulian Elischer 		 */
4852b25acc1SLuigi Rizzo 		if (args.next_hop)
486f9e354dfSJulian Elischer 			goto ours;
4872b25acc1SLuigi Rizzo 
4882b25acc1SLuigi Rizzo 		args.m = m;
4892b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
4902b25acc1SLuigi Rizzo 		m = args.m;
4912b25acc1SLuigi Rizzo 
492d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
493507b4b54SLuigi Rizzo 			if (m)
494507b4b54SLuigi Rizzo 				m_freem(m);
495b715f178SLuigi Rizzo 			return;
496507b4b54SLuigi Rizzo 		}
497d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
4982b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
499b715f178SLuigi Rizzo 			goto pass;
5007b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
5018948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
5022b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
503e4676ba6SJulian Elischer 			return;
50493e0e116SJulian Elischer 		}
505b715f178SLuigi Rizzo #ifdef IPDIVERT
5068948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
5078948e4baSArchie Cobbs 			/* Divert or tee packet */
5088948e4baSArchie Cobbs 			divert_info = i;
509b715f178SLuigi Rizzo 			goto ours;
510b715f178SLuigi Rizzo 		}
511b715f178SLuigi Rizzo #endif
5122b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
513b715f178SLuigi Rizzo 			goto pass;
514b715f178SLuigi Rizzo 		/*
515b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
516b715f178SLuigi Rizzo 		 */
517b715f178SLuigi Rizzo 		m_freem(m);
518b715f178SLuigi Rizzo 		return;
519b715f178SLuigi Rizzo 	}
520b715f178SLuigi Rizzo pass:
521100ba1a6SJordan K. Hubbard 
522df8bae1dSRodney W. Grimes 	/*
523df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
524df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
525df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
526df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
527df8bae1dSRodney W. Grimes 	 */
528df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5292b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
530c67b1d17SGarrett Wollman 		return;
531df8bae1dSRodney W. Grimes 
532f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
533f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
534f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
535f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
536f0068c4aSGarrett Wollman 	 * grabbing the packet.
537f0068c4aSGarrett Wollman          */
5381c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
539f0068c4aSGarrett Wollman 		goto ours;
540f0068c4aSGarrett Wollman 
541df8bae1dSRodney W. Grimes 	/*
542df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
543cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
544cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
545cc766e04SGarrett Wollman 	 * with it).
546df8bae1dSRodney W. Grimes 	 */
547cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
548cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
549cc766e04SGarrett Wollman 		goto ours;
550cc766e04SGarrett Wollman 
5517538a9a0SJonathan Lemon 	/*
5527538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5537538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5547538a9a0SJonathan Lemon 	 */
5552b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5567538a9a0SJonathan Lemon 
557823db0e9SDon Lewis 	/*
558823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
559823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
560823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
561e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
562e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5633f67c834SDon Lewis 	 *
5643f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5653f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5663f67c834SDon Lewis 	 *
567a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5683f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5693f67c834SDon Lewis 	 * the packets are received.
570823db0e9SDon Lewis 	 */
571823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5729494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
573e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
5742b25acc1SLuigi Rizzo 	    (args.next_hop == NULL);
575823db0e9SDon Lewis 
576ca925d9cSJonathan Lemon 	/*
577ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
578ca925d9cSJonathan Lemon 	 */
579ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
580f9e354dfSJulian Elischer 		/*
581823db0e9SDon Lewis 		 * If the address matches, verify that the packet
582823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
583823db0e9SDon Lewis 		 * enabled.
584f9e354dfSJulian Elischer 		 */
585823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
586823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
587ed1ff184SJulian Elischer 			goto ours;
588ca925d9cSJonathan Lemon 	}
589823db0e9SDon Lewis 	/*
590ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
591ca925d9cSJonathan Lemon 	 *
592ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
593ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
594ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
595ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
596823db0e9SDon Lewis 	 */
597ca925d9cSJonathan Lemon 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
598ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
599ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
600ca925d9cSJonathan Lemon 				continue;
601ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
602df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6037538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
604df8bae1dSRodney W. Grimes 				goto ours;
6057538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
606df8bae1dSRodney W. Grimes 				goto ours;
607ca925d9cSJonathan Lemon #ifdef BOOTP_COMPAT
608ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
609ca925d9cSJonathan Lemon 				goto ours;
610ca925d9cSJonathan Lemon #endif
611df8bae1dSRodney W. Grimes 		}
612df8bae1dSRodney W. Grimes 	}
613df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
614df8bae1dSRodney W. Grimes 		struct in_multi *inm;
615df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
616df8bae1dSRodney W. Grimes 			/*
617df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
618df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
619df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
620df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
621df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
622df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
623df8bae1dSRodney W. Grimes 			 */
624bbb4330bSLuigi Rizzo 			if (ip_mforward &&
625bbb4330bSLuigi Rizzo 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
626df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
627df8bae1dSRodney W. Grimes 				m_freem(m);
628c67b1d17SGarrett Wollman 				return;
629df8bae1dSRodney W. Grimes 			}
630df8bae1dSRodney W. Grimes 
631df8bae1dSRodney W. Grimes 			/*
63211612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
633df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
634df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
635df8bae1dSRodney W. Grimes 			 */
636df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
637df8bae1dSRodney W. Grimes 				goto ours;
638df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
639df8bae1dSRodney W. Grimes 		}
640df8bae1dSRodney W. Grimes 		/*
641df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
642df8bae1dSRodney W. Grimes 		 * arrival interface.
643df8bae1dSRodney W. Grimes 		 */
644df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
645df8bae1dSRodney W. Grimes 		if (inm == NULL) {
64682c39223SGarrett Wollman 			ipstat.ips_notmember++;
647df8bae1dSRodney W. Grimes 			m_freem(m);
648c67b1d17SGarrett Wollman 			return;
649df8bae1dSRodney W. Grimes 		}
650df8bae1dSRodney W. Grimes 		goto ours;
651df8bae1dSRodney W. Grimes 	}
652df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
653df8bae1dSRodney W. Grimes 		goto ours;
654df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
655df8bae1dSRodney W. Grimes 		goto ours;
656df8bae1dSRodney W. Grimes 
6576a800098SYoshinobu Inoue 	/*
6586a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6596a800098SYoshinobu Inoue 	 */
6606a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6616a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6626a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6636a800098SYoshinobu Inoue 				goto ours;
6646a800098SYoshinobu Inoue 		}
6656a800098SYoshinobu Inoue 		m_freem(m);
6666a800098SYoshinobu Inoue 		return;
6676a800098SYoshinobu Inoue 	}
6689494d596SBrooks Davis 
669df8bae1dSRodney W. Grimes 	/*
670df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
671df8bae1dSRodney W. Grimes 	 */
672df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
673df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
674df8bae1dSRodney W. Grimes 		m_freem(m);
675546f251bSChris D. Faulhaber 	} else {
676546f251bSChris D. Faulhaber #ifdef IPSEC
677546f251bSChris D. Faulhaber 		/*
678546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
679546f251bSChris D. Faulhaber 		 */
680546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
681546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
682546f251bSChris D. Faulhaber 			goto bad;
683546f251bSChris D. Faulhaber 		}
684546f251bSChris D. Faulhaber #endif /* IPSEC */
685b9234fafSSam Leffler #ifdef FAST_IPSEC
686b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
687b9234fafSSam Leffler 		s = splnet();
688b9234fafSSam Leffler 		if (mtag != NULL) {
689b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
690b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
691b9234fafSSam Leffler 		} else {
692b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
693b9234fafSSam Leffler 						   IP_FORWARDING, &error);
694b9234fafSSam Leffler 		}
695b9234fafSSam Leffler 		if (sp == NULL) {	/* NB: can happen if error */
696b9234fafSSam Leffler 			splx(s);
697b9234fafSSam Leffler 			/*XXX error stat???*/
698b9234fafSSam Leffler 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
699b9234fafSSam Leffler 			goto bad;
700b9234fafSSam Leffler 		}
701b9234fafSSam Leffler 
702b9234fafSSam Leffler 		/*
703b9234fafSSam Leffler 		 * Check security policy against packet attributes.
704b9234fafSSam Leffler 		 */
705b9234fafSSam Leffler 		error = ipsec_in_reject(sp, m);
706b9234fafSSam Leffler 		KEY_FREESP(&sp);
707b9234fafSSam Leffler 		splx(s);
708b9234fafSSam Leffler 		if (error) {
709b9234fafSSam Leffler 			ipstat.ips_cantforward++;
710b9234fafSSam Leffler 			goto bad;
711b9234fafSSam Leffler 		}
712b9234fafSSam Leffler #endif /* FAST_IPSEC */
7132b25acc1SLuigi Rizzo 		ip_forward(m, 0, args.next_hop);
714546f251bSChris D. Faulhaber 	}
715c67b1d17SGarrett Wollman 	return;
716df8bae1dSRodney W. Grimes 
717df8bae1dSRodney W. Grimes ours:
718d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
719d0ebc0d2SYaroslav Tykhiy 	/*
720d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
721d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
722d0ebc0d2SYaroslav Tykhiy 	 */
7232b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
7242b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
725d0ebc0d2SYaroslav Tykhiy 		return;
726d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
727d0ebc0d2SYaroslav Tykhiy 
7285da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7295da9f8faSJosef Karthauser 	if (ia != NULL) {
7305da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7315da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7325da9f8faSJosef Karthauser 	}
733100ba1a6SJordan K. Hubbard 
73463f8d699SJordan K. Hubbard 	/*
735df8bae1dSRodney W. Grimes 	 * If offset or IP_MF are set, must reassemble.
736df8bae1dSRodney W. Grimes 	 * Otherwise, nothing need be done.
737df8bae1dSRodney W. Grimes 	 * (We could look in the reassembly queue to see
738df8bae1dSRodney W. Grimes 	 * if the packet was previously fragmented,
739df8bae1dSRodney W. Grimes 	 * but it's not worth the time; just let them time out.)
740df8bae1dSRodney W. Grimes 	 */
741b6ea1aa5SRuslan Ermilov 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
7426a800098SYoshinobu Inoue 
743ecf44c01SMike Silbersack 		/* If maxnipq is 0, never accept fragments. */
744ac64c866SMike Silbersack 		if (maxnipq == 0) {
745ac64c866SMike Silbersack                 	ipstat.ips_fragments++;
746ac64c866SMike Silbersack 			ipstat.ips_fragdropped++;
747ac64c866SMike Silbersack 			goto bad;
748ac64c866SMike Silbersack 		}
749ac64c866SMike Silbersack 
750194a213eSAndrey A. Chernov 		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
7512fad1e93SSam Leffler 		IPQ_LOCK();
752df8bae1dSRodney W. Grimes 		/*
753df8bae1dSRodney W. Grimes 		 * Look for queue of fragments
754df8bae1dSRodney W. Grimes 		 * of this datagram.
755df8bae1dSRodney W. Grimes 		 */
756462b86feSPoul-Henning Kamp 		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
757df8bae1dSRodney W. Grimes 			if (ip->ip_id == fp->ipq_id &&
758df8bae1dSRodney W. Grimes 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
759df8bae1dSRodney W. Grimes 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
76036b0360bSRobert Watson #ifdef MAC
76136b0360bSRobert Watson 			    mac_fragment_match(m, fp) &&
76236b0360bSRobert Watson #endif
763df8bae1dSRodney W. Grimes 			    ip->ip_p == fp->ipq_p)
764df8bae1dSRodney W. Grimes 				goto found;
765df8bae1dSRodney W. Grimes 
766042bbfa3SRobert Watson 		fp = NULL;
767194a213eSAndrey A. Chernov 
768ac64c866SMike Silbersack 		/*
769ac64c866SMike Silbersack 		 * Enforce upper bound on number of fragmented packets
770ac64c866SMike Silbersack 		 * for which we attempt reassembly;
771ac64c866SMike Silbersack 		 * If maxnipq is -1, accept all fragments without limitation.
772ac64c866SMike Silbersack 		 */
773ac64c866SMike Silbersack 		if ((nipq > maxnipq) && (maxnipq > 0)) {
774194a213eSAndrey A. Chernov 		    /*
775194a213eSAndrey A. Chernov 		     * drop something from the tail of the current queue
776194a213eSAndrey A. Chernov 		     * before proceeding further
777194a213eSAndrey A. Chernov 		     */
778462b86feSPoul-Henning Kamp 		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
779462b86feSPoul-Henning Kamp 		    if (q == NULL) {   /* gak */
780194a213eSAndrey A. Chernov 			for (i = 0; i < IPREASS_NHASH; i++) {
781462b86feSPoul-Henning Kamp 			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
782462b86feSPoul-Henning Kamp 			    if (r) {
78399e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += r->ipq_nfrags;
784462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], r);
785194a213eSAndrey A. Chernov 				break;
786194a213eSAndrey A. Chernov 			    }
787194a213eSAndrey A. Chernov 			}
788ac64c866SMike Silbersack 		    } else {
78999e8617dSMaxim Konovalov 			ipstat.ips_fragtimeout += q->ipq_nfrags;
790462b86feSPoul-Henning Kamp 			ip_freef(&ipq[sum], q);
791ac64c866SMike Silbersack 		    }
792194a213eSAndrey A. Chernov 		}
793194a213eSAndrey A. Chernov found:
794df8bae1dSRodney W. Grimes 		/*
795df8bae1dSRodney W. Grimes 		 * Adjust ip_len to not reflect header,
796df8bae1dSRodney W. Grimes 		 * convert offset of this to bytes.
797df8bae1dSRodney W. Grimes 		 */
798df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
799b6ea1aa5SRuslan Ermilov 		if (ip->ip_off & IP_MF) {
8006effc713SDoug Rabson 		        /*
8016effc713SDoug Rabson 		         * Make sure that fragments have a data length
8026effc713SDoug Rabson 			 * that's a non-zero multiple of 8 bytes.
8036effc713SDoug Rabson 		         */
8046effc713SDoug Rabson 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
8052fad1e93SSam Leffler 				IPQ_UNLOCK();
8066effc713SDoug Rabson 				ipstat.ips_toosmall++; /* XXX */
8076effc713SDoug Rabson 				goto bad;
8086effc713SDoug Rabson 			}
8096effc713SDoug Rabson 			m->m_flags |= M_FRAG;
8101cf43499SMaxim Konovalov 		} else
8111cf43499SMaxim Konovalov 			m->m_flags &= ~M_FRAG;
812df8bae1dSRodney W. Grimes 		ip->ip_off <<= 3;
813df8bae1dSRodney W. Grimes 
814df8bae1dSRodney W. Grimes 		/*
815b6ea1aa5SRuslan Ermilov 		 * Attempt reassembly; if it succeeds, proceed.
8162b25acc1SLuigi Rizzo 		 * ip_reass() will return a different mbuf, and update
8172b25acc1SLuigi Rizzo 		 * the divert info in divert_info and args.divert_rule.
818df8bae1dSRodney W. Grimes 		 */
819df8bae1dSRodney W. Grimes 		ipstat.ips_fragments++;
820487bdb38SRuslan Ermilov 		m->m_pkthdr.header = ip;
8216a800098SYoshinobu Inoue 		m = ip_reass(m,
8222b25acc1SLuigi Rizzo 		    &ipq[sum], fp, &divert_info, &args.divert_rule);
8232fad1e93SSam Leffler 		IPQ_UNLOCK();
8242b25acc1SLuigi Rizzo 		if (m == 0)
825c67b1d17SGarrett Wollman 			return;
826df8bae1dSRodney W. Grimes 		ipstat.ips_reassembled++;
8276a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8287e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
82953be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
830af782f1cSBrian Somers #ifdef IPDIVERT
8318948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
8328948e4baSArchie Cobbs 		if (divert_info != 0) {
833af782f1cSBrian Somers 			ip->ip_len += hlen;
834fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
835fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
836af782f1cSBrian Somers 			ip->ip_sum = 0;
83760123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
838af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
83960123168SRuslan Ermilov 			else
84060123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
841fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
842fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
843af782f1cSBrian Somers 			ip->ip_len -= hlen;
844af782f1cSBrian Somers 		}
845af782f1cSBrian Somers #endif
846df8bae1dSRodney W. Grimes 	} else
847df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
848df8bae1dSRodney W. Grimes 
84993e0e116SJulian Elischer #ifdef IPDIVERT
85093e0e116SJulian Elischer 	/*
8518948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
85293e0e116SJulian Elischer 	 */
8538948e4baSArchie Cobbs 	if (divert_info != 0) {
8548948e4baSArchie Cobbs 		struct mbuf *clone = NULL;
8558948e4baSArchie Cobbs 
8568948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
8578948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
858a163d034SWarner Losh 			clone = m_dup(m, M_DONTWAIT);
8598948e4baSArchie Cobbs 
8608948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
8618948e4baSArchie Cobbs 		ip->ip_len += hlen;
862fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
863fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
8648948e4baSArchie Cobbs 
8658948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
8662b25acc1SLuigi Rizzo 		divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
867e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
8688948e4baSArchie Cobbs 
8698948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
8708948e4baSArchie Cobbs 		if (clone == NULL)
87193e0e116SJulian Elischer 			return;
8728948e4baSArchie Cobbs 		m = clone;
8738948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
87456962689SCrist J. Clark 		ip->ip_len += hlen;
8752b25acc1SLuigi Rizzo 		/*
8762b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
8772b25acc1SLuigi Rizzo 		 * packet. But first clear divert_info to avoid
8782b25acc1SLuigi Rizzo 		 * entering this block again.
8792b25acc1SLuigi Rizzo 		 * We do not need to clear args.divert_rule
8802b25acc1SLuigi Rizzo 		 * or args.next_hop as they will not be used.
8812b25acc1SLuigi Rizzo 		 */
88256962689SCrist J. Clark 		divert_info = 0;
88356962689SCrist J. Clark 		goto pass;
88493e0e116SJulian Elischer 	}
88593e0e116SJulian Elischer #endif
88693e0e116SJulian Elischer 
88733841545SHajimu UMEMOTO #ifdef IPSEC
88833841545SHajimu UMEMOTO 	/*
88933841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
89033841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
89133841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
89233841545SHajimu UMEMOTO 	 */
89333841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
89433841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
89533841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
89633841545SHajimu UMEMOTO 		goto bad;
89733841545SHajimu UMEMOTO 	}
89833841545SHajimu UMEMOTO #endif
899b9234fafSSam Leffler #if FAST_IPSEC
900b9234fafSSam Leffler 	/*
901b9234fafSSam Leffler 	 * enforce IPsec policy checking if we are seeing last header.
902b9234fafSSam Leffler 	 * note that we do not visit this with protocols with pcb layer
903b9234fafSSam Leffler 	 * code - like udp/tcp/raw ip.
904b9234fafSSam Leffler 	 */
905b9234fafSSam Leffler 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
906b9234fafSSam Leffler 		/*
907b9234fafSSam Leffler 		 * Check if the packet has already had IPsec processing
908b9234fafSSam Leffler 		 * done.  If so, then just pass it along.  This tag gets
909b9234fafSSam Leffler 		 * set during AH, ESP, etc. input handling, before the
910b9234fafSSam Leffler 		 * packet is returned to the ip input queue for delivery.
911b9234fafSSam Leffler 		 */
912b9234fafSSam Leffler 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
913b9234fafSSam Leffler 		s = splnet();
914b9234fafSSam Leffler 		if (mtag != NULL) {
915b9234fafSSam Leffler 			tdbi = (struct tdb_ident *)(mtag + 1);
916b9234fafSSam Leffler 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
917b9234fafSSam Leffler 		} else {
918b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
919b9234fafSSam Leffler 						   IP_FORWARDING, &error);
920b9234fafSSam Leffler 		}
921b9234fafSSam Leffler 		if (sp != NULL) {
922b9234fafSSam Leffler 			/*
923b9234fafSSam Leffler 			 * Check security policy against packet attributes.
924b9234fafSSam Leffler 			 */
925b9234fafSSam Leffler 			error = ipsec_in_reject(sp, m);
926b9234fafSSam Leffler 			KEY_FREESP(&sp);
927b9234fafSSam Leffler 		} else {
928b9234fafSSam Leffler 			/* XXX error stat??? */
929b9234fafSSam Leffler 			error = EINVAL;
930b9234fafSSam Leffler DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
931b9234fafSSam Leffler 			goto bad;
932b9234fafSSam Leffler 		}
933b9234fafSSam Leffler 		splx(s);
934b9234fafSSam Leffler 		if (error)
935b9234fafSSam Leffler 			goto bad;
936b9234fafSSam Leffler 	}
937b9234fafSSam Leffler #endif /* FAST_IPSEC */
93833841545SHajimu UMEMOTO 
939df8bae1dSRodney W. Grimes 	/*
940df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
941df8bae1dSRodney W. Grimes 	 */
942df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
9432b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
9442b25acc1SLuigi Rizzo 		/* TCP needs IPFORWARD info if available */
9452b25acc1SLuigi Rizzo 		struct m_hdr tag;
9466a800098SYoshinobu Inoue 
9472b25acc1SLuigi Rizzo 		tag.mh_type = MT_TAG;
9482b25acc1SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
9492b25acc1SLuigi Rizzo 		tag.mh_data = (caddr_t)args.next_hop;
9502b25acc1SLuigi Rizzo 		tag.mh_next = m;
9512b25acc1SLuigi Rizzo 
9522b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
9532b25acc1SLuigi Rizzo 			(struct mbuf *)&tag, hlen);
9542b25acc1SLuigi Rizzo 	} else
9552b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
956c67b1d17SGarrett Wollman 	return;
957df8bae1dSRodney W. Grimes bad:
958df8bae1dSRodney W. Grimes 	m_freem(m);
959c67b1d17SGarrett Wollman }
960c67b1d17SGarrett Wollman 
961c67b1d17SGarrett Wollman /*
9628948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
9638948e4baSArchie Cobbs  * whole datagram.  If a chain for reassembly of this datagram already
9648948e4baSArchie Cobbs  * exists, then it is given as fp; otherwise have to make a chain.
9658948e4baSArchie Cobbs  *
9668948e4baSArchie Cobbs  * When IPDIVERT enabled, keep additional state with each packet that
9678948e4baSArchie Cobbs  * tells us if we need to divert or tee the packet we're building.
9682b25acc1SLuigi Rizzo  * In particular, *divinfo includes the port and TEE flag,
9692b25acc1SLuigi Rizzo  * *divert_rule is the number of the matching rule.
970df8bae1dSRodney W. Grimes  */
9718948e4baSArchie Cobbs 
9726a800098SYoshinobu Inoue static struct mbuf *
9732b25acc1SLuigi Rizzo ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
9742b25acc1SLuigi Rizzo 	u_int32_t *divinfo, u_int16_t *divert_rule)
975df8bae1dSRodney W. Grimes {
9766effc713SDoug Rabson 	struct ip *ip = mtod(m, struct ip *);
977b6ea1aa5SRuslan Ermilov 	register struct mbuf *p, *q, *nq;
978df8bae1dSRodney W. Grimes 	struct mbuf *t;
97953be11f6SPoul-Henning Kamp 	int hlen = ip->ip_hl << 2;
980df8bae1dSRodney W. Grimes 	int i, next;
981df8bae1dSRodney W. Grimes 
9822fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
9832fad1e93SSam Leffler 
984df8bae1dSRodney W. Grimes 	/*
985df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
986df8bae1dSRodney W. Grimes 	 * would confuse code below.
987df8bae1dSRodney W. Grimes 	 */
988df8bae1dSRodney W. Grimes 	m->m_data += hlen;
989df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
990df8bae1dSRodney W. Grimes 
991df8bae1dSRodney W. Grimes 	/*
992df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
993df8bae1dSRodney W. Grimes 	 */
994042bbfa3SRobert Watson 	if (fp == NULL) {
995a163d034SWarner Losh 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
996df8bae1dSRodney W. Grimes 			goto dropfrag;
997df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
99836b0360bSRobert Watson #ifdef MAC
9995e7ce478SRobert Watson 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
10005e7ce478SRobert Watson 			m_free(t);
10015e7ce478SRobert Watson 			goto dropfrag;
10025e7ce478SRobert Watson 		}
100336b0360bSRobert Watson 		mac_create_ipq(m, fp);
100436b0360bSRobert Watson #endif
1005462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
1006194a213eSAndrey A. Chernov 		nipq++;
1007375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
1008df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
1009df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
1010df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
10116effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
10126effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
1013af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1014af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
101593e0e116SJulian Elischer #ifdef IPDIVERT
10168948e4baSArchie Cobbs 		fp->ipq_div_info = 0;
1017bb60f459SJulian Elischer 		fp->ipq_div_cookie = 0;
101893e0e116SJulian Elischer #endif
1019af38c68cSLuigi Rizzo 		goto inserted;
102036b0360bSRobert Watson 	} else {
1021375386e2SMike Silbersack 		fp->ipq_nfrags++;
102236b0360bSRobert Watson #ifdef MAC
102336b0360bSRobert Watson 		mac_update_ipq(m, fp);
102436b0360bSRobert Watson #endif
1025df8bae1dSRodney W. Grimes 	}
1026df8bae1dSRodney W. Grimes 
10276effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
10286effc713SDoug Rabson 
1029df8bae1dSRodney W. Grimes 	/*
1030df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1031df8bae1dSRodney W. Grimes 	 */
10326effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10336effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1034df8bae1dSRodney W. Grimes 			break;
1035df8bae1dSRodney W. Grimes 
1036df8bae1dSRodney W. Grimes 	/*
1037df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1038df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1039af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1040af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1041db4f9cc7SJonathan Lemon 	 *
1042db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
1043db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
1044df8bae1dSRodney W. Grimes 	 */
10456effc713SDoug Rabson 	if (p) {
10466effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1047df8bae1dSRodney W. Grimes 		if (i > 0) {
1048df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1049df8bae1dSRodney W. Grimes 				goto dropfrag;
10506a800098SYoshinobu Inoue 			m_adj(m, i);
1051db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1052df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1053df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1054df8bae1dSRodney W. Grimes 		}
1055af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1056af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1057af38c68cSLuigi Rizzo 	} else {
1058af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1059af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1060df8bae1dSRodney W. Grimes 	}
1061df8bae1dSRodney W. Grimes 
1062df8bae1dSRodney W. Grimes 	/*
1063df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1064df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1065df8bae1dSRodney W. Grimes 	 */
10666effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1067af38c68cSLuigi Rizzo 	     q = nq) {
1068b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10696effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10706effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10716effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10726effc713SDoug Rabson 			m_adj(q, i);
1073db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1074df8bae1dSRodney W. Grimes 			break;
1075df8bae1dSRodney W. Grimes 		}
10766effc713SDoug Rabson 		nq = q->m_nextpkt;
1077af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
107899e8617dSMaxim Konovalov 		ipstat.ips_fragdropped++;
1079375386e2SMike Silbersack 		fp->ipq_nfrags--;
10806effc713SDoug Rabson 		m_freem(q);
1081df8bae1dSRodney W. Grimes 	}
1082df8bae1dSRodney W. Grimes 
1083af38c68cSLuigi Rizzo inserted:
108493e0e116SJulian Elischer 
108593e0e116SJulian Elischer #ifdef IPDIVERT
108693e0e116SJulian Elischer 	/*
10878948e4baSArchie Cobbs 	 * Transfer firewall instructions to the fragment structure.
10882b25acc1SLuigi Rizzo 	 * Only trust info in the fragment at offset 0.
108993e0e116SJulian Elischer 	 */
10902b25acc1SLuigi Rizzo 	if (ip->ip_off == 0) {
10918948e4baSArchie Cobbs 		fp->ipq_div_info = *divinfo;
10922b25acc1SLuigi Rizzo 		fp->ipq_div_cookie = *divert_rule;
10932b25acc1SLuigi Rizzo 	}
10948948e4baSArchie Cobbs 	*divinfo = 0;
10952b25acc1SLuigi Rizzo 	*divert_rule = 0;
109693e0e116SJulian Elischer #endif
109793e0e116SJulian Elischer 
1098df8bae1dSRodney W. Grimes 	/*
1099375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1100375386e2SMike Silbersack 	 * limiting.
1101375386e2SMike Silbersack 	 *
1102375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1103375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1104375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1105375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1106375386e2SMike Silbersack 	 *
1107df8bae1dSRodney W. Grimes 	 */
11086effc713SDoug Rabson 	next = 0;
11096effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1110375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
111199e8617dSMaxim Konovalov 			if (fp->ipq_nfrags > maxfragsperpacket) {
111299e8617dSMaxim Konovalov 				ipstat.ips_fragdropped += fp->ipq_nfrags;
1113375386e2SMike Silbersack 				ip_freef(head, fp);
111499e8617dSMaxim Konovalov 			}
11156effc713SDoug Rabson 			return (0);
1116375386e2SMike Silbersack 		}
11176effc713SDoug Rabson 		next += GETIP(q)->ip_len;
11186effc713SDoug Rabson 	}
11196effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1120375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
112199e8617dSMaxim Konovalov 		if (fp->ipq_nfrags > maxfragsperpacket) {
112299e8617dSMaxim Konovalov 			ipstat.ips_fragdropped += fp->ipq_nfrags;
1123375386e2SMike Silbersack 			ip_freef(head, fp);
112499e8617dSMaxim Konovalov 		}
1125df8bae1dSRodney W. Grimes 		return (0);
1126375386e2SMike Silbersack 	}
1127df8bae1dSRodney W. Grimes 
1128df8bae1dSRodney W. Grimes 	/*
1129430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1130430d30d8SBill Fenner 	 */
11316effc713SDoug Rabson 	q = fp->ipq_frags;
11326effc713SDoug Rabson 	ip = GETIP(q);
113353be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1134430d30d8SBill Fenner 		ipstat.ips_toolong++;
113599e8617dSMaxim Konovalov 		ipstat.ips_fragdropped += fp->ipq_nfrags;
1136462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1137430d30d8SBill Fenner 		return (0);
1138430d30d8SBill Fenner 	}
1139430d30d8SBill Fenner 
1140430d30d8SBill Fenner 	/*
1141430d30d8SBill Fenner 	 * Concatenate fragments.
1142df8bae1dSRodney W. Grimes 	 */
11436effc713SDoug Rabson 	m = q;
1144df8bae1dSRodney W. Grimes 	t = m->m_next;
1145df8bae1dSRodney W. Grimes 	m->m_next = 0;
1146df8bae1dSRodney W. Grimes 	m_cat(m, t);
11476effc713SDoug Rabson 	nq = q->m_nextpkt;
1148945aa40dSDoug Rabson 	q->m_nextpkt = 0;
11496effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11506effc713SDoug Rabson 		nq = q->m_nextpkt;
1151945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1152db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1153db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1154a8db1d93SJonathan Lemon 		m_cat(m, q);
1155df8bae1dSRodney W. Grimes 	}
115636b0360bSRobert Watson #ifdef MAC
115736b0360bSRobert Watson 	mac_create_datagram_from_ipq(fp, m);
115836b0360bSRobert Watson 	mac_destroy_ipq(fp);
115936b0360bSRobert Watson #endif
1160df8bae1dSRodney W. Grimes 
116193e0e116SJulian Elischer #ifdef IPDIVERT
116293e0e116SJulian Elischer 	/*
11638948e4baSArchie Cobbs 	 * Extract firewall instructions from the fragment structure.
116493e0e116SJulian Elischer 	 */
11658948e4baSArchie Cobbs 	*divinfo = fp->ipq_div_info;
11662b25acc1SLuigi Rizzo 	*divert_rule = fp->ipq_div_cookie;
116793e0e116SJulian Elischer #endif
116893e0e116SJulian Elischer 
1169df8bae1dSRodney W. Grimes 	/*
1170df8bae1dSRodney W. Grimes 	 * Create header for new ip packet by
1171df8bae1dSRodney W. Grimes 	 * modifying header of first packet;
1172df8bae1dSRodney W. Grimes 	 * dequeue and discard fragment reassembly header.
1173df8bae1dSRodney W. Grimes 	 * Make header visible.
1174df8bae1dSRodney W. Grimes 	 */
1175df8bae1dSRodney W. Grimes 	ip->ip_len = next;
11766effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
11776effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1178462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1179194a213eSAndrey A. Chernov 	nipq--;
1180df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
118153be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
118253be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1183df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1184a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1185a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
11866a800098SYoshinobu Inoue 	return (m);
1187df8bae1dSRodney W. Grimes 
1188df8bae1dSRodney W. Grimes dropfrag:
1189efe39c6aSJulian Elischer #ifdef IPDIVERT
11908948e4baSArchie Cobbs 	*divinfo = 0;
11912b25acc1SLuigi Rizzo 	*divert_rule = 0;
1192efe39c6aSJulian Elischer #endif
1193df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1194042bbfa3SRobert Watson 	if (fp != NULL)
1195375386e2SMike Silbersack 		fp->ipq_nfrags--;
1196df8bae1dSRodney W. Grimes 	m_freem(m);
1197df8bae1dSRodney W. Grimes 	return (0);
11986effc713SDoug Rabson 
11996effc713SDoug Rabson #undef GETIP
1200df8bae1dSRodney W. Grimes }
1201df8bae1dSRodney W. Grimes 
1202df8bae1dSRodney W. Grimes /*
1203df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1204df8bae1dSRodney W. Grimes  * associated datagrams.
1205df8bae1dSRodney W. Grimes  */
12060312fbe9SPoul-Henning Kamp static void
1207462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1208462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1209df8bae1dSRodney W. Grimes 	struct ipq *fp;
1210df8bae1dSRodney W. Grimes {
12116effc713SDoug Rabson 	register struct mbuf *q;
1212df8bae1dSRodney W. Grimes 
12132fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
12142fad1e93SSam Leffler 
12156effc713SDoug Rabson 	while (fp->ipq_frags) {
12166effc713SDoug Rabson 		q = fp->ipq_frags;
12176effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
12186effc713SDoug Rabson 		m_freem(q);
1219df8bae1dSRodney W. Grimes 	}
1220462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1221df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1222194a213eSAndrey A. Chernov 	nipq--;
1223df8bae1dSRodney W. Grimes }
1224df8bae1dSRodney W. Grimes 
1225df8bae1dSRodney W. Grimes /*
1226df8bae1dSRodney W. Grimes  * IP timer processing;
1227df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1228df8bae1dSRodney W. Grimes  * queue, discard it.
1229df8bae1dSRodney W. Grimes  */
1230df8bae1dSRodney W. Grimes void
1231df8bae1dSRodney W. Grimes ip_slowtimo()
1232df8bae1dSRodney W. Grimes {
1233df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1234df8bae1dSRodney W. Grimes 	int s = splnet();
1235194a213eSAndrey A. Chernov 	int i;
1236df8bae1dSRodney W. Grimes 
12372fad1e93SSam Leffler 	IPQ_LOCK();
1238194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1239462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1240462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1241462b86feSPoul-Henning Kamp 
1242462b86feSPoul-Henning Kamp 			fpp = fp;
1243462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1244462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
124599e8617dSMaxim Konovalov 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
1246462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1247df8bae1dSRodney W. Grimes 			}
1248df8bae1dSRodney W. Grimes 		}
1249194a213eSAndrey A. Chernov 	}
1250690a6055SJesper Skriver 	/*
1251690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1252690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1253690a6055SJesper Skriver 	 * enough to get down to the new limit.
1254690a6055SJesper Skriver 	 */
1255a75a485dSMike Silbersack 	if (maxnipq >= 0 && nipq > maxnipq) {
1256690a6055SJesper Skriver 		for (i = 0; i < IPREASS_NHASH; i++) {
1257b36f5b37SMaxim Konovalov 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
125899e8617dSMaxim Konovalov 				ipstat.ips_fragdropped +=
125999e8617dSMaxim Konovalov 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1260690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1261690a6055SJesper Skriver 			}
1262690a6055SJesper Skriver 		}
1263690a6055SJesper Skriver 	}
12642fad1e93SSam Leffler 	IPQ_UNLOCK();
12651f91d8c5SDavid Greenman 	ipflow_slowtimo();
1266df8bae1dSRodney W. Grimes 	splx(s);
1267df8bae1dSRodney W. Grimes }
1268df8bae1dSRodney W. Grimes 
1269df8bae1dSRodney W. Grimes /*
1270df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1271df8bae1dSRodney W. Grimes  */
1272df8bae1dSRodney W. Grimes void
1273df8bae1dSRodney W. Grimes ip_drain()
1274df8bae1dSRodney W. Grimes {
1275194a213eSAndrey A. Chernov 	int     i;
1276ce29ab3aSGarrett Wollman 
12772fad1e93SSam Leffler 	IPQ_LOCK();
1278194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1279462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
128099e8617dSMaxim Konovalov 			ipstat.ips_fragdropped +=
128199e8617dSMaxim Konovalov 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
1282462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1283194a213eSAndrey A. Chernov 		}
1284194a213eSAndrey A. Chernov 	}
12852fad1e93SSam Leffler 	IPQ_UNLOCK();
1286ce29ab3aSGarrett Wollman 	in_rtqdrain();
1287df8bae1dSRodney W. Grimes }
1288df8bae1dSRodney W. Grimes 
1289df8bae1dSRodney W. Grimes /*
1290df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1291df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1292df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1293d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1294d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1295d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1296d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1297d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1298df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1299df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1300df8bae1dSRodney W. Grimes  */
13010312fbe9SPoul-Henning Kamp static int
13022b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1303df8bae1dSRodney W. Grimes {
13042b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
13052b25acc1SLuigi Rizzo 	u_char *cp;
13062b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1307df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1308df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1309df8bae1dSRodney W. Grimes 	n_time ntime;
13104d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1311df8bae1dSRodney W. Grimes 
1312df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1313df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
131453be11f6SPoul-Henning Kamp 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1315df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1316df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1317df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1318df8bae1dSRodney W. Grimes 			break;
1319df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1320df8bae1dSRodney W. Grimes 			optlen = 1;
1321df8bae1dSRodney W. Grimes 		else {
1322fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1323fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1324fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1325fdcb8debSJun-ichiro itojun Hagino 			}
1326df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
1327707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1328df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1329df8bae1dSRodney W. Grimes 				goto bad;
1330df8bae1dSRodney W. Grimes 			}
1331df8bae1dSRodney W. Grimes 		}
1332df8bae1dSRodney W. Grimes 		switch (opt) {
1333df8bae1dSRodney W. Grimes 
1334df8bae1dSRodney W. Grimes 		default:
1335df8bae1dSRodney W. Grimes 			break;
1336df8bae1dSRodney W. Grimes 
1337df8bae1dSRodney W. Grimes 		/*
1338df8bae1dSRodney W. Grimes 		 * Source routing with record.
1339df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1340df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1341df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1342df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1343df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1344df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1345df8bae1dSRodney W. Grimes 		 */
1346df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1347df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1348d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1349d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1350d0ebc0d2SYaroslav Tykhiy 				break;
1351d0ebc0d2SYaroslav Tykhiy #endif
135233841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
135333841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
135433841545SHajimu UMEMOTO 				goto bad;
135533841545SHajimu UMEMOTO 			}
1356df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1357df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1358df8bae1dSRodney W. Grimes 				goto bad;
1359df8bae1dSRodney W. Grimes 			}
1360df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1361df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1362df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1363df8bae1dSRodney W. Grimes 			if (ia == 0) {
1364df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1365df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1366df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1367df8bae1dSRodney W. Grimes 					goto bad;
1368df8bae1dSRodney W. Grimes 				}
1369bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1370bc189bf8SGuido van Rooij 					goto nosourcerouting;
1371df8bae1dSRodney W. Grimes 				/*
1372df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1373df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1374df8bae1dSRodney W. Grimes 				 */
1375df8bae1dSRodney W. Grimes 				break;
1376df8bae1dSRodney W. Grimes 			}
1377df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
13785d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1379df8bae1dSRodney W. Grimes 				/*
1380df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1381df8bae1dSRodney W. Grimes 				 */
13824fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
13834fce5804SGuido van Rooij 					goto nosourcerouting;
1384df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1385df8bae1dSRodney W. Grimes 				break;
1386df8bae1dSRodney W. Grimes 			}
1387d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1388d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1389d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1390d0ebc0d2SYaroslav Tykhiy #endif
13911025071fSGarrett Wollman 			if (!ip_dosourceroute) {
13920af8d3ecSDavid Greenman 				if (ipforwarding) {
13930af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
13940af8d3ecSDavid Greenman 					/*
13950af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
13960af8d3ecSDavid Greenman 					 */
1397efa48587SGuido van Rooij nosourcerouting:
1398bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
13991025071fSGarrett Wollman 					log(LOG_WARNING,
14001025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
14011025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
14021025071fSGarrett Wollman 					type = ICMP_UNREACH;
14031025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
14041025071fSGarrett Wollman 					goto bad;
14050af8d3ecSDavid Greenman 				} else {
14060af8d3ecSDavid Greenman 					/*
14070af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
14080af8d3ecSDavid Greenman 					 */
1409d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1410d0ebc0d2SYaroslav Tykhiy dropit:
1411d0ebc0d2SYaroslav Tykhiy #endif
14120af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
14130af8d3ecSDavid Greenman 					m_freem(m);
14140af8d3ecSDavid Greenman 					return (1);
14150af8d3ecSDavid Greenman 				}
14161025071fSGarrett Wollman 			}
14171025071fSGarrett Wollman 
1418df8bae1dSRodney W. Grimes 			/*
1419df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1420df8bae1dSRodney W. Grimes 			 */
142194a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1422df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
14231025071fSGarrett Wollman 
1424df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1425df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1426df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1427df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1428df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1429df8bae1dSRodney W. Grimes 			} else
1430bd714208SRuslan Ermilov 				ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
1431df8bae1dSRodney W. Grimes 			if (ia == 0) {
1432df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1433df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1434df8bae1dSRodney W. Grimes 				goto bad;
1435df8bae1dSRodney W. Grimes 			}
1436df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
143794a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
143894a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1439df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1440df8bae1dSRodney W. Grimes 			/*
1441df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1442df8bae1dSRodney W. Grimes 			 */
1443df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1444df8bae1dSRodney W. Grimes 			break;
1445df8bae1dSRodney W. Grimes 
1446df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1447d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1448d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1449d0ebc0d2SYaroslav Tykhiy 				break;
1450d0ebc0d2SYaroslav Tykhiy #endif
1451707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1452707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1453707d00a3SJonathan Lemon 				goto bad;
1454707d00a3SJonathan Lemon 			}
1455df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1456df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1457df8bae1dSRodney W. Grimes 				goto bad;
1458df8bae1dSRodney W. Grimes 			}
1459df8bae1dSRodney W. Grimes 			/*
1460df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1461df8bae1dSRodney W. Grimes 			 */
1462df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
14635d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1464df8bae1dSRodney W. Grimes 				break;
146594a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1466df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1467df8bae1dSRodney W. Grimes 			/*
1468df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1469df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1470df8bae1dSRodney W. Grimes 			 */
1471df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1472bd714208SRuslan Ermilov 			    (ia = ip_rtaddr(ipaddr.sin_addr,
1473bd714208SRuslan Ermilov 			    &ipforward_rt)) == 0) {
1474df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1475df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1476df8bae1dSRodney W. Grimes 				goto bad;
1477df8bae1dSRodney W. Grimes 			}
147894a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
147994a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1480df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1481df8bae1dSRodney W. Grimes 			break;
1482df8bae1dSRodney W. Grimes 
1483df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1484d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1485d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1486d0ebc0d2SYaroslav Tykhiy 				break;
1487d0ebc0d2SYaroslav Tykhiy #endif
1488df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
148907514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
149007514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1491df8bae1dSRodney W. Grimes 				goto bad;
149233841545SHajimu UMEMOTO 			}
149307514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
149407514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
149533841545SHajimu UMEMOTO 				goto bad;
149633841545SHajimu UMEMOTO 			}
149707514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
149807514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
149907514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
150007514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1501df8bae1dSRodney W. Grimes 					goto bad;
150233841545SHajimu UMEMOTO 				}
1503df8bae1dSRodney W. Grimes 				break;
1504df8bae1dSRodney W. Grimes 			}
150507514071SJonathan Lemon 			off--;				/* 0 origin */
150607514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
150707514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1508df8bae1dSRodney W. Grimes 
1509df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1510df8bae1dSRodney W. Grimes 				break;
1511df8bae1dSRodney W. Grimes 
1512df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
151307514071SJonathan Lemon 				if (off + sizeof(n_time) +
151407514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
151507514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1516df8bae1dSRodney W. Grimes 					goto bad;
151733841545SHajimu UMEMOTO 				}
1518df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1519df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1520df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
1521df8bae1dSRodney W. Grimes 				if (ia == 0)
1522df8bae1dSRodney W. Grimes 					continue;
152394a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
152494a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
152507514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1526a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1527df8bae1dSRodney W. Grimes 				break;
1528df8bae1dSRodney W. Grimes 
1529df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
153007514071SJonathan Lemon 				if (off + sizeof(n_time) +
153107514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
153207514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1533df8bae1dSRodney W. Grimes 					goto bad;
153433841545SHajimu UMEMOTO 				}
153594a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1536df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
1537df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1538df8bae1dSRodney W. Grimes 					continue;
153907514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1540a5428e3aSMaxim Konovalov 				off += sizeof(struct in_addr);
1541df8bae1dSRodney W. Grimes 				break;
1542df8bae1dSRodney W. Grimes 
1543df8bae1dSRodney W. Grimes 			default:
154407514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1545df8bae1dSRodney W. Grimes 				goto bad;
1546df8bae1dSRodney W. Grimes 			}
1547df8bae1dSRodney W. Grimes 			ntime = iptime();
154807514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
154907514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1550df8bae1dSRodney W. Grimes 		}
1551df8bae1dSRodney W. Grimes 	}
155247174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
15532b25acc1SLuigi Rizzo 		ip_forward(m, 1, next_hop);
1554df8bae1dSRodney W. Grimes 		return (1);
1555df8bae1dSRodney W. Grimes 	}
1556df8bae1dSRodney W. Grimes 	return (0);
1557df8bae1dSRodney W. Grimes bad:
1558df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1559df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1560df8bae1dSRodney W. Grimes 	return (1);
1561df8bae1dSRodney W. Grimes }
1562df8bae1dSRodney W. Grimes 
1563df8bae1dSRodney W. Grimes /*
1564df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1565df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1566df8bae1dSRodney W. Grimes  */
1567bd714208SRuslan Ermilov struct in_ifaddr *
1568bd714208SRuslan Ermilov ip_rtaddr(dst, rt)
1569df8bae1dSRodney W. Grimes 	struct in_addr dst;
1570bd714208SRuslan Ermilov 	struct route *rt;
1571df8bae1dSRodney W. Grimes {
1572df8bae1dSRodney W. Grimes 	register struct sockaddr_in *sin;
1573df8bae1dSRodney W. Grimes 
1574bd714208SRuslan Ermilov 	sin = (struct sockaddr_in *)&rt->ro_dst;
1575df8bae1dSRodney W. Grimes 
1576bd714208SRuslan Ermilov 	if (rt->ro_rt == 0 ||
1577bd714208SRuslan Ermilov 	    !(rt->ro_rt->rt_flags & RTF_UP) ||
15784078ffb1SRuslan Ermilov 	    dst.s_addr != sin->sin_addr.s_addr) {
1579bd714208SRuslan Ermilov 		if (rt->ro_rt) {
1580bd714208SRuslan Ermilov 			RTFREE(rt->ro_rt);
1581bd714208SRuslan Ermilov 			rt->ro_rt = 0;
1582df8bae1dSRodney W. Grimes 		}
1583df8bae1dSRodney W. Grimes 		sin->sin_family = AF_INET;
1584df8bae1dSRodney W. Grimes 		sin->sin_len = sizeof(*sin);
1585df8bae1dSRodney W. Grimes 		sin->sin_addr = dst;
1586df8bae1dSRodney W. Grimes 
1587bd714208SRuslan Ermilov 		rtalloc_ign(rt, RTF_PRCLONING);
1588df8bae1dSRodney W. Grimes 	}
1589bd714208SRuslan Ermilov 	if (rt->ro_rt == 0)
1590df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
1591bd714208SRuslan Ermilov 	return (ifatoia(rt->ro_rt->rt_ifa));
1592df8bae1dSRodney W. Grimes }
1593df8bae1dSRodney W. Grimes 
1594df8bae1dSRodney W. Grimes /*
1595df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1596df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1597df8bae1dSRodney W. Grimes  */
159837c84183SPoul-Henning Kamp static void
1599df8bae1dSRodney W. Grimes save_rte(option, dst)
1600df8bae1dSRodney W. Grimes 	u_char *option;
1601df8bae1dSRodney W. Grimes 	struct in_addr dst;
1602df8bae1dSRodney W. Grimes {
1603df8bae1dSRodney W. Grimes 	unsigned olen;
1604df8bae1dSRodney W. Grimes 
1605df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1606df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1607df8bae1dSRodney W. Grimes 	if (ipprintfs)
1608df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1609df8bae1dSRodney W. Grimes #endif
1610df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1611df8bae1dSRodney W. Grimes 		return;
16120453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1613df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1614df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1615df8bae1dSRodney W. Grimes }
1616df8bae1dSRodney W. Grimes 
1617df8bae1dSRodney W. Grimes /*
1618df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1619df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1620df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1621df8bae1dSRodney W. Grimes  */
1622df8bae1dSRodney W. Grimes struct mbuf *
1623df8bae1dSRodney W. Grimes ip_srcroute()
1624df8bae1dSRodney W. Grimes {
1625df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1626df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1627df8bae1dSRodney W. Grimes 
1628df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1629df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1630a163d034SWarner Losh 	m = m_get(M_DONTWAIT, MT_HEADER);
1631df8bae1dSRodney W. Grimes 	if (m == 0)
1632df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1633df8bae1dSRodney W. Grimes 
1634df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1635df8bae1dSRodney W. Grimes 
1636df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1637df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1638df8bae1dSRodney W. Grimes 	    OPTSIZ;
1639df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1640df8bae1dSRodney W. Grimes 	if (ipprintfs)
1641df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1642df8bae1dSRodney W. Grimes #endif
1643df8bae1dSRodney W. Grimes 
1644df8bae1dSRodney W. Grimes 	/*
1645df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1646df8bae1dSRodney W. Grimes 	 */
1647df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1648df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1649df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1650df8bae1dSRodney W. Grimes 	if (ipprintfs)
1651af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1652df8bae1dSRodney W. Grimes #endif
1653df8bae1dSRodney W. Grimes 
1654df8bae1dSRodney W. Grimes 	/*
1655df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1656df8bae1dSRodney W. Grimes 	 */
1657df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1658df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
165994a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
166094a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1661df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1662df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1663df8bae1dSRodney W. Grimes #undef OPTSIZ
1664df8bae1dSRodney W. Grimes 	/*
1665df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1666df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1667df8bae1dSRodney W. Grimes 	 */
1668df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1669df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1670df8bae1dSRodney W. Grimes 		if (ipprintfs)
1671af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1672df8bae1dSRodney W. Grimes #endif
1673df8bae1dSRodney W. Grimes 		*q++ = *p--;
1674df8bae1dSRodney W. Grimes 	}
1675df8bae1dSRodney W. Grimes 	/*
1676df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1677df8bae1dSRodney W. Grimes 	 */
1678df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1679df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1680df8bae1dSRodney W. Grimes 	if (ipprintfs)
1681af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1682df8bae1dSRodney W. Grimes #endif
1683df8bae1dSRodney W. Grimes 	return (m);
1684df8bae1dSRodney W. Grimes }
1685df8bae1dSRodney W. Grimes 
1686df8bae1dSRodney W. Grimes /*
1687df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1688df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1689df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1690df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1691df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1692df8bae1dSRodney W. Grimes  */
1693df8bae1dSRodney W. Grimes void
1694df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1695df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1696df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1697df8bae1dSRodney W. Grimes {
1698df8bae1dSRodney W. Grimes 	register int i;
1699df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1700df8bae1dSRodney W. Grimes 	register caddr_t opts;
1701df8bae1dSRodney W. Grimes 	int olen;
1702df8bae1dSRodney W. Grimes 
170353be11f6SPoul-Henning Kamp 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
1704df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1705df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1706df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1707df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1708df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1709df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
171053be11f6SPoul-Henning Kamp 	ip->ip_v = IPVERSION;
171153be11f6SPoul-Henning Kamp 	ip->ip_hl = sizeof(struct ip) >> 2;
1712df8bae1dSRodney W. Grimes }
1713df8bae1dSRodney W. Grimes 
/*
 * Table of errno values with PRC_NCMDS slots; a zero entry means no
 * error is associated with that slot.
 * NOTE(review): presumably indexed by the PRC_* control-input command
 * codes -- confirm against the header that defines PRC_NCMDS.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		/*  0 */
	0,		/*  1 */
	0,		/*  2 */
	0,		/*  3 */
	0,		/*  4 */
	EMSGSIZE,	/*  5 */
	EHOSTDOWN,	/*  6 */
	EHOSTUNREACH,	/*  7 */
	EHOSTUNREACH,	/*  8 */
	EHOSTUNREACH,	/*  9 */
	ECONNREFUSED,	/* 10 */
	ECONNREFUSED,	/* 11 */
	EMSGSIZE,	/* 12 */
	EHOSTUNREACH,	/* 13 */
	0,		/* 14 */
	0,		/* 15 */
	0,		/* 16 */
	0,		/* 17 */
	EHOSTUNREACH,	/* 18 */
	0,		/* 19 */
	ENOPROTOOPT,	/* 20 */
	ECONNREFUSED	/* 21 */
};
1722df8bae1dSRodney W. Grimes 
1723df8bae1dSRodney W. Grimes /*
1724df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1725df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1726df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1727df8bae1dSRodney W. Grimes  * of codes and types.
1728df8bae1dSRodney W. Grimes  *
1729df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1730df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1731df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1732df8bae1dSRodney W. Grimes  * protocol deal with that.
1733df8bae1dSRodney W. Grimes  *
1734df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1735df8bae1dSRodney W. Grimes  * via a source route.
1736df8bae1dSRodney W. Grimes  */
17370312fbe9SPoul-Henning Kamp static void
17382b25acc1SLuigi Rizzo ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1739df8bae1dSRodney W. Grimes {
17402b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
17412b25acc1SLuigi Rizzo 	struct rtentry *rt;
174226f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1743df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1744df8bae1dSRodney W. Grimes 	n_long dest;
17453efc3014SJulian Elischer 	struct in_addr pkt_dst;
1746df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
1747b9234fafSSam Leffler #if defined(IPSEC) || defined(FAST_IPSEC)
17486a800098SYoshinobu Inoue 	struct ifnet dummyifp;
17496a800098SYoshinobu Inoue #endif
1750df8bae1dSRodney W. Grimes 
1751df8bae1dSRodney W. Grimes 	dest = 0;
17523efc3014SJulian Elischer 	/*
17533efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
17543efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
17553efc3014SJulian Elischer 	 */
17562b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
17573efc3014SJulian Elischer 
1758df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1759df8bae1dSRodney W. Grimes 	if (ipprintfs)
176061ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
17613efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1762162886e2SBruce Evans 		    ip->ip_ttl);
1763df8bae1dSRodney W. Grimes #endif
1764100ba1a6SJordan K. Hubbard 
1765100ba1a6SJordan K. Hubbard 
17663efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1767df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1768df8bae1dSRodney W. Grimes 		m_freem(m);
1769df8bae1dSRodney W. Grimes 		return;
1770df8bae1dSRodney W. Grimes 	}
17711b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17721b968362SDag-Erling Smørgrav 	if (!ipstealth) {
17731b968362SDag-Erling Smørgrav #endif
1774df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
17751b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
17761b968362SDag-Erling Smørgrav 			    dest, 0);
1777df8bae1dSRodney W. Grimes 			return;
1778df8bae1dSRodney W. Grimes 		}
17791b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
17801b968362SDag-Erling Smørgrav 	}
17811b968362SDag-Erling Smørgrav #endif
1782df8bae1dSRodney W. Grimes 
17833efc3014SJulian Elischer 	if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) {
1784df8bae1dSRodney W. Grimes 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1785df8bae1dSRodney W. Grimes 		return;
17864078ffb1SRuslan Ermilov 	} else
1787df8bae1dSRodney W. Grimes 		rt = ipforward_rt.ro_rt;
1788df8bae1dSRodney W. Grimes 
1789df8bae1dSRodney W. Grimes 	/*
1790bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1791bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1792bfef7ed4SIan Dowse 	 *
17934d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
17944d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
17954d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
17964d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
17974d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
17984d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
17994d2e3692SLuigi Rizzo 	 *
1800bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1801bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1802bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1803bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1804df8bae1dSRodney W. Grimes 	 */
1805a163d034SWarner Losh 	MGET(mcopy, M_DONTWAIT, m->m_type);
1806a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
18079967cafcSSam Leffler 		/*
18089967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
18099967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
18109967cafcSSam Leffler 		 * be conservative and just discard the copy since
18119967cafcSSam Leffler 		 * code below may some day want the tags.
18129967cafcSSam Leffler 		 */
18139967cafcSSam Leffler 		m_free(mcopy);
18149967cafcSSam Leffler 		mcopy = NULL;
18159967cafcSSam Leffler 	}
1816bfef7ed4SIan Dowse 	if (mcopy != NULL) {
181753be11f6SPoul-Henning Kamp 		mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1818bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1819bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1820e316463aSRobert Watson 		/*
1821688fe1d9SRobert Watson 		 * XXXMAC: Eventually, we may have an explict labeling
1822688fe1d9SRobert Watson 		 * point here.
1823e316463aSRobert Watson 		 */
1824bfef7ed4SIan Dowse 	}
182504287599SRuslan Ermilov 
182604287599SRuslan Ermilov #ifdef IPSTEALTH
182704287599SRuslan Ermilov 	if (!ipstealth) {
182804287599SRuslan Ermilov #endif
182904287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
183004287599SRuslan Ermilov #ifdef IPSTEALTH
183104287599SRuslan Ermilov 	}
183204287599SRuslan Ermilov #endif
1833df8bae1dSRodney W. Grimes 
1834df8bae1dSRodney W. Grimes 	/*
1835df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1836df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1837df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1838df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1839df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1840df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1841df8bae1dSRodney W. Grimes 	 */
1842df8bae1dSRodney W. Grimes 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1843df8bae1dSRodney W. Grimes 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1844df8bae1dSRodney W. Grimes 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
18452b25acc1SLuigi Rizzo 	    ipsendredirects && !srcrt && !next_hop) {
1846df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1847df8bae1dSRodney W. Grimes 		u_long src = ntohl(ip->ip_src.s_addr);
1848df8bae1dSRodney W. Grimes 
1849df8bae1dSRodney W. Grimes 		if (RTA(rt) &&
1850df8bae1dSRodney W. Grimes 		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1851df8bae1dSRodney W. Grimes 		    if (rt->rt_flags & RTF_GATEWAY)
1852df8bae1dSRodney W. Grimes 			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1853df8bae1dSRodney W. Grimes 		    else
18543efc3014SJulian Elischer 			dest = pkt_dst.s_addr;
1855df8bae1dSRodney W. Grimes 		    /* Router requirements says to only send host redirects */
1856df8bae1dSRodney W. Grimes 		    type = ICMP_REDIRECT;
1857df8bae1dSRodney W. Grimes 		    code = ICMP_REDIRECT_HOST;
1858df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1859df8bae1dSRodney W. Grimes 		    if (ipprintfs)
1860df8bae1dSRodney W. Grimes 		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1861df8bae1dSRodney W. Grimes #endif
1862df8bae1dSRodney W. Grimes 		}
1863df8bae1dSRodney W. Grimes 	}
1864df8bae1dSRodney W. Grimes 
1865ea779ff3SLuigi Rizzo     {
1866ea779ff3SLuigi Rizzo 	struct m_hdr tag;
1867ea779ff3SLuigi Rizzo 
1868ea779ff3SLuigi Rizzo 	if (next_hop) {
1869ea779ff3SLuigi Rizzo 		/* Pass IPFORWARD info if available */
1870ea779ff3SLuigi Rizzo 
1871ea779ff3SLuigi Rizzo 		tag.mh_type = MT_TAG;
1872ea779ff3SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
1873ea779ff3SLuigi Rizzo 		tag.mh_data = (caddr_t)next_hop;
1874ea779ff3SLuigi Rizzo 		tag.mh_next = m;
1875ea779ff3SLuigi Rizzo 		m = (struct mbuf *)&tag;
1876ea779ff3SLuigi Rizzo 	}
1877b97d15cbSGarrett Wollman 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
18785d846453SSam Leffler 			  IP_FORWARDING, 0, NULL);
1879ea779ff3SLuigi Rizzo     }
1880df8bae1dSRodney W. Grimes 	if (error)
1881df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1882df8bae1dSRodney W. Grimes 	else {
1883df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1884df8bae1dSRodney W. Grimes 		if (type)
1885df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1886df8bae1dSRodney W. Grimes 		else {
18871f91d8c5SDavid Greenman 			if (mcopy) {
18881f91d8c5SDavid Greenman 				ipflow_create(&ipforward_rt, mcopy);
1889df8bae1dSRodney W. Grimes 				m_freem(mcopy);
18901f91d8c5SDavid Greenman 			}
1891df8bae1dSRodney W. Grimes 			return;
1892df8bae1dSRodney W. Grimes 		}
1893df8bae1dSRodney W. Grimes 	}
1894df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1895df8bae1dSRodney W. Grimes 		return;
1896df8bae1dSRodney W. Grimes 	destifp = NULL;
1897df8bae1dSRodney W. Grimes 
1898df8bae1dSRodney W. Grimes 	switch (error) {
1899df8bae1dSRodney W. Grimes 
1900df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1901df8bae1dSRodney W. Grimes 		/* type, code set above */
1902df8bae1dSRodney W. Grimes 		break;
1903df8bae1dSRodney W. Grimes 
1904df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1905df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1906df8bae1dSRodney W. Grimes 	case ENETDOWN:
1907df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1908df8bae1dSRodney W. Grimes 	default:
1909df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1910df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1911df8bae1dSRodney W. Grimes 		break;
1912df8bae1dSRodney W. Grimes 
1913df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1914df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1915df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
19165d846453SSam Leffler #ifdef IPSEC
19176a800098SYoshinobu Inoue 		/*
19186a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
19196a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
19206a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
19216a800098SYoshinobu Inoue 		 * XXX quickhack!!!
19226a800098SYoshinobu Inoue 		 */
19236a800098SYoshinobu Inoue 		if (ipforward_rt.ro_rt) {
19246a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
19256a800098SYoshinobu Inoue 			int ipsecerror;
19266a800098SYoshinobu Inoue 			int ipsechdr;
19276a800098SYoshinobu Inoue 			struct route *ro;
19286a800098SYoshinobu Inoue 
19296a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
19306a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
19316a800098SYoshinobu Inoue 			                            IP_FORWARDING,
19326a800098SYoshinobu Inoue 			                            &ipsecerror);
19336a800098SYoshinobu Inoue 
19346a800098SYoshinobu Inoue 			if (sp == NULL)
19356a800098SYoshinobu Inoue 				destifp = ipforward_rt.ro_rt->rt_ifp;
19366a800098SYoshinobu Inoue 			else {
19376a800098SYoshinobu Inoue 				/* count IPsec header size */
19386a800098SYoshinobu Inoue 				ipsechdr = ipsec4_hdrsiz(mcopy,
19396a800098SYoshinobu Inoue 							 IPSEC_DIR_OUTBOUND,
19406a800098SYoshinobu Inoue 							 NULL);
19416a800098SYoshinobu Inoue 
19426a800098SYoshinobu Inoue 				/*
19436a800098SYoshinobu Inoue 				 * find the correct route for outer IPv4
19446a800098SYoshinobu Inoue 				 * header, compute tunnel MTU.
19456a800098SYoshinobu Inoue 				 *
19466a800098SYoshinobu Inoue 				 * XXX BUG ALERT
19476a800098SYoshinobu Inoue 				 * The "dummyifp" code relies upon the fact
19486a800098SYoshinobu Inoue 				 * that icmp_error() touches only ifp->if_mtu.
19496a800098SYoshinobu Inoue 				 */
19506a800098SYoshinobu Inoue 				/*XXX*/
19516a800098SYoshinobu Inoue 				destifp = NULL;
19526a800098SYoshinobu Inoue 				if (sp->req != NULL
19536a800098SYoshinobu Inoue 				 && sp->req->sav != NULL
19546a800098SYoshinobu Inoue 				 && sp->req->sav->sah != NULL) {
19556a800098SYoshinobu Inoue 					ro = &sp->req->sav->sah->sa_route;
19566a800098SYoshinobu Inoue 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
19576a800098SYoshinobu Inoue 						dummyifp.if_mtu =
19586a800098SYoshinobu Inoue 						    ro->ro_rt->rt_ifp->if_mtu;
19596a800098SYoshinobu Inoue 						dummyifp.if_mtu -= ipsechdr;
19606a800098SYoshinobu Inoue 						destifp = &dummyifp;
19616a800098SYoshinobu Inoue 					}
19626a800098SYoshinobu Inoue 				}
19636a800098SYoshinobu Inoue 
19646a800098SYoshinobu Inoue 				key_freesp(sp);
19656a800098SYoshinobu Inoue 			}
19666a800098SYoshinobu Inoue 		}
1967b9234fafSSam Leffler #elif FAST_IPSEC
1968b9234fafSSam Leffler 		/*
1969b9234fafSSam Leffler 		 * If the packet is routed over IPsec tunnel, tell the
1970b9234fafSSam Leffler 		 * originator the tunnel MTU.
1971b9234fafSSam Leffler 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1972b9234fafSSam Leffler 		 * XXX quickhack!!!
1973b9234fafSSam Leffler 		 */
1974b9234fafSSam Leffler 		if (ipforward_rt.ro_rt) {
1975b9234fafSSam Leffler 			struct secpolicy *sp = NULL;
1976b9234fafSSam Leffler 			int ipsecerror;
1977b9234fafSSam Leffler 			int ipsechdr;
1978b9234fafSSam Leffler 			struct route *ro;
1979b9234fafSSam Leffler 
1980b9234fafSSam Leffler 			sp = ipsec_getpolicybyaddr(mcopy,
1981b9234fafSSam Leffler 						   IPSEC_DIR_OUTBOUND,
1982b9234fafSSam Leffler 			                           IP_FORWARDING,
1983b9234fafSSam Leffler 			                           &ipsecerror);
1984b9234fafSSam Leffler 
1985b9234fafSSam Leffler 			if (sp == NULL)
1986b9234fafSSam Leffler 				destifp = ipforward_rt.ro_rt->rt_ifp;
1987b9234fafSSam Leffler 			else {
1988b9234fafSSam Leffler 				/* count IPsec header size */
1989b9234fafSSam Leffler 				ipsechdr = ipsec4_hdrsiz(mcopy,
1990b9234fafSSam Leffler 							 IPSEC_DIR_OUTBOUND,
1991b9234fafSSam Leffler 							 NULL);
1992b9234fafSSam Leffler 
1993b9234fafSSam Leffler 				/*
1994b9234fafSSam Leffler 				 * find the correct route for outer IPv4
1995b9234fafSSam Leffler 				 * header, compute tunnel MTU.
1996b9234fafSSam Leffler 				 *
1997b9234fafSSam Leffler 				 * XXX BUG ALERT
1998b9234fafSSam Leffler 				 * The "dummyifp" code relies upon the fact
1999b9234fafSSam Leffler 				 * that icmp_error() touches only ifp->if_mtu.
2000b9234fafSSam Leffler 				 */
2001b9234fafSSam Leffler 				/*XXX*/
2002b9234fafSSam Leffler 				destifp = NULL;
2003b9234fafSSam Leffler 				if (sp->req != NULL
2004b9234fafSSam Leffler 				 && sp->req->sav != NULL
2005b9234fafSSam Leffler 				 && sp->req->sav->sah != NULL) {
2006b9234fafSSam Leffler 					ro = &sp->req->sav->sah->sa_route;
2007b9234fafSSam Leffler 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
2008b9234fafSSam Leffler 						dummyifp.if_mtu =
2009b9234fafSSam Leffler 						    ro->ro_rt->rt_ifp->if_mtu;
2010b9234fafSSam Leffler 						dummyifp.if_mtu -= ipsechdr;
2011b9234fafSSam Leffler 						destifp = &dummyifp;
2012b9234fafSSam Leffler 					}
2013b9234fafSSam Leffler 				}
2014b9234fafSSam Leffler 
2015b9234fafSSam Leffler 				KEY_FREESP(&sp);
2016b9234fafSSam Leffler 			}
2017b9234fafSSam Leffler 		}
2018b9234fafSSam Leffler #else /* !IPSEC && !FAST_IPSEC */
20195d846453SSam Leffler 		if (ipforward_rt.ro_rt)
20205d846453SSam Leffler 			destifp = ipforward_rt.ro_rt->rt_ifp;
20216a800098SYoshinobu Inoue #endif /*IPSEC*/
2022df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
2023df8bae1dSRodney W. Grimes 		break;
2024df8bae1dSRodney W. Grimes 
2025df8bae1dSRodney W. Grimes 	case ENOBUFS:
2026df285b3dSMike Silbersack 		/*
2027df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
2028df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
2029df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
2030df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
2031df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
2032df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
2033df285b3dSMike Silbersack 		 */
2034df285b3dSMike Silbersack 		if (ip_sendsourcequench == 0) {
2035df285b3dSMike Silbersack 			m_freem(mcopy);
2036df285b3dSMike Silbersack 			return;
2037df285b3dSMike Silbersack 		} else {
2038df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
2039df8bae1dSRodney W. Grimes 			code = 0;
2040df285b3dSMike Silbersack 		}
2041df8bae1dSRodney W. Grimes 		break;
20423a06e3e0SRuslan Ermilov 
20433a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
20443a06e3e0SRuslan Ermilov 		m_freem(mcopy);
20453a06e3e0SRuslan Ermilov 		return;
2046df8bae1dSRodney W. Grimes 	}
2047df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
2048df8bae1dSRodney W. Grimes }
2049df8bae1dSRodney W. Grimes 
205082c23ebaSBill Fenner void
205182c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
205282c23ebaSBill Fenner 	register struct inpcb *inp;
205382c23ebaSBill Fenner 	register struct mbuf **mp;
205482c23ebaSBill Fenner 	register struct ip *ip;
205582c23ebaSBill Fenner 	register struct mbuf *m;
205682c23ebaSBill Fenner {
205782c23ebaSBill Fenner 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
205882c23ebaSBill Fenner 		struct timeval tv;
205982c23ebaSBill Fenner 
206082c23ebaSBill Fenner 		microtime(&tv);
206182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
206282c23ebaSBill Fenner 			SCM_TIMESTAMP, SOL_SOCKET);
206382c23ebaSBill Fenner 		if (*mp)
206482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
20654cc20ab1SSeigo Tanimura 	}
206682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
206782c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
206882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
206982c23ebaSBill Fenner 		if (*mp)
207082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
207182c23ebaSBill Fenner 	}
20724957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
20734957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
20744957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
20754957466bSMatthew N. Dodd 		if (*mp)
20764957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
20774957466bSMatthew N. Dodd 	}
207882c23ebaSBill Fenner #ifdef notyet
207982c23ebaSBill Fenner 	/* XXX
208082c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
208182c23ebaSBill Fenner 	 * than they already were.
208282c23ebaSBill Fenner 	 */
208382c23ebaSBill Fenner 	/* options were tossed already */
208482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
208582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
208682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
208782c23ebaSBill Fenner 		if (*mp)
208882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
208982c23ebaSBill Fenner 	}
209082c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
209182c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
209282c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
209382c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
209482c23ebaSBill Fenner 		if (*mp)
209582c23ebaSBill Fenner 			mp = &(*mp)->m_next;
209682c23ebaSBill Fenner 	}
209782c23ebaSBill Fenner #endif
209882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
2099d314ad7bSJulian Elischer 		struct ifnet *ifp;
2100d314ad7bSJulian Elischer 		struct sdlbuf {
210182c23ebaSBill Fenner 			struct sockaddr_dl sdl;
2102d314ad7bSJulian Elischer 			u_char	pad[32];
2103d314ad7bSJulian Elischer 		} sdlbuf;
2104d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
2105d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
210682c23ebaSBill Fenner 
2107d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
2108d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
2109f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
2110f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
2111d314ad7bSJulian Elischer 			/*
2112d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
2113d314ad7bSJulian Elischer 			 */
2114d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
2115d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
2116d314ad7bSJulian Elischer 				goto makedummy;
2117d314ad7bSJulian Elischer 			}
2118d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
2119d314ad7bSJulian Elischer 		} else {
2120d314ad7bSJulian Elischer makedummy:
2121d314ad7bSJulian Elischer 			sdl2->sdl_len
2122d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
2123d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
2124d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
2125d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2126d314ad7bSJulian Elischer 		}
2127d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
212882c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
212982c23ebaSBill Fenner 		if (*mp)
213082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
213182c23ebaSBill Fenner 	}
213282c23ebaSBill Fenner }
213382c23ebaSBill Fenner 
21344d2e3692SLuigi Rizzo /*
21354d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
21364d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
21374d2e3692SLuigi Rizzo  *
21384d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
21394d2e3692SLuigi Rizzo  *  handling is done there already.
21404d2e3692SLuigi Rizzo  */
/* Socket registered by ip_rsvp_init(); NULL when none is registered. */
struct socket *ip_rsvpd;
/* Nonzero while this file's contribution to rsvp_on is in effect. */
static int ip_rsvp_on;
2143df8bae1dSRodney W. Grimes int
2144f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
2145f0068c4aSGarrett Wollman {
2146f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
2147f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2148f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
2149f0068c4aSGarrett Wollman 
2150f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
2151f0068c4aSGarrett Wollman 		return EADDRINUSE;
2152f0068c4aSGarrett Wollman 
2153f0068c4aSGarrett Wollman 	ip_rsvpd = so;
21541c5de19aSGarrett Wollman 	/*
21551c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
21561c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21571c5de19aSGarrett Wollman 	 */
21581c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
21591c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
21601c5de19aSGarrett Wollman 		rsvp_on++;
21611c5de19aSGarrett Wollman 	}
2162f0068c4aSGarrett Wollman 
2163f0068c4aSGarrett Wollman 	return 0;
2164f0068c4aSGarrett Wollman }
2165f0068c4aSGarrett Wollman 
2166f0068c4aSGarrett Wollman int
2167f0068c4aSGarrett Wollman ip_rsvp_done(void)
2168f0068c4aSGarrett Wollman {
2169f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
21701c5de19aSGarrett Wollman 	/*
21711c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
21721c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
21731c5de19aSGarrett Wollman 	 */
21741c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
21751c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
21761c5de19aSGarrett Wollman 		rsvp_on--;
21771c5de19aSGarrett Wollman 	}
2178f0068c4aSGarrett Wollman 	return 0;
2179f0068c4aSGarrett Wollman }
2180bbb4330bSLuigi Rizzo 
2181bbb4330bSLuigi Rizzo void
2182bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
2183bbb4330bSLuigi Rizzo {
2184bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
2185bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
2186bbb4330bSLuigi Rizzo 		return;
2187bbb4330bSLuigi Rizzo 	}
2188bbb4330bSLuigi Rizzo 
2189bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
2190bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
2191bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
2192bbb4330bSLuigi Rizzo 	 */
2193bbb4330bSLuigi Rizzo 
2194bbb4330bSLuigi Rizzo 	if (!rsvp_on) {
2195bbb4330bSLuigi Rizzo 		m_freem(m);
2196bbb4330bSLuigi Rizzo 		return;
2197bbb4330bSLuigi Rizzo 	}
2198bbb4330bSLuigi Rizzo 
2199bbb4330bSLuigi Rizzo 	if (ip_rsvpd != NULL) {
2200bbb4330bSLuigi Rizzo 		rip_input(m, off);
2201bbb4330bSLuigi Rizzo 		return;
2202bbb4330bSLuigi Rizzo 	}
2203bbb4330bSLuigi Rizzo 	/* Drop the packet */
2204bbb4330bSLuigi Rizzo 	m_freem(m);
2205bbb4330bSLuigi Rizzo }
2206