xref: /freebsd/sys/netinet/ip_input.c (revision 4d2e36928d066a701da1527175dbadcc65211ad1)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
14df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
15df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
16df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
17df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
18df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
19df8bae1dSRodney W. Grimes  *    without specific prior written permission.
20df8bae1dSRodney W. Grimes  *
21df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
32df8bae1dSRodney W. Grimes  *
33df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34c3aac50fSPeter Wemm  * $FreeBSD$
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
3758938916SGarrett Wollman #define	_IP_VHL
3858938916SGarrett Wollman 
39e4f4247aSEivind Eklund #include "opt_bootp.h"
4074a9466cSGary Palmer #include "opt_ipfw.h"
41b715f178SLuigi Rizzo #include "opt_ipdn.h"
42fbd1372aSJoerg Wunsch #include "opt_ipdivert.h"
431ee25934SPeter Wemm #include "opt_ipfilter.h"
4427108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
456a800098SYoshinobu Inoue #include "opt_ipsec.h"
46c4ac87eaSDarren Reed #include "opt_pfil_hooks.h"
4764dddc18SKris Kennaway #include "opt_random_ip_id.h"
4874a9466cSGary Palmer 
49df8bae1dSRodney W. Grimes #include <sys/param.h>
50df8bae1dSRodney W. Grimes #include <sys/systm.h>
51df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
52b715f178SLuigi Rizzo #include <sys/malloc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54df8bae1dSRodney W. Grimes #include <sys/protosw.h>
55df8bae1dSRodney W. Grimes #include <sys/socket.h>
56df8bae1dSRodney W. Grimes #include <sys/time.h>
57df8bae1dSRodney W. Grimes #include <sys/kernel.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61c85540ddSAndrey A. Chernov #include <net/pfil.h>
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68367d34f8SBrian Somers #include <net/intrq.h>
69df8bae1dSRodney W. Grimes 
70df8bae1dSRodney W. Grimes #include <netinet/in.h>
71df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
72b5e8ce9fSBruce Evans #include <netinet/in_var.h>
73df8bae1dSRodney W. Grimes #include <netinet/ip.h>
74df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
76df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
7758938916SGarrett Wollman #include <machine/in_cksum.h>
78df8bae1dSRodney W. Grimes 
79f0068c4aSGarrett Wollman #include <sys/socketvar.h>
806ddbf1e2SGary Palmer 
816ddbf1e2SGary Palmer #include <netinet/ip_fw.h>
82db69a05dSPaul Saab #include <netinet/ip_dummynet.h>
83db69a05dSPaul Saab 
846a800098SYoshinobu Inoue #ifdef IPSEC
856a800098SYoshinobu Inoue #include <netinet6/ipsec.h>
866a800098SYoshinobu Inoue #include <netkey/key.h>
876a800098SYoshinobu Inoue #endif
886a800098SYoshinobu Inoue 
891c5de19aSGarrett Wollman int rsvp_on = 0;
90f0068c4aSGarrett Wollman 
911f91d8c5SDavid Greenman int	ipforwarding = 0;
920312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
933d177f46SBill Fumerola     &ipforwarding, 0, "Enable IP forwarding between interfaces");
940312fbe9SPoul-Henning Kamp 
95d4fb926cSGarrett Wollman static int	ipsendredirects = 1; /* XXX */
960312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
973d177f46SBill Fumerola     &ipsendredirects, 0, "Enable sending IP redirects");
980312fbe9SPoul-Henning Kamp 
99df8bae1dSRodney W. Grimes int	ip_defttl = IPDEFTTL;
1000312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
1013d177f46SBill Fumerola     &ip_defttl, 0, "Maximum TTL on IP packets");
1020312fbe9SPoul-Henning Kamp 
1030312fbe9SPoul-Henning Kamp static int	ip_dosourceroute = 0;
1040312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
1053d177f46SBill Fumerola     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
1064fce5804SGuido van Rooij 
1074fce5804SGuido van Rooij static int	ip_acceptsourceroute = 0;
1084fce5804SGuido van Rooij SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
1093d177f46SBill Fumerola     CTLFLAG_RW, &ip_acceptsourceroute, 0,
1103d177f46SBill Fumerola     "Enable accepting source routed IP packets");
1116a800098SYoshinobu Inoue 
1126a800098SYoshinobu Inoue static int	ip_keepfaith = 0;
1136a800098SYoshinobu Inoue SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
1146a800098SYoshinobu Inoue 	&ip_keepfaith,	0,
1156a800098SYoshinobu Inoue 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1166a800098SYoshinobu Inoue 
117690a6055SJesper Skriver static int	ip_nfragpackets = 0;
11896c2b042SJesper Skriver static int	ip_maxfragpackets;	/* initialized in ip_init() */
119690a6055SJesper Skriver SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
120690a6055SJesper Skriver 	&ip_maxfragpackets, 0,
121690a6055SJesper Skriver 	"Maximum number of IPv4 fragment reassembly queue entries");
122690a6055SJesper Skriver 
123823db0e9SDon Lewis /*
124823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
125823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
126a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
127823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1283f67c834SDon Lewis  *
129a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
130a8f12100SDon Lewis  * to translate the destination address to another local interface.
1313f67c834SDon Lewis  *
1323f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1333f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1343f67c834SDon Lewis  * packets for those addresses are received.
135823db0e9SDon Lewis  */
136b3e95d4eSJonathan Lemon static int	ip_checkinterface = 1;
137b3e95d4eSJonathan Lemon SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
138b3e95d4eSJonathan Lemon     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
139b3e95d4eSJonathan Lemon 
140df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1410312fbe9SPoul-Henning Kamp static int	ipprintfs = 0;
142df8bae1dSRodney W. Grimes #endif
143df8bae1dSRodney W. Grimes 
144ca925d9cSJonathan Lemon static int	ipqmaxlen = IFQ_MAXLEN;
145ca925d9cSJonathan Lemon 
146df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
147f0ffb944SJulian Elischer extern	struct protosw inetsw[];
148df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
14959562606SGarrett Wollman struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
150ca925d9cSJonathan Lemon struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
151ca925d9cSJonathan Lemon u_long 	in_ifaddrhmask;				/* mask for hash table */
152ca925d9cSJonathan Lemon 
153afed1375SDavid Greenman SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
1543d177f46SBill Fumerola     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
1550312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
1563d177f46SBill Fumerola     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
157df8bae1dSRodney W. Grimes 
158f23b4c91SGarrett Wollman struct ipstat ipstat;
159c73d99b5SRuslan Ermilov SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
1603d177f46SBill Fumerola     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
161194a213eSAndrey A. Chernov 
162194a213eSAndrey A. Chernov /*
 * Packet reassembly stuff.
 *
 * Reassembly queues are kept in IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
 * tail-queue buckets.  IPREASS_HASH(x, y) picks the bucket: bits 0-3 of x
 * are kept in place, bits 8-11 of x are moved down to bits 4-7, the result
 * is XORed with y and then masked with IPREASS_HMASK so it is always a
 * valid index into ipq[].
 */
163194a213eSAndrey A. Chernov #define IPREASS_NHASH_LOG2      6
164194a213eSAndrey A. Chernov #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
165194a213eSAndrey A. Chernov #define IPREASS_HMASK           (IPREASS_NHASH - 1)
166194a213eSAndrey A. Chernov #define IPREASS_HASH(x,y) \
167831a80b0SMatthew Dillon 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
168194a213eSAndrey A. Chernov 
169462b86feSPoul-Henning Kamp static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
170194a213eSAndrey A. Chernov static int    nipq = 0;         /* total # of reass queues */
171194a213eSAndrey A. Chernov static int    maxnipq;	/* set to nmbclusters / 4 in ip_init() */
172f23b4c91SGarrett Wollman 
1730312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1740312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1753d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1760312fbe9SPoul-Henning Kamp #endif
1770312fbe9SPoul-Henning Kamp 
1781b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
1791b968362SDag-Erling Smørgrav static int	ipstealth = 0;
1801b968362SDag-Erling Smørgrav SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
1811b968362SDag-Erling Smørgrav     &ipstealth, 0, "");
1821b968362SDag-Erling Smørgrav #endif
1831b968362SDag-Erling Smørgrav 
184cfe8b629SGarrett Wollman 
18523bf9953SPoul-Henning Kamp /* Firewall hooks */
18623bf9953SPoul-Henning Kamp ip_fw_chk_t *ip_fw_chk_ptr;
1879fcc0795SLuigi Rizzo int fw_enable = 1 ;
188e7319babSPoul-Henning Kamp 
189db69a05dSPaul Saab /* Dummynet hooks */
190db69a05dSPaul Saab ip_dn_io_t *ip_dn_io_ptr;
191b715f178SLuigi Rizzo 
192afed1b49SDarren Reed 
193e7319babSPoul-Henning Kamp /*
1944d2e3692SLuigi Rizzo  * XXX this is ugly -- the following two global variables are
1954d2e3692SLuigi Rizzo  * used to store packet state while it travels through the stack.
1964d2e3692SLuigi Rizzo  * Note that the code even makes assumptions on the size and
1974d2e3692SLuigi Rizzo  * alignment of fields inside struct ip_srcrt so e.g. adding some
1984d2e3692SLuigi Rizzo  * fields will break the code. This needs to be fixed.
1994d2e3692SLuigi Rizzo  *
200df8bae1dSRodney W. Grimes  * We need to save the IP options in case a protocol wants to respond
201df8bae1dSRodney W. Grimes  * to an incoming packet over the same route if the packet got here
202df8bae1dSRodney W. Grimes  * using IP source routing.  This allows connection establishment and
203df8bae1dSRodney W. Grimes  * maintenance when the remote end is on a network that is not known
204df8bae1dSRodney W. Grimes  * to us.
205df8bae1dSRodney W. Grimes  */
/*
 * ip_nhops is reset to 0 by ip_input() before options are processed for
 * each packet.  NOTE(review): presumably it counts the valid entries in
 * ip_srcrt.route[] filled by save_rte() -- confirm against save_rte().
 */
2060312fbe9SPoul-Henning Kamp static int	ip_nhops = 0;
207df8bae1dSRodney W. Grimes static	struct ip_srcrt {
208df8bae1dSRodney W. Grimes 	struct	in_addr dst;			/* final destination */
209df8bae1dSRodney W. Grimes 	char	nop;				/* one NOP to align */
210df8bae1dSRodney W. Grimes 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
211df8bae1dSRodney W. Grimes 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];	/* saved hops; do not reorder or pad, layout is relied upon */
212df8bae1dSRodney W. Grimes } ip_srcrt;
213df8bae1dSRodney W. Grimes 
2144d77a549SAlfred Perlstein static void	save_rte(u_char *, struct in_addr);
2152b25acc1SLuigi Rizzo static int	ip_dooptions(struct mbuf *m, int,
2162b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2172b25acc1SLuigi Rizzo static void	ip_forward(struct mbuf *m, int srcrt,
2182b25acc1SLuigi Rizzo 			struct sockaddr_in *next_hop);
2194d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2202b25acc1SLuigi Rizzo static struct	mbuf *ip_reass(struct mbuf *, struct ipqhead *,
2212b25acc1SLuigi Rizzo 		struct ipq *, u_int32_t *, u_int16_t *);
2224d77a549SAlfred Perlstein static void	ipintr(void);
2238948e4baSArchie Cobbs 
224df8bae1dSRodney W. Grimes /*
225df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
226df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
227df8bae1dSRodney W. Grimes  */
228df8bae1dSRodney W. Grimes void
229df8bae1dSRodney W. Grimes ip_init()
230df8bae1dSRodney W. Grimes {
231f0ffb944SJulian Elischer 	register struct protosw *pr;
232df8bae1dSRodney W. Grimes 	register int i;
233df8bae1dSRodney W. Grimes 
23459562606SGarrett Wollman 	TAILQ_INIT(&in_ifaddrhead);
235ca925d9cSJonathan Lemon 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
236f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
237df8bae1dSRodney W. Grimes 	if (pr == 0)
238df8bae1dSRodney W. Grimes 		panic("ip_init");
239df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
240df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
241f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
242f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
243df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
244df8bae1dSRodney W. Grimes 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
245df8bae1dSRodney W. Grimes 			ip_protox[pr->pr_protocol] = pr - inetsw;
246194a213eSAndrey A. Chernov 
247194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++)
248462b86feSPoul-Henning Kamp 	    TAILQ_INIT(&ipq[i]);
249194a213eSAndrey A. Chernov 
250194a213eSAndrey A. Chernov 	maxnipq = nmbclusters / 4;
25196c2b042SJesper Skriver 	ip_maxfragpackets = nmbclusters / 4;
252194a213eSAndrey A. Chernov 
25364dddc18SKris Kennaway #ifndef RANDOM_IP_ID
254227ee8a1SPoul-Henning Kamp 	ip_id = time_second & 0xffff;
25564dddc18SKris Kennaway #endif
256df8bae1dSRodney W. Grimes 	ipintrq.ifq_maxlen = ipqmaxlen;
2576008862bSJohn Baldwin 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
258bedbd47eSMike Smith 	ipintrq_present = 1;
259242c5536SPeter Wemm 
260242c5536SPeter Wemm 	register_netisr(NETISR_IP, ipintr);
261df8bae1dSRodney W. Grimes }
262df8bae1dSRodney W. Grimes 
2634d2e3692SLuigi Rizzo /*
2644d2e3692SLuigi Rizzo  * XXX watch out for this one.  It is perhaps used as a cache for
2654d2e3692SLuigi Rizzo  * the most recently used route?  It is cleared in in_addroute()
2664d2e3692SLuigi Rizzo  * when a new route is successfully created.
2674d2e3692SLuigi Rizzo  */
2681e3d5af0SRuslan Ermilov struct	route ipforward_rt;
269df8bae1dSRodney W. Grimes 
270df8bae1dSRodney W. Grimes /*
271df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
272df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
273df8bae1dSRodney W. Grimes  */
274c67b1d17SGarrett Wollman void
275c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
276df8bae1dSRodney W. Grimes {
27723bf9953SPoul-Henning Kamp 	struct ip *ip;
27823bf9953SPoul-Henning Kamp 	struct ipq *fp;
2795da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
280ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
281823db0e9SDon Lewis 	int    i, hlen, checkif;
28247c861ecSBrian Somers 	u_short sum;
2837538a9a0SJonathan Lemon 	struct in_addr pkt_dst;
2848948e4baSArchie Cobbs 	u_int32_t divert_info = 0;		/* packet divert/tee info */
2852b25acc1SLuigi Rizzo 	struct ip_fw_args args;
286c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
287c4ac87eaSDarren Reed 	struct packet_filter_hook *pfh;
288c4ac87eaSDarren Reed 	struct mbuf *m0;
289c4ac87eaSDarren Reed 	int rv;
290c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
291b715f178SLuigi Rizzo 
2922b25acc1SLuigi Rizzo 	args.eh = NULL;
2932b25acc1SLuigi Rizzo 	args.oif = NULL;
2942b25acc1SLuigi Rizzo 	args.rule = NULL;
2952b25acc1SLuigi Rizzo 	args.divert_rule = 0;			/* divert cookie */
2962b25acc1SLuigi Rizzo 	args.next_hop = NULL;
2978948e4baSArchie Cobbs 
2982b25acc1SLuigi Rizzo 	/* Grab info from MT_TAG mbufs prepended to the chain.	*/
2992b25acc1SLuigi Rizzo 	for (; m && m->m_type == MT_TAG; m = m->m_next) {
3002b25acc1SLuigi Rizzo 		switch(m->m_tag_id) {
3012b25acc1SLuigi Rizzo 		default:
3022b25acc1SLuigi Rizzo 			printf("ip_input: unrecognised MT_TAG tag %d\n",
3032b25acc1SLuigi Rizzo 			    m->m_tag_id);
3042b25acc1SLuigi Rizzo 			break;
3052b25acc1SLuigi Rizzo 
3062b25acc1SLuigi Rizzo 		case PACKET_TAG_DUMMYNET:
3072b25acc1SLuigi Rizzo 			args.rule = ((struct dn_pkt *)m)->rule;
3082b25acc1SLuigi Rizzo 			break;
3092b25acc1SLuigi Rizzo 
3102b25acc1SLuigi Rizzo 		case PACKET_TAG_DIVERT:
3112b25acc1SLuigi Rizzo 			args.divert_rule = (int)m->m_hdr.mh_data & 0xffff;
3122b25acc1SLuigi Rizzo 			break;
3132b25acc1SLuigi Rizzo 
3142b25acc1SLuigi Rizzo 		case PACKET_TAG_IPFORWARD:
3152b25acc1SLuigi Rizzo 			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
3162b25acc1SLuigi Rizzo 			break;
3172b25acc1SLuigi Rizzo 		}
3182b25acc1SLuigi Rizzo 	}
319df8bae1dSRodney W. Grimes 
320db40007dSAndrew R. Reiter 	KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0,
321db40007dSAndrew R. Reiter 	    ("ip_input: no HDR"));
322db40007dSAndrew R. Reiter 
3232b25acc1SLuigi Rizzo 	if (args.rule) {	/* dummynet already filtered us */
3242b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
3252b25acc1SLuigi Rizzo 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
3262b25acc1SLuigi Rizzo 		goto iphack ;
3272b25acc1SLuigi Rizzo 	}
3282b25acc1SLuigi Rizzo 
329df8bae1dSRodney W. Grimes 	ipstat.ips_total++;
33058938916SGarrett Wollman 
33158938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
33258938916SGarrett Wollman 		goto tooshort;
33358938916SGarrett Wollman 
334df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
335df8bae1dSRodney W. Grimes 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
336df8bae1dSRodney W. Grimes 		ipstat.ips_toosmall++;
337c67b1d17SGarrett Wollman 		return;
338df8bae1dSRodney W. Grimes 	}
339df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
34058938916SGarrett Wollman 
34158938916SGarrett Wollman 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
342df8bae1dSRodney W. Grimes 		ipstat.ips_badvers++;
343df8bae1dSRodney W. Grimes 		goto bad;
344df8bae1dSRodney W. Grimes 	}
34558938916SGarrett Wollman 
34658938916SGarrett Wollman 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
347df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
348df8bae1dSRodney W. Grimes 		ipstat.ips_badhlen++;
349df8bae1dSRodney W. Grimes 		goto bad;
350df8bae1dSRodney W. Grimes 	}
351df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
352df8bae1dSRodney W. Grimes 		if ((m = m_pullup(m, hlen)) == 0) {
353df8bae1dSRodney W. Grimes 			ipstat.ips_badhlen++;
354c67b1d17SGarrett Wollman 			return;
355df8bae1dSRodney W. Grimes 		}
356df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
357df8bae1dSRodney W. Grimes 	}
35833841545SHajimu UMEMOTO 
35933841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
36033841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
36133841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
36233841545SHajimu UMEMOTO 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
36333841545SHajimu UMEMOTO 			ipstat.ips_badaddr++;
36433841545SHajimu UMEMOTO 			goto bad;
36533841545SHajimu UMEMOTO 		}
36633841545SHajimu UMEMOTO 	}
36733841545SHajimu UMEMOTO 
368db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
369db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
370db4f9cc7SJonathan Lemon 	} else {
37158938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
37247c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
37358938916SGarrett Wollman 		} else {
37447c861ecSBrian Somers 			sum = in_cksum(m, hlen);
37558938916SGarrett Wollman 		}
376db4f9cc7SJonathan Lemon 	}
37747c861ecSBrian Somers 	if (sum) {
378df8bae1dSRodney W. Grimes 		ipstat.ips_badsum++;
379df8bae1dSRodney W. Grimes 		goto bad;
380df8bae1dSRodney W. Grimes 	}
381df8bae1dSRodney W. Grimes 
382df8bae1dSRodney W. Grimes 	/*
383df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
384df8bae1dSRodney W. Grimes 	 */
385fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
386df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
387df8bae1dSRodney W. Grimes 		ipstat.ips_badlen++;
388df8bae1dSRodney W. Grimes 		goto bad;
389df8bae1dSRodney W. Grimes 	}
390fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
391df8bae1dSRodney W. Grimes 
392df8bae1dSRodney W. Grimes 	/*
393df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
394df8bae1dSRodney W. Grimes 	 * is at least as much as the IP header would have us expect.
395df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
396df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
397df8bae1dSRodney W. Grimes 	 */
398df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
39958938916SGarrett Wollman tooshort:
400df8bae1dSRodney W. Grimes 		ipstat.ips_tooshort++;
401df8bae1dSRodney W. Grimes 		goto bad;
402df8bae1dSRodney W. Grimes 	}
403df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
404df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
405df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
406df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
407df8bae1dSRodney W. Grimes 		} else
408df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
409df8bae1dSRodney W. Grimes 	}
4103f67c834SDon Lewis 
41133841545SHajimu UMEMOTO #ifdef IPSEC
41233841545SHajimu UMEMOTO 	if (ipsec_gethist(m, NULL))
41333841545SHajimu UMEMOTO 		goto pass;
41433841545SHajimu UMEMOTO #endif
4153f67c834SDon Lewis 
4164dd1662bSUgen J.S. Antsilevich 	/*
4174dd1662bSUgen J.S. Antsilevich 	 * IpHack's section.
4184dd1662bSUgen J.S. Antsilevich 	 * Right now, while no processing has been done on the packet
4194dd1662bSUgen J.S. Antsilevich 	 * and it is still fresh off the network, we do our black
4204dd1662bSUgen J.S. Antsilevich 	 * deals with it.
42193e0e116SJulian Elischer 	 * - Firewall: deny/allow/divert
422fed1c7e9SSøren Schmidt 	 * - Xlate: translate packet's addr/port (NAT).
423b715f178SLuigi Rizzo 	 * - Pipe: pass pkt through dummynet.
4244dd1662bSUgen J.S. Antsilevich 	 * - Wrap: fake packet's addr/port <unimpl.>
4254dd1662bSUgen J.S. Antsilevich 	 * - Encapsulate: put it in another IP and send out. <unimp.>
4264dd1662bSUgen J.S. Antsilevich  	 */
427b715f178SLuigi Rizzo 
428b715f178SLuigi Rizzo iphack:
429df8bae1dSRodney W. Grimes 
430c4ac87eaSDarren Reed #ifdef PFIL_HOOKS
431c4ac87eaSDarren Reed 	/*
432c4ac87eaSDarren Reed 	 * Run through list of hooks for input packets.  If there are any
433c4ac87eaSDarren Reed 	 * filters which require that additional packets in the flow are
434c4ac87eaSDarren Reed 	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
435c4ac87eaSDarren Reed 	 * Note that filters must _never_ set this flag, as another filter
436c4ac87eaSDarren Reed 	 * in the list may have previously cleared it.
437c4ac87eaSDarren Reed 	 */
438c4ac87eaSDarren Reed 	m0 = m;
439c4ac87eaSDarren Reed 	pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
440fc2ffbe6SPoul-Henning Kamp 	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
441c4ac87eaSDarren Reed 		if (pfh->pfil_func) {
442c4ac87eaSDarren Reed 			rv = pfh->pfil_func(ip, hlen,
443c4ac87eaSDarren Reed 					    m->m_pkthdr.rcvif, 0, &m0);
444c4ac87eaSDarren Reed 			if (rv)
445beec8214SDarren Reed 				return;
446c4ac87eaSDarren Reed 			m = m0;
447c4ac87eaSDarren Reed 			if (m == NULL)
448c4ac87eaSDarren Reed 				return;
449c4ac87eaSDarren Reed 			ip = mtod(m, struct ip *);
450beec8214SDarren Reed 		}
451c4ac87eaSDarren Reed #endif /* PFIL_HOOKS */
452c4ac87eaSDarren Reed 
4537b109fa4SLuigi Rizzo 	if (fw_enable && IPFW_LOADED) {
454f9e354dfSJulian Elischer 		/*
455f9e354dfSJulian Elischer 		 * If we've been forwarded from the output side, then
456f9e354dfSJulian Elischer 		 * skip the firewall a second time
457f9e354dfSJulian Elischer 		 */
4582b25acc1SLuigi Rizzo 		if (args.next_hop)
459f9e354dfSJulian Elischer 			goto ours;
4602b25acc1SLuigi Rizzo 
4612b25acc1SLuigi Rizzo 		args.m = m;
4622b25acc1SLuigi Rizzo 		i = ip_fw_chk_ptr(&args);
4632b25acc1SLuigi Rizzo 		m = args.m;
4642b25acc1SLuigi Rizzo 
465d60315beSLuigi Rizzo 		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
466507b4b54SLuigi Rizzo 			if (m)
467507b4b54SLuigi Rizzo 				m_freem(m);
468b715f178SLuigi Rizzo 			return;
469507b4b54SLuigi Rizzo 		}
470d60315beSLuigi Rizzo 		ip = mtod(m, struct ip *); /* just in case m changed */
4712b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop == NULL)	/* common case */
472b715f178SLuigi Rizzo 			goto pass;
4737b109fa4SLuigi Rizzo                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
4748948e4baSArchie Cobbs 			/* Send packet to the appropriate pipe */
4752b25acc1SLuigi Rizzo 			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
476e4676ba6SJulian Elischer 			return;
47793e0e116SJulian Elischer 		}
478b715f178SLuigi Rizzo #ifdef IPDIVERT
4798948e4baSArchie Cobbs 		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
4808948e4baSArchie Cobbs 			/* Divert or tee packet */
4818948e4baSArchie Cobbs 			divert_info = i;
482b715f178SLuigi Rizzo 			goto ours;
483b715f178SLuigi Rizzo 		}
484b715f178SLuigi Rizzo #endif
4852b25acc1SLuigi Rizzo 		if (i == 0 && args.next_hop != NULL)
486b715f178SLuigi Rizzo 			goto pass;
487b715f178SLuigi Rizzo 		/*
488b715f178SLuigi Rizzo 		 * if we get here, the packet must be dropped
489b715f178SLuigi Rizzo 		 */
490b715f178SLuigi Rizzo 		m_freem(m);
491b715f178SLuigi Rizzo 		return;
492b715f178SLuigi Rizzo 	}
493b715f178SLuigi Rizzo pass:
494100ba1a6SJordan K. Hubbard 
495df8bae1dSRodney W. Grimes 	/*
496df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
497df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
498df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
499df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
500df8bae1dSRodney W. Grimes 	 */
501df8bae1dSRodney W. Grimes 	ip_nhops = 0;		/* for source routed packets */
5022b25acc1SLuigi Rizzo 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
503c67b1d17SGarrett Wollman 		return;
504df8bae1dSRodney W. Grimes 
505f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
506f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
507f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
508f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
509f0068c4aSGarrett Wollman 	 * grabbing the packet.
510f0068c4aSGarrett Wollman          */
5111c5de19aSGarrett Wollman 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
512f0068c4aSGarrett Wollman 		goto ours;
513f0068c4aSGarrett Wollman 
514df8bae1dSRodney W. Grimes 	/*
515df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
516cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
517cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
518cc766e04SGarrett Wollman 	 * with it).
519df8bae1dSRodney W. Grimes 	 */
520cc766e04SGarrett Wollman 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
521cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
522cc766e04SGarrett Wollman 		goto ours;
523cc766e04SGarrett Wollman 
5247538a9a0SJonathan Lemon 	/*
5257538a9a0SJonathan Lemon 	 * Cache the destination address of the packet; this may be
5267538a9a0SJonathan Lemon 	 * changed by use of 'ipfw fwd'.
5277538a9a0SJonathan Lemon 	 */
5282b25acc1SLuigi Rizzo 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
5297538a9a0SJonathan Lemon 
530823db0e9SDon Lewis 	/*
531823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
532823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
533823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
534e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
535e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5363f67c834SDon Lewis 	 *
5373f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5383f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5393f67c834SDon Lewis 	 *
540a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5413f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5423f67c834SDon Lewis 	 * the packets are received.
543823db0e9SDon Lewis 	 */
544823db0e9SDon Lewis 	checkif = ip_checkinterface && (ipforwarding == 0) &&
5459494d596SBrooks Davis 	    m->m_pkthdr.rcvif != NULL &&
546e15ae1b2SDon Lewis 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
5472b25acc1SLuigi Rizzo 	    (args.next_hop == NULL);
548823db0e9SDon Lewis 
549ca925d9cSJonathan Lemon 	/*
550ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
551ca925d9cSJonathan Lemon 	 */
552ca925d9cSJonathan Lemon 	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
553f9e354dfSJulian Elischer 		/*
554823db0e9SDon Lewis 		 * If the address matches, verify that the packet
555823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
556823db0e9SDon Lewis 		 * enabled.
557f9e354dfSJulian Elischer 		 */
558823db0e9SDon Lewis 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
559823db0e9SDon Lewis 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
560ed1ff184SJulian Elischer 			goto ours;
561ca925d9cSJonathan Lemon 	}
562823db0e9SDon Lewis 	/*
563ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
564ca925d9cSJonathan Lemon 	 *
565ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
566ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
567ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
568ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
569823db0e9SDon Lewis 	 */
570ca925d9cSJonathan Lemon 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
571ca925d9cSJonathan Lemon 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
572ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
573ca925d9cSJonathan Lemon 				continue;
574ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
575df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
5767538a9a0SJonathan Lemon 			    pkt_dst.s_addr)
577df8bae1dSRodney W. Grimes 				goto ours;
5787538a9a0SJonathan Lemon 			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
579df8bae1dSRodney W. Grimes 				goto ours;
580ca925d9cSJonathan Lemon #ifdef BOOTP_COMPAT
581ca925d9cSJonathan Lemon 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
582ca925d9cSJonathan Lemon 				goto ours;
583ca925d9cSJonathan Lemon #endif
584df8bae1dSRodney W. Grimes 		}
585df8bae1dSRodney W. Grimes 	}
586df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
587df8bae1dSRodney W. Grimes 		struct in_multi *inm;
588df8bae1dSRodney W. Grimes 		if (ip_mrouter) {
589df8bae1dSRodney W. Grimes 			/*
590df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
591df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
592df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
593df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
594df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
595df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
596df8bae1dSRodney W. Grimes 			 */
597f0068c4aSGarrett Wollman 			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
598df8bae1dSRodney W. Grimes 				ipstat.ips_cantforward++;
599df8bae1dSRodney W. Grimes 				m_freem(m);
600c67b1d17SGarrett Wollman 				return;
601df8bae1dSRodney W. Grimes 			}
602df8bae1dSRodney W. Grimes 
603df8bae1dSRodney W. Grimes 			/*
60411612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
605df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
606df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
607df8bae1dSRodney W. Grimes 			 */
608df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
609df8bae1dSRodney W. Grimes 				goto ours;
610df8bae1dSRodney W. Grimes 			ipstat.ips_forward++;
611df8bae1dSRodney W. Grimes 		}
612df8bae1dSRodney W. Grimes 		/*
613df8bae1dSRodney W. Grimes 		 * See if we belong to the destination multicast group on the
614df8bae1dSRodney W. Grimes 		 * arrival interface.
615df8bae1dSRodney W. Grimes 		 */
616df8bae1dSRodney W. Grimes 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
617df8bae1dSRodney W. Grimes 		if (inm == NULL) {
61882c39223SGarrett Wollman 			ipstat.ips_notmember++;
619df8bae1dSRodney W. Grimes 			m_freem(m);
620c67b1d17SGarrett Wollman 			return;
621df8bae1dSRodney W. Grimes 		}
622df8bae1dSRodney W. Grimes 		goto ours;
623df8bae1dSRodney W. Grimes 	}
624df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
625df8bae1dSRodney W. Grimes 		goto ours;
626df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
627df8bae1dSRodney W. Grimes 		goto ours;
628df8bae1dSRodney W. Grimes 
6296a800098SYoshinobu Inoue 	/*
6306a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6316a800098SYoshinobu Inoue 	 */
6326a800098SYoshinobu Inoue 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
6336a800098SYoshinobu Inoue 		if (ip_keepfaith) {
6346a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
6356a800098SYoshinobu Inoue 				goto ours;
6366a800098SYoshinobu Inoue 		}
6376a800098SYoshinobu Inoue 		m_freem(m);
6386a800098SYoshinobu Inoue 		return;
6396a800098SYoshinobu Inoue 	}
6409494d596SBrooks Davis 
641df8bae1dSRodney W. Grimes 	/*
642df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
643df8bae1dSRodney W. Grimes 	 */
644df8bae1dSRodney W. Grimes 	if (ipforwarding == 0) {
645df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
646df8bae1dSRodney W. Grimes 		m_freem(m);
647546f251bSChris D. Faulhaber 	} else {
648546f251bSChris D. Faulhaber #ifdef IPSEC
649546f251bSChris D. Faulhaber 		/*
650546f251bSChris D. Faulhaber 		 * Enforce inbound IPsec SPD.
651546f251bSChris D. Faulhaber 		 */
652546f251bSChris D. Faulhaber 		if (ipsec4_in_reject(m, NULL)) {
653546f251bSChris D. Faulhaber 			ipsecstat.in_polvio++;
654546f251bSChris D. Faulhaber 			goto bad;
655546f251bSChris D. Faulhaber 		}
656546f251bSChris D. Faulhaber #endif /* IPSEC */
6572b25acc1SLuigi Rizzo 		ip_forward(m, 0, args.next_hop);
658546f251bSChris D. Faulhaber 	}
659c67b1d17SGarrett Wollman 	return;
660df8bae1dSRodney W. Grimes 
661df8bae1dSRodney W. Grimes ours:
662d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
663d0ebc0d2SYaroslav Tykhiy 	/*
664d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
665d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
666d0ebc0d2SYaroslav Tykhiy 	 */
6672b25acc1SLuigi Rizzo 	if (ipstealth && hlen > sizeof (struct ip) &&
6682b25acc1SLuigi Rizzo 	    ip_dooptions(m, 1, args.next_hop))
669d0ebc0d2SYaroslav Tykhiy 		return;
670d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
671d0ebc0d2SYaroslav Tykhiy 
6725da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
6735da9f8faSJosef Karthauser 	if (ia != NULL) {
6745da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
6755da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
6765da9f8faSJosef Karthauser 	}
677100ba1a6SJordan K. Hubbard 
67863f8d699SJordan K. Hubbard 	/*
679df8bae1dSRodney W. Grimes 	 * If offset or IP_MF are set, must reassemble.
680df8bae1dSRodney W. Grimes 	 * Otherwise, nothing need be done.
681df8bae1dSRodney W. Grimes 	 * (We could look in the reassembly queue to see
682df8bae1dSRodney W. Grimes 	 * if the packet was previously fragmented,
683df8bae1dSRodney W. Grimes 	 * but it's not worth the time; just let them time out.)
684df8bae1dSRodney W. Grimes 	 */
685b6ea1aa5SRuslan Ermilov 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
6866a800098SYoshinobu Inoue 
687194a213eSAndrey A. Chernov 		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
688df8bae1dSRodney W. Grimes 		/*
689df8bae1dSRodney W. Grimes 		 * Look for queue of fragments
690df8bae1dSRodney W. Grimes 		 * of this datagram.
691df8bae1dSRodney W. Grimes 		 */
692462b86feSPoul-Henning Kamp 		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
693df8bae1dSRodney W. Grimes 			if (ip->ip_id == fp->ipq_id &&
694df8bae1dSRodney W. Grimes 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
695df8bae1dSRodney W. Grimes 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
696df8bae1dSRodney W. Grimes 			    ip->ip_p == fp->ipq_p)
697df8bae1dSRodney W. Grimes 				goto found;
698df8bae1dSRodney W. Grimes 
699194a213eSAndrey A. Chernov 		fp = 0;
700194a213eSAndrey A. Chernov 
701194a213eSAndrey A. Chernov 		/* check if there's a place for the new queue */
702194a213eSAndrey A. Chernov 		if (nipq > maxnipq) {
703194a213eSAndrey A. Chernov 		    /*
704194a213eSAndrey A. Chernov 		     * drop something from the tail of the current queue
705194a213eSAndrey A. Chernov 		     * before proceeding further
706194a213eSAndrey A. Chernov 		     */
707462b86feSPoul-Henning Kamp 		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
708462b86feSPoul-Henning Kamp 		    if (q == NULL) {   /* gak */
709194a213eSAndrey A. Chernov 			for (i = 0; i < IPREASS_NHASH; i++) {
710462b86feSPoul-Henning Kamp 			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
711462b86feSPoul-Henning Kamp 			    if (r) {
712462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], r);
713194a213eSAndrey A. Chernov 				break;
714194a213eSAndrey A. Chernov 			    }
715194a213eSAndrey A. Chernov 			}
716194a213eSAndrey A. Chernov 		    } else
717462b86feSPoul-Henning Kamp 			ip_freef(&ipq[sum], q);
718194a213eSAndrey A. Chernov 		}
719194a213eSAndrey A. Chernov found:
720df8bae1dSRodney W. Grimes 		/*
721df8bae1dSRodney W. Grimes 		 * Adjust ip_len to not reflect header,
722df8bae1dSRodney W. Grimes 		 * convert offset of this to bytes.
723df8bae1dSRodney W. Grimes 		 */
724df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
725b6ea1aa5SRuslan Ermilov 		if (ip->ip_off & IP_MF) {
7266effc713SDoug Rabson 		        /*
7276effc713SDoug Rabson 		         * Make sure that fragments have a data length
7286effc713SDoug Rabson 			 * that's a non-zero multiple of 8 bytes.
7296effc713SDoug Rabson 		         */
7306effc713SDoug Rabson 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
7316effc713SDoug Rabson 				ipstat.ips_toosmall++; /* XXX */
7326effc713SDoug Rabson 				goto bad;
7336effc713SDoug Rabson 			}
7346effc713SDoug Rabson 			m->m_flags |= M_FRAG;
7356effc713SDoug Rabson 		}
736df8bae1dSRodney W. Grimes 		ip->ip_off <<= 3;
737df8bae1dSRodney W. Grimes 
738df8bae1dSRodney W. Grimes 		/*
739b6ea1aa5SRuslan Ermilov 		 * Attempt reassembly; if it succeeds, proceed.
7402b25acc1SLuigi Rizzo 		 * ip_reass() will return a different mbuf, and update
7412b25acc1SLuigi Rizzo 		 * the divert info in divert_info and args.divert_rule.
742df8bae1dSRodney W. Grimes 		 */
743df8bae1dSRodney W. Grimes 		ipstat.ips_fragments++;
744487bdb38SRuslan Ermilov 		m->m_pkthdr.header = ip;
7456a800098SYoshinobu Inoue 		m = ip_reass(m,
7462b25acc1SLuigi Rizzo 		    &ipq[sum], fp, &divert_info, &args.divert_rule);
7472b25acc1SLuigi Rizzo 		if (m == 0)
748c67b1d17SGarrett Wollman 			return;
749df8bae1dSRodney W. Grimes 		ipstat.ips_reassembled++;
7506a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7517e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
7527e2df452SRuslan Ermilov 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
753af782f1cSBrian Somers #ifdef IPDIVERT
7548948e4baSArchie Cobbs 		/* Restore original checksum before diverting packet */
7558948e4baSArchie Cobbs 		if (divert_info != 0) {
756af782f1cSBrian Somers 			ip->ip_len += hlen;
757fd8e4ebcSMike Barcroft 			ip->ip_len = htons(ip->ip_len);
758fd8e4ebcSMike Barcroft 			ip->ip_off = htons(ip->ip_off);
759af782f1cSBrian Somers 			ip->ip_sum = 0;
76060123168SRuslan Ermilov 			if (hlen == sizeof(struct ip))
761af782f1cSBrian Somers 				ip->ip_sum = in_cksum_hdr(ip);
76260123168SRuslan Ermilov 			else
76360123168SRuslan Ermilov 				ip->ip_sum = in_cksum(m, hlen);
764fd8e4ebcSMike Barcroft 			ip->ip_off = ntohs(ip->ip_off);
765fd8e4ebcSMike Barcroft 			ip->ip_len = ntohs(ip->ip_len);
766af782f1cSBrian Somers 			ip->ip_len -= hlen;
767af782f1cSBrian Somers 		}
768af782f1cSBrian Somers #endif
769df8bae1dSRodney W. Grimes 	} else
770df8bae1dSRodney W. Grimes 		ip->ip_len -= hlen;
771df8bae1dSRodney W. Grimes 
77293e0e116SJulian Elischer #ifdef IPDIVERT
77393e0e116SJulian Elischer 	/*
7748948e4baSArchie Cobbs 	 * Divert or tee packet to the divert protocol if required.
77593e0e116SJulian Elischer 	 */
7768948e4baSArchie Cobbs 	if (divert_info != 0) {
7778948e4baSArchie Cobbs 		struct mbuf *clone = NULL;
7788948e4baSArchie Cobbs 
7798948e4baSArchie Cobbs 		/* Clone packet if we're doing a 'tee' */
7808948e4baSArchie Cobbs 		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
7818948e4baSArchie Cobbs 			clone = m_dup(m, M_DONTWAIT);
7828948e4baSArchie Cobbs 
7838948e4baSArchie Cobbs 		/* Restore packet header fields to original values */
7848948e4baSArchie Cobbs 		ip->ip_len += hlen;
785fd8e4ebcSMike Barcroft 		ip->ip_len = htons(ip->ip_len);
786fd8e4ebcSMike Barcroft 		ip->ip_off = htons(ip->ip_off);
7878948e4baSArchie Cobbs 
7888948e4baSArchie Cobbs 		/* Deliver packet to divert input routine */
7892b25acc1SLuigi Rizzo 		divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
790e4676ba6SJulian Elischer 		ipstat.ips_delivered++;
7918948e4baSArchie Cobbs 
7928948e4baSArchie Cobbs 		/* If 'tee', continue with original packet */
7938948e4baSArchie Cobbs 		if (clone == NULL)
79493e0e116SJulian Elischer 			return;
7958948e4baSArchie Cobbs 		m = clone;
7968948e4baSArchie Cobbs 		ip = mtod(m, struct ip *);
79756962689SCrist J. Clark 		ip->ip_len += hlen;
7982b25acc1SLuigi Rizzo 		/*
7992b25acc1SLuigi Rizzo 		 * Jump backwards to complete processing of the
8002b25acc1SLuigi Rizzo 		 * packet. But first clear divert_info to avoid
8012b25acc1SLuigi Rizzo 		 * entering this block again.
8022b25acc1SLuigi Rizzo 		 * We do not need to clear args.divert_rule
8032b25acc1SLuigi Rizzo 		 * or args.next_hop as they will not be used.
8042b25acc1SLuigi Rizzo 		 */
80556962689SCrist J. Clark 		divert_info = 0;
80656962689SCrist J. Clark 		goto pass;
80793e0e116SJulian Elischer 	}
80893e0e116SJulian Elischer #endif
80993e0e116SJulian Elischer 
81033841545SHajimu UMEMOTO #ifdef IPSEC
81133841545SHajimu UMEMOTO 	/*
81233841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
81333841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
81433841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
81533841545SHajimu UMEMOTO 	 */
81633841545SHajimu UMEMOTO 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
81733841545SHajimu UMEMOTO 	    ipsec4_in_reject(m, NULL)) {
81833841545SHajimu UMEMOTO 		ipsecstat.in_polvio++;
81933841545SHajimu UMEMOTO 		goto bad;
82033841545SHajimu UMEMOTO 	}
82133841545SHajimu UMEMOTO #endif
82233841545SHajimu UMEMOTO 
823df8bae1dSRodney W. Grimes 	/*
824df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
825df8bae1dSRodney W. Grimes 	 */
826df8bae1dSRodney W. Grimes 	ipstat.ips_delivered++;
8272b25acc1SLuigi Rizzo 	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
8282b25acc1SLuigi Rizzo 		/* TCP needs IPFORWARD info if available */
8292b25acc1SLuigi Rizzo 		struct m_hdr tag;
8306a800098SYoshinobu Inoue 
8312b25acc1SLuigi Rizzo 		tag.mh_type = MT_TAG;
8322b25acc1SLuigi Rizzo 		tag.mh_flags = PACKET_TAG_IPFORWARD;
8332b25acc1SLuigi Rizzo 		tag.mh_data = (caddr_t)args.next_hop;
8342b25acc1SLuigi Rizzo 		tag.mh_next = m;
8352b25acc1SLuigi Rizzo 
8362b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
8372b25acc1SLuigi Rizzo 			(struct mbuf *)&tag, hlen);
8382b25acc1SLuigi Rizzo 	} else
8392b25acc1SLuigi Rizzo 		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
840c67b1d17SGarrett Wollman 	return;
841df8bae1dSRodney W. Grimes bad:
842df8bae1dSRodney W. Grimes 	m_freem(m);
843c67b1d17SGarrett Wollman }
844c67b1d17SGarrett Wollman 
845c67b1d17SGarrett Wollman /*
846c67b1d17SGarrett Wollman  * IP software interrupt routine - to go away sometime soon
847c67b1d17SGarrett Wollman  */
848c67b1d17SGarrett Wollman static void
849c67b1d17SGarrett Wollman ipintr(void)
850c67b1d17SGarrett Wollman {
851c67b1d17SGarrett Wollman 	struct mbuf *m;
852c67b1d17SGarrett Wollman 
853c67b1d17SGarrett Wollman 	while (1) {
854c67b1d17SGarrett Wollman 		IF_DEQUEUE(&ipintrq, m);
855c67b1d17SGarrett Wollman 		if (m == 0)
856c67b1d17SGarrett Wollman 			return;
857c67b1d17SGarrett Wollman 		ip_input(m);
858c67b1d17SGarrett Wollman 	}
859df8bae1dSRodney W. Grimes }
860df8bae1dSRodney W. Grimes 
861df8bae1dSRodney W. Grimes /*
8628948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
8638948e4baSArchie Cobbs  * whole datagram.  If a chain for reassembly of this datagram already
8648948e4baSArchie Cobbs  * exists, then it is given as fp; otherwise have to make a chain.
8658948e4baSArchie Cobbs  *
8668948e4baSArchie Cobbs  * When IPDIVERT enabled, keep additional state with each packet that
8678948e4baSArchie Cobbs  * tells us if we need to divert or tee the packet we're building.
8682b25acc1SLuigi Rizzo  * In particular, *divinfo includes the port and TEE flag,
8692b25acc1SLuigi Rizzo  * *divert_rule is the number of the matching rule.
870df8bae1dSRodney W. Grimes  */
8718948e4baSArchie Cobbs 
8726a800098SYoshinobu Inoue static struct mbuf *
8732b25acc1SLuigi Rizzo ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
8742b25acc1SLuigi Rizzo 	u_int32_t *divinfo, u_int16_t *divert_rule)
875df8bae1dSRodney W. Grimes {
8766effc713SDoug Rabson 	struct ip *ip = mtod(m, struct ip *);
877b6ea1aa5SRuslan Ermilov 	register struct mbuf *p, *q, *nq;
878df8bae1dSRodney W. Grimes 	struct mbuf *t;
8796effc713SDoug Rabson 	int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
880df8bae1dSRodney W. Grimes 	int i, next;
881df8bae1dSRodney W. Grimes 
882df8bae1dSRodney W. Grimes 	/*
883df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
884df8bae1dSRodney W. Grimes 	 * would confuse code below.
885df8bae1dSRodney W. Grimes 	 */
886df8bae1dSRodney W. Grimes 	m->m_data += hlen;
887df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
888df8bae1dSRodney W. Grimes 
889df8bae1dSRodney W. Grimes 	/*
890df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
891df8bae1dSRodney W. Grimes 	 */
892df8bae1dSRodney W. Grimes 	if (fp == 0) {
893690a6055SJesper Skriver 		/*
894690a6055SJesper Skriver 		 * Enforce upper bound on number of fragmented packets
895690a6055SJesper Skriver 		 * for which we attempt reassembly;
896690a6055SJesper Skriver 		 * If maxfrag is 0, never accept fragments.
897690a6055SJesper Skriver 		 * If maxfrag is -1, accept all fragments without limitation.
898690a6055SJesper Skriver 		 */
899690a6055SJesper Skriver 		if ((ip_maxfragpackets >= 0) && (ip_nfragpackets >= ip_maxfragpackets))
900690a6055SJesper Skriver 			goto dropfrag;
901690a6055SJesper Skriver 		ip_nfragpackets++;
902df8bae1dSRodney W. Grimes 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
903df8bae1dSRodney W. Grimes 			goto dropfrag;
904df8bae1dSRodney W. Grimes 		fp = mtod(t, struct ipq *);
905462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
906194a213eSAndrey A. Chernov 		nipq++;
907df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
908df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
909df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
9106effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
9116effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
912af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
913af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
91493e0e116SJulian Elischer #ifdef IPDIVERT
9158948e4baSArchie Cobbs 		fp->ipq_div_info = 0;
916bb60f459SJulian Elischer 		fp->ipq_div_cookie = 0;
91793e0e116SJulian Elischer #endif
918af38c68cSLuigi Rizzo 		goto inserted;
919df8bae1dSRodney W. Grimes 	}
920df8bae1dSRodney W. Grimes 
9216effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
9226effc713SDoug Rabson 
923df8bae1dSRodney W. Grimes 	/*
924df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
925df8bae1dSRodney W. Grimes 	 */
9266effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
9276effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
928df8bae1dSRodney W. Grimes 			break;
929df8bae1dSRodney W. Grimes 
930df8bae1dSRodney W. Grimes 	/*
931df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
932df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
933af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
934af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
935db4f9cc7SJonathan Lemon 	 *
936db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
937db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
938df8bae1dSRodney W. Grimes 	 */
9396effc713SDoug Rabson 	if (p) {
9406effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
941df8bae1dSRodney W. Grimes 		if (i > 0) {
942df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
943df8bae1dSRodney W. Grimes 				goto dropfrag;
9446a800098SYoshinobu Inoue 			m_adj(m, i);
945db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
946df8bae1dSRodney W. Grimes 			ip->ip_off += i;
947df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
948df8bae1dSRodney W. Grimes 		}
949af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
950af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
951af38c68cSLuigi Rizzo 	} else {
952af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
953af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
954df8bae1dSRodney W. Grimes 	}
955df8bae1dSRodney W. Grimes 
956df8bae1dSRodney W. Grimes 	/*
957df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
958df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
959df8bae1dSRodney W. Grimes 	 */
9606effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
961af38c68cSLuigi Rizzo 	     q = nq) {
9626effc713SDoug Rabson 		i = (ip->ip_off + ip->ip_len) -
9636effc713SDoug Rabson 		    GETIP(q)->ip_off;
9646effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
9656effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
9666effc713SDoug Rabson 			GETIP(q)->ip_off += i;
9676effc713SDoug Rabson 			m_adj(q, i);
968db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
969df8bae1dSRodney W. Grimes 			break;
970df8bae1dSRodney W. Grimes 		}
9716effc713SDoug Rabson 		nq = q->m_nextpkt;
972af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
9736effc713SDoug Rabson 		m_freem(q);
974df8bae1dSRodney W. Grimes 	}
975df8bae1dSRodney W. Grimes 
976af38c68cSLuigi Rizzo inserted:
97793e0e116SJulian Elischer 
97893e0e116SJulian Elischer #ifdef IPDIVERT
97993e0e116SJulian Elischer 	/*
9808948e4baSArchie Cobbs 	 * Transfer firewall instructions to the fragment structure.
9812b25acc1SLuigi Rizzo 	 * Only trust info in the fragment at offset 0.
98293e0e116SJulian Elischer 	 */
9832b25acc1SLuigi Rizzo 	if (ip->ip_off == 0) {
9848948e4baSArchie Cobbs 		fp->ipq_div_info = *divinfo;
9852b25acc1SLuigi Rizzo 		fp->ipq_div_cookie = *divert_rule;
9862b25acc1SLuigi Rizzo 	}
9878948e4baSArchie Cobbs 	*divinfo = 0;
9882b25acc1SLuigi Rizzo 	*divert_rule = 0;
98993e0e116SJulian Elischer #endif
99093e0e116SJulian Elischer 
991df8bae1dSRodney W. Grimes 	/*
992af38c68cSLuigi Rizzo 	 * Check for complete reassembly.
993df8bae1dSRodney W. Grimes 	 */
9946effc713SDoug Rabson 	next = 0;
9956effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
9966effc713SDoug Rabson 		if (GETIP(q)->ip_off != next)
9976effc713SDoug Rabson 			return (0);
9986effc713SDoug Rabson 		next += GETIP(q)->ip_len;
9996effc713SDoug Rabson 	}
10006effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
10016effc713SDoug Rabson 	if (p->m_flags & M_FRAG)
1002df8bae1dSRodney W. Grimes 		return (0);
1003df8bae1dSRodney W. Grimes 
1004df8bae1dSRodney W. Grimes 	/*
1005430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1006430d30d8SBill Fenner 	 */
10076effc713SDoug Rabson 	q = fp->ipq_frags;
10086effc713SDoug Rabson 	ip = GETIP(q);
10096effc713SDoug Rabson 	if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
1010430d30d8SBill Fenner 		ipstat.ips_toolong++;
1011462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1012430d30d8SBill Fenner 		return (0);
1013430d30d8SBill Fenner 	}
1014430d30d8SBill Fenner 
1015430d30d8SBill Fenner 	/*
1016430d30d8SBill Fenner 	 * Concatenate fragments.
1017df8bae1dSRodney W. Grimes 	 */
10186effc713SDoug Rabson 	m = q;
1019df8bae1dSRodney W. Grimes 	t = m->m_next;
1020df8bae1dSRodney W. Grimes 	m->m_next = 0;
1021df8bae1dSRodney W. Grimes 	m_cat(m, t);
10226effc713SDoug Rabson 	nq = q->m_nextpkt;
1023945aa40dSDoug Rabson 	q->m_nextpkt = 0;
10246effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
10256effc713SDoug Rabson 		nq = q->m_nextpkt;
1026945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1027db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1028db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1029a8db1d93SJonathan Lemon 		m_cat(m, q);
1030df8bae1dSRodney W. Grimes 	}
1031df8bae1dSRodney W. Grimes 
103293e0e116SJulian Elischer #ifdef IPDIVERT
103393e0e116SJulian Elischer 	/*
10348948e4baSArchie Cobbs 	 * Extract firewall instructions from the fragment structure.
103593e0e116SJulian Elischer 	 */
10368948e4baSArchie Cobbs 	*divinfo = fp->ipq_div_info;
10372b25acc1SLuigi Rizzo 	*divert_rule = fp->ipq_div_cookie;
103893e0e116SJulian Elischer #endif
103993e0e116SJulian Elischer 
1040df8bae1dSRodney W. Grimes 	/*
1041df8bae1dSRodney W. Grimes 	 * Create header for new ip packet by
1042df8bae1dSRodney W. Grimes 	 * modifying header of first packet;
1043df8bae1dSRodney W. Grimes 	 * dequeue and discard fragment reassembly header.
1044df8bae1dSRodney W. Grimes 	 * Make header visible.
1045df8bae1dSRodney W. Grimes 	 */
1046df8bae1dSRodney W. Grimes 	ip->ip_len = next;
10476effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
10486effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1049462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1050194a213eSAndrey A. Chernov 	nipq--;
1051df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1052690a6055SJesper Skriver 	ip_nfragpackets--;
10536effc713SDoug Rabson 	m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
10546effc713SDoug Rabson 	m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
1055df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1056df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
1057df8bae1dSRodney W. Grimes 		register int plen = 0;
10586a800098SYoshinobu Inoue 		for (t = m; t; t = t->m_next)
10596a800098SYoshinobu Inoue 			plen += t->m_len;
10606a800098SYoshinobu Inoue 		m->m_pkthdr.len = plen;
1061df8bae1dSRodney W. Grimes 	}
10626a800098SYoshinobu Inoue 	return (m);
1063df8bae1dSRodney W. Grimes 
1064df8bae1dSRodney W. Grimes dropfrag:
1065efe39c6aSJulian Elischer #ifdef IPDIVERT
10668948e4baSArchie Cobbs 	*divinfo = 0;
10672b25acc1SLuigi Rizzo 	*divert_rule = 0;
1068efe39c6aSJulian Elischer #endif
1069df8bae1dSRodney W. Grimes 	ipstat.ips_fragdropped++;
1070df8bae1dSRodney W. Grimes 	m_freem(m);
1071df8bae1dSRodney W. Grimes 	return (0);
10726effc713SDoug Rabson 
10736effc713SDoug Rabson #undef GETIP
1074df8bae1dSRodney W. Grimes }
1075df8bae1dSRodney W. Grimes 
1076df8bae1dSRodney W. Grimes /*
1077df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1078df8bae1dSRodney W. Grimes  * associated datagrams.
1079df8bae1dSRodney W. Grimes  */
10800312fbe9SPoul-Henning Kamp static void
1081462b86feSPoul-Henning Kamp ip_freef(fhp, fp)
1082462b86feSPoul-Henning Kamp 	struct ipqhead *fhp;
1083df8bae1dSRodney W. Grimes 	struct ipq *fp;
1084df8bae1dSRodney W. Grimes {
10856effc713SDoug Rabson 	register struct mbuf *q;
1086df8bae1dSRodney W. Grimes 
10876effc713SDoug Rabson 	while (fp->ipq_frags) {
10886effc713SDoug Rabson 		q = fp->ipq_frags;
10896effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
10906effc713SDoug Rabson 		m_freem(q);
1091df8bae1dSRodney W. Grimes 	}
1092462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1093df8bae1dSRodney W. Grimes 	(void) m_free(dtom(fp));
1094690a6055SJesper Skriver 	ip_nfragpackets--;
1095194a213eSAndrey A. Chernov 	nipq--;
1096df8bae1dSRodney W. Grimes }
1097df8bae1dSRodney W. Grimes 
1098df8bae1dSRodney W. Grimes /*
1099df8bae1dSRodney W. Grimes  * IP timer processing;
1100df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1101df8bae1dSRodney W. Grimes  * queue, discard it.
1102df8bae1dSRodney W. Grimes  */
1103df8bae1dSRodney W. Grimes void
1104df8bae1dSRodney W. Grimes ip_slowtimo()
1105df8bae1dSRodney W. Grimes {
1106df8bae1dSRodney W. Grimes 	register struct ipq *fp;
1107df8bae1dSRodney W. Grimes 	int s = splnet();
1108194a213eSAndrey A. Chernov 	int i;
1109df8bae1dSRodney W. Grimes 
1110194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1111462b86feSPoul-Henning Kamp 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1112462b86feSPoul-Henning Kamp 			struct ipq *fpp;
1113462b86feSPoul-Henning Kamp 
1114462b86feSPoul-Henning Kamp 			fpp = fp;
1115462b86feSPoul-Henning Kamp 			fp = TAILQ_NEXT(fp, ipq_list);
1116462b86feSPoul-Henning Kamp 			if(--fpp->ipq_ttl == 0) {
1117df8bae1dSRodney W. Grimes 				ipstat.ips_fragtimeout++;
1118462b86feSPoul-Henning Kamp 				ip_freef(&ipq[i], fpp);
1119df8bae1dSRodney W. Grimes 			}
1120df8bae1dSRodney W. Grimes 		}
1121194a213eSAndrey A. Chernov 	}
1122690a6055SJesper Skriver 	/*
1123690a6055SJesper Skriver 	 * If we are over the maximum number of fragments
1124690a6055SJesper Skriver 	 * (due to the limit being lowered), drain off
1125690a6055SJesper Skriver 	 * enough to get down to the new limit.
1126690a6055SJesper Skriver 	 */
1127690a6055SJesper Skriver 	for (i = 0; i < IPREASS_NHASH; i++) {
1128690a6055SJesper Skriver 		if (ip_maxfragpackets >= 0) {
1129690a6055SJesper Skriver 			while (ip_nfragpackets > ip_maxfragpackets &&
1130690a6055SJesper Skriver 				!TAILQ_EMPTY(&ipq[i])) {
1131690a6055SJesper Skriver 				ipstat.ips_fragdropped++;
1132690a6055SJesper Skriver 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1133690a6055SJesper Skriver 			}
1134690a6055SJesper Skriver 		}
1135690a6055SJesper Skriver 	}
11361f91d8c5SDavid Greenman 	ipflow_slowtimo();
1137df8bae1dSRodney W. Grimes 	splx(s);
1138df8bae1dSRodney W. Grimes }
1139df8bae1dSRodney W. Grimes 
1140df8bae1dSRodney W. Grimes /*
1141df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1142df8bae1dSRodney W. Grimes  */
1143df8bae1dSRodney W. Grimes void
1144df8bae1dSRodney W. Grimes ip_drain()
1145df8bae1dSRodney W. Grimes {
1146194a213eSAndrey A. Chernov 	int     i;
1147ce29ab3aSGarrett Wollman 
1148194a213eSAndrey A. Chernov 	for (i = 0; i < IPREASS_NHASH; i++) {
1149462b86feSPoul-Henning Kamp 		while(!TAILQ_EMPTY(&ipq[i])) {
1150194a213eSAndrey A. Chernov 			ipstat.ips_fragdropped++;
1151462b86feSPoul-Henning Kamp 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1152194a213eSAndrey A. Chernov 		}
1153194a213eSAndrey A. Chernov 	}
1154ce29ab3aSGarrett Wollman 	in_rtqdrain();
1155df8bae1dSRodney W. Grimes }
1156df8bae1dSRodney W. Grimes 
1157df8bae1dSRodney W. Grimes /*
1158df8bae1dSRodney W. Grimes  * Do option processing on a datagram,
1159df8bae1dSRodney W. Grimes  * possibly discarding it if bad options are encountered,
1160df8bae1dSRodney W. Grimes  * or forwarding it if source-routed.
1161d0ebc0d2SYaroslav Tykhiy  * The pass argument is used when operating in the IPSTEALTH
1162d0ebc0d2SYaroslav Tykhiy  * mode to tell what options to process:
1163d0ebc0d2SYaroslav Tykhiy  * [LS]SRR (pass 0) or the others (pass 1).
1164d0ebc0d2SYaroslav Tykhiy  * The reason for as many as two passes is that when doing IPSTEALTH,
1165d0ebc0d2SYaroslav Tykhiy  * non-routing options should be processed only if the packet is for us.
1166df8bae1dSRodney W. Grimes  * Returns 1 if packet has been forwarded/freed,
1167df8bae1dSRodney W. Grimes  * 0 if the packet should be processed further.
1168df8bae1dSRodney W. Grimes  */
11690312fbe9SPoul-Henning Kamp static int
11702b25acc1SLuigi Rizzo ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1171df8bae1dSRodney W. Grimes {
11722b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
11732b25acc1SLuigi Rizzo 	u_char *cp;
11742b25acc1SLuigi Rizzo 	struct in_ifaddr *ia;
1175df8bae1dSRodney W. Grimes 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1176df8bae1dSRodney W. Grimes 	struct in_addr *sin, dst;
1177df8bae1dSRodney W. Grimes 	n_time ntime;
11784d2e3692SLuigi Rizzo 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1179df8bae1dSRodney W. Grimes 
1180df8bae1dSRodney W. Grimes 	dst = ip->ip_dst;
1181df8bae1dSRodney W. Grimes 	cp = (u_char *)(ip + 1);
118258938916SGarrett Wollman 	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1183df8bae1dSRodney W. Grimes 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1184df8bae1dSRodney W. Grimes 		opt = cp[IPOPT_OPTVAL];
1185df8bae1dSRodney W. Grimes 		if (opt == IPOPT_EOL)
1186df8bae1dSRodney W. Grimes 			break;
1187df8bae1dSRodney W. Grimes 		if (opt == IPOPT_NOP)
1188df8bae1dSRodney W. Grimes 			optlen = 1;
1189df8bae1dSRodney W. Grimes 		else {
1190fdcb8debSJun-ichiro itojun Hagino 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1191fdcb8debSJun-ichiro itojun Hagino 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1192fdcb8debSJun-ichiro itojun Hagino 				goto bad;
1193fdcb8debSJun-ichiro itojun Hagino 			}
1194df8bae1dSRodney W. Grimes 			optlen = cp[IPOPT_OLEN];
1195707d00a3SJonathan Lemon 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1196df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1197df8bae1dSRodney W. Grimes 				goto bad;
1198df8bae1dSRodney W. Grimes 			}
1199df8bae1dSRodney W. Grimes 		}
1200df8bae1dSRodney W. Grimes 		switch (opt) {
1201df8bae1dSRodney W. Grimes 
1202df8bae1dSRodney W. Grimes 		default:
1203df8bae1dSRodney W. Grimes 			break;
1204df8bae1dSRodney W. Grimes 
1205df8bae1dSRodney W. Grimes 		/*
1206df8bae1dSRodney W. Grimes 		 * Source routing with record.
1207df8bae1dSRodney W. Grimes 		 * Find interface with current destination address.
1208df8bae1dSRodney W. Grimes 		 * If none on this machine then drop if strictly routed,
1209df8bae1dSRodney W. Grimes 		 * or do nothing if loosely routed.
1210df8bae1dSRodney W. Grimes 		 * Record interface address and bring up next address
1211df8bae1dSRodney W. Grimes 		 * component.  If strictly routed make sure next
1212df8bae1dSRodney W. Grimes 		 * address is on directly accessible net.
1213df8bae1dSRodney W. Grimes 		 */
1214df8bae1dSRodney W. Grimes 		case IPOPT_LSRR:
1215df8bae1dSRodney W. Grimes 		case IPOPT_SSRR:
1216d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1217d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass > 0)
1218d0ebc0d2SYaroslav Tykhiy 				break;
1219d0ebc0d2SYaroslav Tykhiy #endif
122033841545SHajimu UMEMOTO 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
122133841545SHajimu UMEMOTO 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
122233841545SHajimu UMEMOTO 				goto bad;
122333841545SHajimu UMEMOTO 			}
1224df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1225df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1226df8bae1dSRodney W. Grimes 				goto bad;
1227df8bae1dSRodney W. Grimes 			}
1228df8bae1dSRodney W. Grimes 			ipaddr.sin_addr = ip->ip_dst;
1229df8bae1dSRodney W. Grimes 			ia = (struct in_ifaddr *)
1230df8bae1dSRodney W. Grimes 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1231df8bae1dSRodney W. Grimes 			if (ia == 0) {
1232df8bae1dSRodney W. Grimes 				if (opt == IPOPT_SSRR) {
1233df8bae1dSRodney W. Grimes 					type = ICMP_UNREACH;
1234df8bae1dSRodney W. Grimes 					code = ICMP_UNREACH_SRCFAIL;
1235df8bae1dSRodney W. Grimes 					goto bad;
1236df8bae1dSRodney W. Grimes 				}
1237bc189bf8SGuido van Rooij 				if (!ip_dosourceroute)
1238bc189bf8SGuido van Rooij 					goto nosourcerouting;
1239df8bae1dSRodney W. Grimes 				/*
1240df8bae1dSRodney W. Grimes 				 * Loose routing, and not at next destination
1241df8bae1dSRodney W. Grimes 				 * yet; nothing to do except forward.
1242df8bae1dSRodney W. Grimes 				 */
1243df8bae1dSRodney W. Grimes 				break;
1244df8bae1dSRodney W. Grimes 			}
1245df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
12465d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr)) {
1247df8bae1dSRodney W. Grimes 				/*
1248df8bae1dSRodney W. Grimes 				 * End of source route.  Should be for us.
1249df8bae1dSRodney W. Grimes 				 */
12504fce5804SGuido van Rooij 				if (!ip_acceptsourceroute)
12514fce5804SGuido van Rooij 					goto nosourcerouting;
1252df8bae1dSRodney W. Grimes 				save_rte(cp, ip->ip_src);
1253df8bae1dSRodney W. Grimes 				break;
1254df8bae1dSRodney W. Grimes 			}
1255d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1256d0ebc0d2SYaroslav Tykhiy 			if (ipstealth)
1257d0ebc0d2SYaroslav Tykhiy 				goto dropit;
1258d0ebc0d2SYaroslav Tykhiy #endif
12591025071fSGarrett Wollman 			if (!ip_dosourceroute) {
12600af8d3ecSDavid Greenman 				if (ipforwarding) {
12610af8d3ecSDavid Greenman 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
12620af8d3ecSDavid Greenman 					/*
12630af8d3ecSDavid Greenman 					 * Acting as a router, so generate ICMP
12640af8d3ecSDavid Greenman 					 */
1265efa48587SGuido van Rooij nosourcerouting:
1266bc189bf8SGuido van Rooij 					strcpy(buf, inet_ntoa(ip->ip_dst));
12671025071fSGarrett Wollman 					log(LOG_WARNING,
12681025071fSGarrett Wollman 					    "attempted source route from %s to %s\n",
12691025071fSGarrett Wollman 					    inet_ntoa(ip->ip_src), buf);
12701025071fSGarrett Wollman 					type = ICMP_UNREACH;
12711025071fSGarrett Wollman 					code = ICMP_UNREACH_SRCFAIL;
12721025071fSGarrett Wollman 					goto bad;
12730af8d3ecSDavid Greenman 				} else {
12740af8d3ecSDavid Greenman 					/*
12750af8d3ecSDavid Greenman 					 * Not acting as a router, so silently drop.
12760af8d3ecSDavid Greenman 					 */
1277d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1278d0ebc0d2SYaroslav Tykhiy dropit:
1279d0ebc0d2SYaroslav Tykhiy #endif
12800af8d3ecSDavid Greenman 					ipstat.ips_cantforward++;
12810af8d3ecSDavid Greenman 					m_freem(m);
12820af8d3ecSDavid Greenman 					return (1);
12830af8d3ecSDavid Greenman 				}
12841025071fSGarrett Wollman 			}
12851025071fSGarrett Wollman 
1286df8bae1dSRodney W. Grimes 			/*
1287df8bae1dSRodney W. Grimes 			 * locate outgoing interface
1288df8bae1dSRodney W. Grimes 			 */
128994a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, cp + off,
1290df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
12911025071fSGarrett Wollman 
1292df8bae1dSRodney W. Grimes 			if (opt == IPOPT_SSRR) {
1293df8bae1dSRodney W. Grimes #define	INA	struct in_ifaddr *
1294df8bae1dSRodney W. Grimes #define	SA	struct sockaddr *
1295df8bae1dSRodney W. Grimes 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1296df8bae1dSRodney W. Grimes 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1297df8bae1dSRodney W. Grimes 			} else
1298bd714208SRuslan Ermilov 				ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
1299df8bae1dSRodney W. Grimes 			if (ia == 0) {
1300df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1301df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_SRCFAIL;
1302df8bae1dSRodney W. Grimes 				goto bad;
1303df8bae1dSRodney W. Grimes 			}
1304df8bae1dSRodney W. Grimes 			ip->ip_dst = ipaddr.sin_addr;
130594a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
130694a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1307df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1308df8bae1dSRodney W. Grimes 			/*
1309df8bae1dSRodney W. Grimes 			 * Let ip_intr's mcast routing check handle mcast pkts
1310df8bae1dSRodney W. Grimes 			 */
1311df8bae1dSRodney W. Grimes 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1312df8bae1dSRodney W. Grimes 			break;
1313df8bae1dSRodney W. Grimes 
1314df8bae1dSRodney W. Grimes 		case IPOPT_RR:
1315d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1316d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1317d0ebc0d2SYaroslav Tykhiy 				break;
1318d0ebc0d2SYaroslav Tykhiy #endif
1319707d00a3SJonathan Lemon 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1320707d00a3SJonathan Lemon 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1321707d00a3SJonathan Lemon 				goto bad;
1322707d00a3SJonathan Lemon 			}
1323df8bae1dSRodney W. Grimes 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1324df8bae1dSRodney W. Grimes 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1325df8bae1dSRodney W. Grimes 				goto bad;
1326df8bae1dSRodney W. Grimes 			}
1327df8bae1dSRodney W. Grimes 			/*
1328df8bae1dSRodney W. Grimes 			 * If no space remains, ignore.
1329df8bae1dSRodney W. Grimes 			 */
1330df8bae1dSRodney W. Grimes 			off--;			/* 0 origin */
13315d5d5fc0SJonathan Lemon 			if (off > optlen - (int)sizeof(struct in_addr))
1332df8bae1dSRodney W. Grimes 				break;
133394a5d9b6SDavid Greenman 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1334df8bae1dSRodney W. Grimes 			    sizeof(ipaddr.sin_addr));
1335df8bae1dSRodney W. Grimes 			/*
1336df8bae1dSRodney W. Grimes 			 * locate outgoing interface; if we're the destination,
1337df8bae1dSRodney W. Grimes 			 * use the incoming interface (should be same).
1338df8bae1dSRodney W. Grimes 			 */
1339df8bae1dSRodney W. Grimes 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1340bd714208SRuslan Ermilov 			    (ia = ip_rtaddr(ipaddr.sin_addr,
1341bd714208SRuslan Ermilov 			    &ipforward_rt)) == 0) {
1342df8bae1dSRodney W. Grimes 				type = ICMP_UNREACH;
1343df8bae1dSRodney W. Grimes 				code = ICMP_UNREACH_HOST;
1344df8bae1dSRodney W. Grimes 				goto bad;
1345df8bae1dSRodney W. Grimes 			}
134694a5d9b6SDavid Greenman 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
134794a5d9b6SDavid Greenman 			    sizeof(struct in_addr));
1348df8bae1dSRodney W. Grimes 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1349df8bae1dSRodney W. Grimes 			break;
1350df8bae1dSRodney W. Grimes 
1351df8bae1dSRodney W. Grimes 		case IPOPT_TS:
1352d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
1353d0ebc0d2SYaroslav Tykhiy 			if (ipstealth && pass == 0)
1354d0ebc0d2SYaroslav Tykhiy 				break;
1355d0ebc0d2SYaroslav Tykhiy #endif
1356df8bae1dSRodney W. Grimes 			code = cp - (u_char *)ip;
135707514071SJonathan Lemon 			if (optlen < 4 || optlen > 40) {
135807514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1359df8bae1dSRodney W. Grimes 				goto bad;
136033841545SHajimu UMEMOTO 			}
136107514071SJonathan Lemon 			if ((off = cp[IPOPT_OFFSET]) < 5) {
136207514071SJonathan Lemon 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
136333841545SHajimu UMEMOTO 				goto bad;
136433841545SHajimu UMEMOTO 			}
136507514071SJonathan Lemon 			if (off > optlen - (int)sizeof(int32_t)) {
136607514071SJonathan Lemon 				cp[IPOPT_OFFSET + 1] += (1 << 4);
136707514071SJonathan Lemon 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
136807514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1369df8bae1dSRodney W. Grimes 					goto bad;
137033841545SHajimu UMEMOTO 				}
1371df8bae1dSRodney W. Grimes 				break;
1372df8bae1dSRodney W. Grimes 			}
137307514071SJonathan Lemon 			off--;				/* 0 origin */
137407514071SJonathan Lemon 			sin = (struct in_addr *)(cp + off);
137507514071SJonathan Lemon 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1376df8bae1dSRodney W. Grimes 
1377df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSONLY:
1378df8bae1dSRodney W. Grimes 				break;
1379df8bae1dSRodney W. Grimes 
1380df8bae1dSRodney W. Grimes 			case IPOPT_TS_TSANDADDR:
138107514071SJonathan Lemon 				if (off + sizeof(n_time) +
138207514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
138307514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1384df8bae1dSRodney W. Grimes 					goto bad;
138533841545SHajimu UMEMOTO 				}
1386df8bae1dSRodney W. Grimes 				ipaddr.sin_addr = dst;
1387df8bae1dSRodney W. Grimes 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1388df8bae1dSRodney W. Grimes 							    m->m_pkthdr.rcvif);
1389df8bae1dSRodney W. Grimes 				if (ia == 0)
1390df8bae1dSRodney W. Grimes 					continue;
139194a5d9b6SDavid Greenman 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
139294a5d9b6SDavid Greenman 				    sizeof(struct in_addr));
139307514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1394df8bae1dSRodney W. Grimes 				break;
1395df8bae1dSRodney W. Grimes 
1396df8bae1dSRodney W. Grimes 			case IPOPT_TS_PRESPEC:
139707514071SJonathan Lemon 				if (off + sizeof(n_time) +
139807514071SJonathan Lemon 				    sizeof(struct in_addr) > optlen) {
139907514071SJonathan Lemon 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1400df8bae1dSRodney W. Grimes 					goto bad;
140133841545SHajimu UMEMOTO 				}
140294a5d9b6SDavid Greenman 				(void)memcpy(&ipaddr.sin_addr, sin,
1403df8bae1dSRodney W. Grimes 				    sizeof(struct in_addr));
1404df8bae1dSRodney W. Grimes 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1405df8bae1dSRodney W. Grimes 					continue;
140607514071SJonathan Lemon 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1407df8bae1dSRodney W. Grimes 				break;
1408df8bae1dSRodney W. Grimes 
1409df8bae1dSRodney W. Grimes 			default:
141007514071SJonathan Lemon 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1411df8bae1dSRodney W. Grimes 				goto bad;
1412df8bae1dSRodney W. Grimes 			}
1413df8bae1dSRodney W. Grimes 			ntime = iptime();
141407514071SJonathan Lemon 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
141507514071SJonathan Lemon 			cp[IPOPT_OFFSET] += sizeof(n_time);
1416df8bae1dSRodney W. Grimes 		}
1417df8bae1dSRodney W. Grimes 	}
141847174b49SAndrey A. Chernov 	if (forward && ipforwarding) {
14192b25acc1SLuigi Rizzo 		ip_forward(m, 1, next_hop);
1420df8bae1dSRodney W. Grimes 		return (1);
1421df8bae1dSRodney W. Grimes 	}
1422df8bae1dSRodney W. Grimes 	return (0);
1423df8bae1dSRodney W. Grimes bad:
1424df8bae1dSRodney W. Grimes 	icmp_error(m, type, code, 0, 0);
1425df8bae1dSRodney W. Grimes 	ipstat.ips_badoptions++;
1426df8bae1dSRodney W. Grimes 	return (1);
1427df8bae1dSRodney W. Grimes }
1428df8bae1dSRodney W. Grimes 
1429df8bae1dSRodney W. Grimes /*
1430df8bae1dSRodney W. Grimes  * Given address of next destination (final or next hop),
1431df8bae1dSRodney W. Grimes  * return internet address info of interface to be used to get there.
1432df8bae1dSRodney W. Grimes  */
1433bd714208SRuslan Ermilov struct in_ifaddr *
1434bd714208SRuslan Ermilov ip_rtaddr(dst, rt)
1435df8bae1dSRodney W. Grimes 	struct in_addr dst;
1436bd714208SRuslan Ermilov 	struct route *rt;
1437df8bae1dSRodney W. Grimes {
1438df8bae1dSRodney W. Grimes 	register struct sockaddr_in *sin;
1439df8bae1dSRodney W. Grimes 
1440bd714208SRuslan Ermilov 	sin = (struct sockaddr_in *)&rt->ro_dst;
1441df8bae1dSRodney W. Grimes 
1442bd714208SRuslan Ermilov 	if (rt->ro_rt == 0 ||
1443bd714208SRuslan Ermilov 	    !(rt->ro_rt->rt_flags & RTF_UP) ||
14444078ffb1SRuslan Ermilov 	    dst.s_addr != sin->sin_addr.s_addr) {
1445bd714208SRuslan Ermilov 		if (rt->ro_rt) {
1446bd714208SRuslan Ermilov 			RTFREE(rt->ro_rt);
1447bd714208SRuslan Ermilov 			rt->ro_rt = 0;
1448df8bae1dSRodney W. Grimes 		}
1449df8bae1dSRodney W. Grimes 		sin->sin_family = AF_INET;
1450df8bae1dSRodney W. Grimes 		sin->sin_len = sizeof(*sin);
1451df8bae1dSRodney W. Grimes 		sin->sin_addr = dst;
1452df8bae1dSRodney W. Grimes 
1453bd714208SRuslan Ermilov 		rtalloc_ign(rt, RTF_PRCLONING);
1454df8bae1dSRodney W. Grimes 	}
1455bd714208SRuslan Ermilov 	if (rt->ro_rt == 0)
1456df8bae1dSRodney W. Grimes 		return ((struct in_ifaddr *)0);
1457bd714208SRuslan Ermilov 	return (ifatoia(rt->ro_rt->rt_ifa));
1458df8bae1dSRodney W. Grimes }
1459df8bae1dSRodney W. Grimes 
1460df8bae1dSRodney W. Grimes /*
1461df8bae1dSRodney W. Grimes  * Save incoming source route for use in replies,
1462df8bae1dSRodney W. Grimes  * to be picked up later by ip_srcroute if the receiver is interested.
1463df8bae1dSRodney W. Grimes  */
1464df8bae1dSRodney W. Grimes void
1465df8bae1dSRodney W. Grimes save_rte(option, dst)
1466df8bae1dSRodney W. Grimes 	u_char *option;
1467df8bae1dSRodney W. Grimes 	struct in_addr dst;
1468df8bae1dSRodney W. Grimes {
1469df8bae1dSRodney W. Grimes 	unsigned olen;
1470df8bae1dSRodney W. Grimes 
1471df8bae1dSRodney W. Grimes 	olen = option[IPOPT_OLEN];
1472df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1473df8bae1dSRodney W. Grimes 	if (ipprintfs)
1474df8bae1dSRodney W. Grimes 		printf("save_rte: olen %d\n", olen);
1475df8bae1dSRodney W. Grimes #endif
1476df8bae1dSRodney W. Grimes 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1477df8bae1dSRodney W. Grimes 		return;
14780453d3cbSBruce Evans 	bcopy(option, ip_srcrt.srcopt, olen);
1479df8bae1dSRodney W. Grimes 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1480df8bae1dSRodney W. Grimes 	ip_srcrt.dst = dst;
1481df8bae1dSRodney W. Grimes }
1482df8bae1dSRodney W. Grimes 
1483df8bae1dSRodney W. Grimes /*
1484df8bae1dSRodney W. Grimes  * Retrieve incoming source route for use in replies,
1485df8bae1dSRodney W. Grimes  * in the same form used by setsockopt.
1486df8bae1dSRodney W. Grimes  * The first hop is placed before the options, will be removed later.
1487df8bae1dSRodney W. Grimes  */
1488df8bae1dSRodney W. Grimes struct mbuf *
1489df8bae1dSRodney W. Grimes ip_srcroute()
1490df8bae1dSRodney W. Grimes {
1491df8bae1dSRodney W. Grimes 	register struct in_addr *p, *q;
1492df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1493df8bae1dSRodney W. Grimes 
1494df8bae1dSRodney W. Grimes 	if (ip_nhops == 0)
1495df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1496cfe8b629SGarrett Wollman 	m = m_get(M_DONTWAIT, MT_HEADER);
1497df8bae1dSRodney W. Grimes 	if (m == 0)
1498df8bae1dSRodney W. Grimes 		return ((struct mbuf *)0);
1499df8bae1dSRodney W. Grimes 
1500df8bae1dSRodney W. Grimes #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1501df8bae1dSRodney W. Grimes 
1502df8bae1dSRodney W. Grimes 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1503df8bae1dSRodney W. Grimes 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1504df8bae1dSRodney W. Grimes 	    OPTSIZ;
1505df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1506df8bae1dSRodney W. Grimes 	if (ipprintfs)
1507df8bae1dSRodney W. Grimes 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1508df8bae1dSRodney W. Grimes #endif
1509df8bae1dSRodney W. Grimes 
1510df8bae1dSRodney W. Grimes 	/*
1511df8bae1dSRodney W. Grimes 	 * First save first hop for return route
1512df8bae1dSRodney W. Grimes 	 */
1513df8bae1dSRodney W. Grimes 	p = &ip_srcrt.route[ip_nhops - 1];
1514df8bae1dSRodney W. Grimes 	*(mtod(m, struct in_addr *)) = *p--;
1515df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1516df8bae1dSRodney W. Grimes 	if (ipprintfs)
1517af38c68cSLuigi Rizzo 		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1518df8bae1dSRodney W. Grimes #endif
1519df8bae1dSRodney W. Grimes 
1520df8bae1dSRodney W. Grimes 	/*
1521df8bae1dSRodney W. Grimes 	 * Copy option fields and padding (nop) to mbuf.
1522df8bae1dSRodney W. Grimes 	 */
1523df8bae1dSRodney W. Grimes 	ip_srcrt.nop = IPOPT_NOP;
1524df8bae1dSRodney W. Grimes 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
152594a5d9b6SDavid Greenman 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
152694a5d9b6SDavid Greenman 	    &ip_srcrt.nop, OPTSIZ);
1527df8bae1dSRodney W. Grimes 	q = (struct in_addr *)(mtod(m, caddr_t) +
1528df8bae1dSRodney W. Grimes 	    sizeof(struct in_addr) + OPTSIZ);
1529df8bae1dSRodney W. Grimes #undef OPTSIZ
1530df8bae1dSRodney W. Grimes 	/*
1531df8bae1dSRodney W. Grimes 	 * Record return path as an IP source route,
1532df8bae1dSRodney W. Grimes 	 * reversing the path (pointers are now aligned).
1533df8bae1dSRodney W. Grimes 	 */
1534df8bae1dSRodney W. Grimes 	while (p >= ip_srcrt.route) {
1535df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1536df8bae1dSRodney W. Grimes 		if (ipprintfs)
1537af38c68cSLuigi Rizzo 			printf(" %lx", (u_long)ntohl(q->s_addr));
1538df8bae1dSRodney W. Grimes #endif
1539df8bae1dSRodney W. Grimes 		*q++ = *p--;
1540df8bae1dSRodney W. Grimes 	}
1541df8bae1dSRodney W. Grimes 	/*
1542df8bae1dSRodney W. Grimes 	 * Last hop goes to final destination.
1543df8bae1dSRodney W. Grimes 	 */
1544df8bae1dSRodney W. Grimes 	*q = ip_srcrt.dst;
1545df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1546df8bae1dSRodney W. Grimes 	if (ipprintfs)
1547af38c68cSLuigi Rizzo 		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1548df8bae1dSRodney W. Grimes #endif
1549df8bae1dSRodney W. Grimes 	return (m);
1550df8bae1dSRodney W. Grimes }
1551df8bae1dSRodney W. Grimes 
1552df8bae1dSRodney W. Grimes /*
1553df8bae1dSRodney W. Grimes  * Strip out IP options, at higher
1554df8bae1dSRodney W. Grimes  * level protocol in the kernel.
1555df8bae1dSRodney W. Grimes  * Second argument is buffer to which options
1556df8bae1dSRodney W. Grimes  * will be moved, and return value is their length.
1557df8bae1dSRodney W. Grimes  * XXX should be deleted; last arg currently ignored.
1558df8bae1dSRodney W. Grimes  */
1559df8bae1dSRodney W. Grimes void
1560df8bae1dSRodney W. Grimes ip_stripoptions(m, mopt)
1561df8bae1dSRodney W. Grimes 	register struct mbuf *m;
1562df8bae1dSRodney W. Grimes 	struct mbuf *mopt;
1563df8bae1dSRodney W. Grimes {
1564df8bae1dSRodney W. Grimes 	register int i;
1565df8bae1dSRodney W. Grimes 	struct ip *ip = mtod(m, struct ip *);
1566df8bae1dSRodney W. Grimes 	register caddr_t opts;
1567df8bae1dSRodney W. Grimes 	int olen;
1568df8bae1dSRodney W. Grimes 
156958938916SGarrett Wollman 	olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1570df8bae1dSRodney W. Grimes 	opts = (caddr_t)(ip + 1);
1571df8bae1dSRodney W. Grimes 	i = m->m_len - (sizeof (struct ip) + olen);
1572df8bae1dSRodney W. Grimes 	bcopy(opts + olen, opts, (unsigned)i);
1573df8bae1dSRodney W. Grimes 	m->m_len -= olen;
1574df8bae1dSRodney W. Grimes 	if (m->m_flags & M_PKTHDR)
1575df8bae1dSRodney W. Grimes 		m->m_pkthdr.len -= olen;
157658938916SGarrett Wollman 	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
1577df8bae1dSRodney W. Grimes }
1578df8bae1dSRodney W. Grimes 
1579df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
1580df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
1581df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1582df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1583df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1584df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
15853b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
1586df8bae1dSRodney W. Grimes };
1587df8bae1dSRodney W. Grimes 
1588df8bae1dSRodney W. Grimes /*
1589df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1590df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1591df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1592df8bae1dSRodney W. Grimes  * of codes and types.
1593df8bae1dSRodney W. Grimes  *
1594df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1595df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1596df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1597df8bae1dSRodney W. Grimes  * protocol deal with that.
1598df8bae1dSRodney W. Grimes  *
1599df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1600df8bae1dSRodney W. Grimes  * via a source route.
1601df8bae1dSRodney W. Grimes  */
16020312fbe9SPoul-Henning Kamp static void
16032b25acc1SLuigi Rizzo ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1604df8bae1dSRodney W. Grimes {
16052b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
16062b25acc1SLuigi Rizzo 	struct rtentry *rt;
160726f9a767SRodney W. Grimes 	int error, type = 0, code = 0;
1608df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
1609df8bae1dSRodney W. Grimes 	n_long dest;
16103efc3014SJulian Elischer 	struct in_addr pkt_dst;
1611df8bae1dSRodney W. Grimes 	struct ifnet *destifp;
16126a800098SYoshinobu Inoue #ifdef IPSEC
16136a800098SYoshinobu Inoue 	struct ifnet dummyifp;
16146a800098SYoshinobu Inoue #endif
1615df8bae1dSRodney W. Grimes 
1616df8bae1dSRodney W. Grimes 	dest = 0;
16173efc3014SJulian Elischer 	/*
16183efc3014SJulian Elischer 	 * Cache the destination address of the packet; this may be
16193efc3014SJulian Elischer 	 * changed by use of 'ipfw fwd'.
16203efc3014SJulian Elischer 	 */
16212b25acc1SLuigi Rizzo 	pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
16223efc3014SJulian Elischer 
1623df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1624df8bae1dSRodney W. Grimes 	if (ipprintfs)
162561ce519bSPoul-Henning Kamp 		printf("forward: src %lx dst %lx ttl %x\n",
16263efc3014SJulian Elischer 		    (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1627162886e2SBruce Evans 		    ip->ip_ttl);
1628df8bae1dSRodney W. Grimes #endif
1629100ba1a6SJordan K. Hubbard 
1630100ba1a6SJordan K. Hubbard 
16313efc3014SJulian Elischer 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1632df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1633df8bae1dSRodney W. Grimes 		m_freem(m);
1634df8bae1dSRodney W. Grimes 		return;
1635df8bae1dSRodney W. Grimes 	}
16361b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
16371b968362SDag-Erling Smørgrav 	if (!ipstealth) {
16381b968362SDag-Erling Smørgrav #endif
1639df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
16401b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
16411b968362SDag-Erling Smørgrav 			    dest, 0);
1642df8bae1dSRodney W. Grimes 			return;
1643df8bae1dSRodney W. Grimes 		}
16441b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
16451b968362SDag-Erling Smørgrav 	}
16461b968362SDag-Erling Smørgrav #endif
1647df8bae1dSRodney W. Grimes 
16483efc3014SJulian Elischer 	if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) {
1649df8bae1dSRodney W. Grimes 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1650df8bae1dSRodney W. Grimes 		return;
16514078ffb1SRuslan Ermilov 	} else
1652df8bae1dSRodney W. Grimes 		rt = ipforward_rt.ro_rt;
1653df8bae1dSRodney W. Grimes 
1654df8bae1dSRodney W. Grimes 	/*
1655bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1656bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1657bfef7ed4SIan Dowse 	 *
16584d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
16594d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
16604d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
16614d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
16644d2e3692SLuigi Rizzo 	 *
1665bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1666bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1667bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1668bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1669df8bae1dSRodney W. Grimes 	 */
1670bfef7ed4SIan Dowse 	MGET(mcopy, M_DONTWAIT, m->m_type);
1671bfef7ed4SIan Dowse 	if (mcopy != NULL) {
1672bfef7ed4SIan Dowse 		M_COPY_PKTHDR(mcopy, m);
1673bfef7ed4SIan Dowse 		mcopy->m_len = imin((IP_VHL_HL(ip->ip_vhl) << 2) + 8,
1674bfef7ed4SIan Dowse 		    (int)ip->ip_len);
1675bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1676bfef7ed4SIan Dowse 	}
167704287599SRuslan Ermilov 
167804287599SRuslan Ermilov #ifdef IPSTEALTH
167904287599SRuslan Ermilov 	if (!ipstealth) {
168004287599SRuslan Ermilov #endif
168104287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
168204287599SRuslan Ermilov #ifdef IPSTEALTH
168304287599SRuslan Ermilov 	}
168404287599SRuslan Ermilov #endif
1685df8bae1dSRodney W. Grimes 
1686df8bae1dSRodney W. Grimes 	/*
1687df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1688df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1689df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1690df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1691df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1692df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1693df8bae1dSRodney W. Grimes 	 */
1694df8bae1dSRodney W. Grimes 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1695df8bae1dSRodney W. Grimes 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1696df8bae1dSRodney W. Grimes 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
16972b25acc1SLuigi Rizzo 	    ipsendredirects && !srcrt && !next_hop) {
1698df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1699df8bae1dSRodney W. Grimes 		u_long src = ntohl(ip->ip_src.s_addr);
1700df8bae1dSRodney W. Grimes 
1701df8bae1dSRodney W. Grimes 		if (RTA(rt) &&
1702df8bae1dSRodney W. Grimes 		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1703df8bae1dSRodney W. Grimes 		    if (rt->rt_flags & RTF_GATEWAY)
1704df8bae1dSRodney W. Grimes 			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1705df8bae1dSRodney W. Grimes 		    else
17063efc3014SJulian Elischer 			dest = pkt_dst.s_addr;
1707df8bae1dSRodney W. Grimes 		    /* Router requirements says to only send host redirects */
1708df8bae1dSRodney W. Grimes 		    type = ICMP_REDIRECT;
1709df8bae1dSRodney W. Grimes 		    code = ICMP_REDIRECT_HOST;
1710df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1711df8bae1dSRodney W. Grimes 		    if (ipprintfs)
1712df8bae1dSRodney W. Grimes 		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1713df8bae1dSRodney W. Grimes #endif
1714df8bae1dSRodney W. Grimes 		}
1715df8bae1dSRodney W. Grimes 	}
1716df8bae1dSRodney W. Grimes 
1717b97d15cbSGarrett Wollman 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1718b97d15cbSGarrett Wollman 			  IP_FORWARDING, 0);
1719df8bae1dSRodney W. Grimes 	if (error)
1720df8bae1dSRodney W. Grimes 		ipstat.ips_cantforward++;
1721df8bae1dSRodney W. Grimes 	else {
1722df8bae1dSRodney W. Grimes 		ipstat.ips_forward++;
1723df8bae1dSRodney W. Grimes 		if (type)
1724df8bae1dSRodney W. Grimes 			ipstat.ips_redirectsent++;
1725df8bae1dSRodney W. Grimes 		else {
17261f91d8c5SDavid Greenman 			if (mcopy) {
17271f91d8c5SDavid Greenman 				ipflow_create(&ipforward_rt, mcopy);
1728df8bae1dSRodney W. Grimes 				m_freem(mcopy);
17291f91d8c5SDavid Greenman 			}
1730df8bae1dSRodney W. Grimes 			return;
1731df8bae1dSRodney W. Grimes 		}
1732df8bae1dSRodney W. Grimes 	}
1733df8bae1dSRodney W. Grimes 	if (mcopy == NULL)
1734df8bae1dSRodney W. Grimes 		return;
1735df8bae1dSRodney W. Grimes 	destifp = NULL;
1736df8bae1dSRodney W. Grimes 
1737df8bae1dSRodney W. Grimes 	switch (error) {
1738df8bae1dSRodney W. Grimes 
1739df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1740df8bae1dSRodney W. Grimes 		/* type, code set above */
1741df8bae1dSRodney W. Grimes 		break;
1742df8bae1dSRodney W. Grimes 
1743df8bae1dSRodney W. Grimes 	case ENETUNREACH:		/* shouldn't happen, checked above */
1744df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1745df8bae1dSRodney W. Grimes 	case ENETDOWN:
1746df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1747df8bae1dSRodney W. Grimes 	default:
1748df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1749df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1750df8bae1dSRodney W. Grimes 		break;
1751df8bae1dSRodney W. Grimes 
1752df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1753df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1754df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
17556a800098SYoshinobu Inoue #ifndef IPSEC
1756df8bae1dSRodney W. Grimes 		if (ipforward_rt.ro_rt)
1757df8bae1dSRodney W. Grimes 			destifp = ipforward_rt.ro_rt->rt_ifp;
17586a800098SYoshinobu Inoue #else
17596a800098SYoshinobu Inoue 		/*
17606a800098SYoshinobu Inoue 		 * If the packet is routed over IPsec tunnel, tell the
17616a800098SYoshinobu Inoue 		 * originator the tunnel MTU.
17626a800098SYoshinobu Inoue 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
17636a800098SYoshinobu Inoue 		 * XXX quickhack!!!
17646a800098SYoshinobu Inoue 		 */
17656a800098SYoshinobu Inoue 		if (ipforward_rt.ro_rt) {
17666a800098SYoshinobu Inoue 			struct secpolicy *sp = NULL;
17676a800098SYoshinobu Inoue 			int ipsecerror;
17686a800098SYoshinobu Inoue 			int ipsechdr;
17696a800098SYoshinobu Inoue 			struct route *ro;
17706a800098SYoshinobu Inoue 
17716a800098SYoshinobu Inoue 			sp = ipsec4_getpolicybyaddr(mcopy,
17726a800098SYoshinobu Inoue 						    IPSEC_DIR_OUTBOUND,
17736a800098SYoshinobu Inoue 			                            IP_FORWARDING,
17746a800098SYoshinobu Inoue 			                            &ipsecerror);
17756a800098SYoshinobu Inoue 
17766a800098SYoshinobu Inoue 			if (sp == NULL)
17776a800098SYoshinobu Inoue 				destifp = ipforward_rt.ro_rt->rt_ifp;
17786a800098SYoshinobu Inoue 			else {
17796a800098SYoshinobu Inoue 				/* count IPsec header size */
17806a800098SYoshinobu Inoue 				ipsechdr = ipsec4_hdrsiz(mcopy,
17816a800098SYoshinobu Inoue 							 IPSEC_DIR_OUTBOUND,
17826a800098SYoshinobu Inoue 							 NULL);
17836a800098SYoshinobu Inoue 
17846a800098SYoshinobu Inoue 				/*
17856a800098SYoshinobu Inoue 				 * find the correct route for outer IPv4
17866a800098SYoshinobu Inoue 				 * header, compute tunnel MTU.
17876a800098SYoshinobu Inoue 				 *
17886a800098SYoshinobu Inoue 				 * XXX BUG ALERT
17896a800098SYoshinobu Inoue 				 * The "dummyifp" code relies upon the fact
17906a800098SYoshinobu Inoue 				 * that icmp_error() touches only ifp->if_mtu.
17916a800098SYoshinobu Inoue 				 */
17926a800098SYoshinobu Inoue 				/*XXX*/
17936a800098SYoshinobu Inoue 				destifp = NULL;
17946a800098SYoshinobu Inoue 				if (sp->req != NULL
17956a800098SYoshinobu Inoue 				 && sp->req->sav != NULL
17966a800098SYoshinobu Inoue 				 && sp->req->sav->sah != NULL) {
17976a800098SYoshinobu Inoue 					ro = &sp->req->sav->sah->sa_route;
17986a800098SYoshinobu Inoue 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
17996a800098SYoshinobu Inoue 						dummyifp.if_mtu =
18006a800098SYoshinobu Inoue 						    ro->ro_rt->rt_ifp->if_mtu;
18016a800098SYoshinobu Inoue 						dummyifp.if_mtu -= ipsechdr;
18026a800098SYoshinobu Inoue 						destifp = &dummyifp;
18036a800098SYoshinobu Inoue 					}
18046a800098SYoshinobu Inoue 				}
18056a800098SYoshinobu Inoue 
18066a800098SYoshinobu Inoue 				key_freesp(sp);
18076a800098SYoshinobu Inoue 			}
18086a800098SYoshinobu Inoue 		}
18096a800098SYoshinobu Inoue #endif /*IPSEC*/
1810df8bae1dSRodney W. Grimes 		ipstat.ips_cantfrag++;
1811df8bae1dSRodney W. Grimes 		break;
1812df8bae1dSRodney W. Grimes 
1813df8bae1dSRodney W. Grimes 	case ENOBUFS:
1814df8bae1dSRodney W. Grimes 		type = ICMP_SOURCEQUENCH;
1815df8bae1dSRodney W. Grimes 		code = 0;
1816df8bae1dSRodney W. Grimes 		break;
18173a06e3e0SRuslan Ermilov 
18183a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
18193a06e3e0SRuslan Ermilov 		m_freem(mcopy);
18203a06e3e0SRuslan Ermilov 		return;
1821df8bae1dSRodney W. Grimes 	}
1822df8bae1dSRodney W. Grimes 	icmp_error(mcopy, type, code, dest, destifp);
1823df8bae1dSRodney W. Grimes }
1824df8bae1dSRodney W. Grimes 
182582c23ebaSBill Fenner void
182682c23ebaSBill Fenner ip_savecontrol(inp, mp, ip, m)
182782c23ebaSBill Fenner 	register struct inpcb *inp;
182882c23ebaSBill Fenner 	register struct mbuf **mp;
182982c23ebaSBill Fenner 	register struct ip *ip;
183082c23ebaSBill Fenner 	register struct mbuf *m;
183182c23ebaSBill Fenner {
183282c23ebaSBill Fenner 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
183382c23ebaSBill Fenner 		struct timeval tv;
183482c23ebaSBill Fenner 
183582c23ebaSBill Fenner 		microtime(&tv);
183682c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
183782c23ebaSBill Fenner 			SCM_TIMESTAMP, SOL_SOCKET);
183882c23ebaSBill Fenner 		if (*mp)
183982c23ebaSBill Fenner 			mp = &(*mp)->m_next;
18404cc20ab1SSeigo Tanimura 	}
184182c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
184282c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
184382c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
184482c23ebaSBill Fenner 		if (*mp)
184582c23ebaSBill Fenner 			mp = &(*mp)->m_next;
184682c23ebaSBill Fenner 	}
184782c23ebaSBill Fenner #ifdef notyet
184882c23ebaSBill Fenner 	/* XXX
184982c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
185082c23ebaSBill Fenner 	 * than they already were.
185182c23ebaSBill Fenner 	 */
185282c23ebaSBill Fenner 	/* options were tossed already */
185382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
185482c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
185582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
185682c23ebaSBill Fenner 		if (*mp)
185782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
185882c23ebaSBill Fenner 	}
185982c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
186082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
186182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
186282c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
186382c23ebaSBill Fenner 		if (*mp)
186482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
186582c23ebaSBill Fenner 	}
186682c23ebaSBill Fenner #endif
186782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1868d314ad7bSJulian Elischer 		struct ifnet *ifp;
1869d314ad7bSJulian Elischer 		struct sdlbuf {
187082c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1871d314ad7bSJulian Elischer 			u_char	pad[32];
1872d314ad7bSJulian Elischer 		} sdlbuf;
1873d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1874d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
187582c23ebaSBill Fenner 
1876d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
1877d314ad7bSJulian Elischer 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
1878f9132cebSJonathan Lemon 			sdp = (struct sockaddr_dl *)
1879f9132cebSJonathan Lemon 			    (ifaddr_byindex(ifp->if_index)->ifa_addr);
1880d314ad7bSJulian Elischer 			/*
1881d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1882d314ad7bSJulian Elischer 			 */
1883d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
1884d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
1885d314ad7bSJulian Elischer 				goto makedummy;
1886d314ad7bSJulian Elischer 			}
1887d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1888d314ad7bSJulian Elischer 		} else {
1889d314ad7bSJulian Elischer makedummy:
1890d314ad7bSJulian Elischer 			sdl2->sdl_len
1891d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
1892d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1893d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1894d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1895d314ad7bSJulian Elischer 		}
1896d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
189782c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
189882c23ebaSBill Fenner 		if (*mp)
189982c23ebaSBill Fenner 			mp = &(*mp)->m_next;
190082c23ebaSBill Fenner 	}
190182c23ebaSBill Fenner }
190282c23ebaSBill Fenner 
19034d2e3692SLuigi Rizzo /*
19044d2e3692SLuigi Rizzo  * XXX these routines are called from the upper part of the kernel.
19054d2e3692SLuigi Rizzo  * They need to be locked when we remove Giant.
19064d2e3692SLuigi Rizzo  *
19074d2e3692SLuigi Rizzo  * They could also be moved to ip_mroute.c, since all the RSVP
19084d2e3692SLuigi Rizzo  *  handling is done there already.
19094d2e3692SLuigi Rizzo  */
/* Socket registered by ip_rsvp_init(); NULL while none is registered. */
struct socket *ip_rsvpd;
/* Nonzero while this file has contributed one increment to rsvp_on. */
static int ip_rsvp_on;
1912df8bae1dSRodney W. Grimes int
1913f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1914f0068c4aSGarrett Wollman {
1915f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1916f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1917f0068c4aSGarrett Wollman 	  return EOPNOTSUPP;
1918f0068c4aSGarrett Wollman 
1919f0068c4aSGarrett Wollman 	if (ip_rsvpd != NULL)
1920f0068c4aSGarrett Wollman 	  return EADDRINUSE;
1921f0068c4aSGarrett Wollman 
1922f0068c4aSGarrett Wollman 	ip_rsvpd = so;
19231c5de19aSGarrett Wollman 	/*
19241c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
19251c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
19261c5de19aSGarrett Wollman 	 */
19271c5de19aSGarrett Wollman 	if (!ip_rsvp_on) {
19281c5de19aSGarrett Wollman 		ip_rsvp_on = 1;
19291c5de19aSGarrett Wollman 		rsvp_on++;
19301c5de19aSGarrett Wollman 	}
1931f0068c4aSGarrett Wollman 
1932f0068c4aSGarrett Wollman 	return 0;
1933f0068c4aSGarrett Wollman }
1934f0068c4aSGarrett Wollman 
1935f0068c4aSGarrett Wollman int
1936f0068c4aSGarrett Wollman ip_rsvp_done(void)
1937f0068c4aSGarrett Wollman {
1938f0068c4aSGarrett Wollman 	ip_rsvpd = NULL;
19391c5de19aSGarrett Wollman 	/*
19401c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
19411c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
19421c5de19aSGarrett Wollman 	 */
19431c5de19aSGarrett Wollman 	if (ip_rsvp_on) {
19441c5de19aSGarrett Wollman 		ip_rsvp_on = 0;
19451c5de19aSGarrett Wollman 		rsvp_on--;
19461c5de19aSGarrett Wollman 	}
1947f0068c4aSGarrett Wollman 	return 0;
1948f0068c4aSGarrett Wollman }
1949