/* xref: /freebsd/sys/netinet/ip_input.c (revision f389439f50fc4c27d15d3017b622270e25ba71c7) */
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4033553d6eSBjoern A. Zeeb #include "opt_route.h"
41b8bc95cdSAdrian Chadd #include "opt_rss.h"
4274a9466cSGary Palmer 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47b715f178SLuigi Rizzo #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/domain.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
50df8bae1dSRodney W. Grimes #include <sys/socket.h>
51df8bae1dSRodney W. Grimes #include <sys/time.h>
52df8bae1dSRodney W. Grimes #include <sys/kernel.h>
53385195c0SMarko Zec #include <sys/lock.h>
54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
55385195c0SMarko Zec #include <sys/rwlock.h>
5657f60867SMark Johnston #include <sys/sdt.h>
571025071fSGarrett Wollman #include <sys/syslog.h>
58b5e8ce9fSBruce Evans #include <sys/sysctl.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <net/if.h>
619494d596SBrooks Davis #include <net/if_types.h>
62d314ad7bSJulian Elischer #include <net/if_var.h>
6382c23ebaSBill Fenner #include <net/if_dl.h>
64b252313fSGleb Smirnoff #include <net/pfil.h>
65df8bae1dSRodney W. Grimes #include <net/route.h>
66983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68b2bdc62aSAdrian Chadd #include <net/rss_config.h>
694b79449eSBjoern A. Zeeb #include <net/vnet.h>
70df8bae1dSRodney W. Grimes 
71df8bae1dSRodney W. Grimes #include <netinet/in.h>
7257f60867SMark Johnston #include <netinet/in_kdtrace.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
74b5e8ce9fSBruce Evans #include <netinet/in_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip.h>
76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
79eddfbb76SRobert Watson #include <netinet/ip_fw.h>
80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
81ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8258938916SGarrett Wollman #include <machine/in_cksum.h>
83a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
8565634ae7SWojciech Macek #include <netinet/ip_mroute.h>
86df8bae1dSRodney W. Grimes 
87fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
88fcf59617SAndrey V. Elsukov 
89f0068c4aSGarrett Wollman #include <sys/socketvar.h>
906ddbf1e2SGary Palmer 
91aed55708SRobert Watson #include <security/mac/mac_framework.h>
92aed55708SRobert Watson 
/* Compile-time check: an option-less IPv4 header is exactly 20 bytes. */
#ifdef CTASSERT
CTASSERT(sizeof(struct ip) == 20);
#endif

/*
 * Prototypes for the IP reassembly entry points; the functions themselves
 * are defined in ip_reass.c.
 */
void	ipreass_init(void);
void	ipreass_drain(void);
void	ipreass_slowtimo(void);
#ifdef VIMAGE
void	ipreass_destroy(void);
#endif
1041dbefcc0SGleb Smirnoff 
10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10682cea7e6SBjoern A. Zeeb 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
109eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1108b615593SMarko Zec     "Enable IP forwarding between interfaces");
1110312fbe9SPoul-Henning Kamp 
1128ad114c0SGeorge V. Neville-Neil /*
1138ad114c0SGeorge V. Neville-Neil  * Respond with an ICMP host redirect when we forward a packet out of
1148ad114c0SGeorge V. Neville-Neil  * the same interface on which it was received.  See RFC 792.
1158ad114c0SGeorge V. Neville-Neil  */
1168ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1;
1176df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
118eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1198b615593SMarko Zec     "Enable sending IP redirects");
1200312fbe9SPoul-Henning Kamp 
12194df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false;
12294df3271SGleb Smirnoff #define	V_ip_strong_es	VNET(ip_strong_es)
12394df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es,
12494df3271SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false,
12594df3271SGleb Smirnoff     "Packet's IP destination address must match address on arrival interface");
126b3e95d4eSJonathan Lemon 
1272ce85919SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_sav) = true;
1282ce85919SGleb Smirnoff #define	V_ip_sav	VNET(ip_sav)
1292ce85919SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation,
1302ce85919SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true,
1312ce85919SGleb Smirnoff     "Drop incoming packets with source address that is a local address");
1322ce85919SGleb Smirnoff 
133b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head);	/* Packet filter hooks */
134df8bae1dSRodney W. Grimes 
135d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
136d4b5cae4SRobert Watson 	.nh_name = "ip",
137d4b5cae4SRobert Watson 	.nh_handler = ip_input,
138d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
139b8bc95cdSAdrian Chadd #ifdef	RSS
1402527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
141b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
142b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
143b8bc95cdSAdrian Chadd #else
144d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
145b8bc95cdSAdrian Chadd #endif
146d4b5cae4SRobert Watson };
147ca925d9cSJonathan Lemon 
#ifdef	RSS
/*
 * netisr handler description for NETISR_IP_DIRECT, served by
 * ip_direct_input().
 *
 * Directly dispatched frames are currently assumed to already carry a
 * calculated flowid.  There should probably be an assertion that the
 * flow details are actually valid.
 */
static struct netisr_handler ip_direct_nh = {
	.nh_proto = NETISR_IP_DIRECT,
	.nh_name = "ip_direct",
	.nh_handler = ip_direct_input,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
};
#endif
165b8bc95cdSAdrian Chadd 
166df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
167f0ffb944SJulian Elischer extern	struct protosw inetsw[];
168df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17082cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17182cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
172ca925d9cSJonathan Lemon 
173c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */
174c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr)));
175c8ee75f2SGleb Smirnoff 
1760312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1770312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1783d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1790312fbe9SPoul-Henning Kamp #endif
1800312fbe9SPoul-Henning Kamp 
1811b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18282cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth);
1836df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
184eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
185eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
1861b968362SDag-Erling Smørgrav #endif
187eddfbb76SRobert Watson 
188315e3e38SRobert Watson /*
1895da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1905923c293SGleb Smirnoff  */
1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1935da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1945da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1955923c293SGleb Smirnoff 
1965923c293SGleb Smirnoff #ifdef VIMAGE
1975da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
1985923c293SGleb Smirnoff #endif /* VIMAGE */
1995923c293SGleb Smirnoff 
2005923c293SGleb Smirnoff /*
201315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2025923c293SGleb Smirnoff  * into ipstat treated as an array.
203315e3e38SRobert Watson  */
204315e3e38SRobert Watson void
205315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
206315e3e38SRobert Watson {
207315e3e38SRobert Watson 
2085da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
209315e3e38SRobert Watson }
210315e3e38SRobert Watson 
211315e3e38SRobert Watson void
212315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
213315e3e38SRobert Watson {
214315e3e38SRobert Watson 
2155da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
216315e3e38SRobert Watson }
217315e3e38SRobert Watson 
218d4b5cae4SRobert Watson static int
219d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
220d4b5cae4SRobert Watson {
221d4b5cae4SRobert Watson 	int error, qlimit;
222d4b5cae4SRobert Watson 
223d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
224d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
225d4b5cae4SRobert Watson 	if (error || !req->newptr)
226d4b5cae4SRobert Watson 		return (error);
227d4b5cae4SRobert Watson 	if (qlimit < 1)
228d4b5cae4SRobert Watson 		return (EINVAL);
229d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
230d4b5cae4SRobert Watson }
231d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
2327029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
2337029da5cSPawel Biernacki     sysctl_netinet_intr_queue_maxlen, "I",
234d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
235d4b5cae4SRobert Watson 
236d4b5cae4SRobert Watson static int
237d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
238d4b5cae4SRobert Watson {
239d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
240d4b5cae4SRobert Watson 	int error, qdrops;
241d4b5cae4SRobert Watson 
242d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
243d4b5cae4SRobert Watson 	qdrops = qdrops_long;
244d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
245d4b5cae4SRobert Watson 	if (error || !req->newptr)
246d4b5cae4SRobert Watson 		return (error);
247d4b5cae4SRobert Watson 	if (qdrops != 0)
248d4b5cae4SRobert Watson 		return (EINVAL);
249d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
250d4b5cae4SRobert Watson 	return (0);
251d4b5cae4SRobert Watson }
252d4b5cae4SRobert Watson 
253d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
2547029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
2557029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_queue_drops, "I",
256d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
257d4b5cae4SRobert Watson 
258b8bc95cdSAdrian Chadd #ifdef	RSS
259b8bc95cdSAdrian Chadd static int
260b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
261b8bc95cdSAdrian Chadd {
262b8bc95cdSAdrian Chadd 	int error, qlimit;
263b8bc95cdSAdrian Chadd 
264b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
265b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
266b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
267b8bc95cdSAdrian Chadd 		return (error);
268b8bc95cdSAdrian Chadd 	if (qlimit < 1)
269b8bc95cdSAdrian Chadd 		return (EINVAL);
270b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
271b8bc95cdSAdrian Chadd }
2727faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2737029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
2747029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2757faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
276b8bc95cdSAdrian Chadd 
277b8bc95cdSAdrian Chadd static int
278b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
279b8bc95cdSAdrian Chadd {
280b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
281b8bc95cdSAdrian Chadd 	int error, qdrops;
282b8bc95cdSAdrian Chadd 
283b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
284b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
285b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
286b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
287b8bc95cdSAdrian Chadd 		return (error);
288b8bc95cdSAdrian Chadd 	if (qdrops != 0)
289b8bc95cdSAdrian Chadd 		return (EINVAL);
290b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
291b8bc95cdSAdrian Chadd 	return (0);
292b8bc95cdSAdrian Chadd }
293b8bc95cdSAdrian Chadd 
2947faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
2957029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
2967029da5cSPawel Biernacki     sysctl_netinet_intr_direct_queue_drops, "I",
297b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
298b8bc95cdSAdrian Chadd #endif	/* RSS */
299b8bc95cdSAdrian Chadd 
300df8bae1dSRodney W. Grimes /*
301df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
302df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
303df8bae1dSRodney W. Grimes  */
304df8bae1dSRodney W. Grimes void
305f2565d68SRobert Watson ip_init(void)
306df8bae1dSRodney W. Grimes {
307b252313fSGleb Smirnoff 	struct pfil_head_args args;
308f2565d68SRobert Watson 	struct protosw *pr;
309f2565d68SRobert Watson 	int i;
310df8bae1dSRodney W. Grimes 
311d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
312603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3131ed81b73SMarko Zec 
3141ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3151dbefcc0SGleb Smirnoff 	ipreass_init();
3161ed81b73SMarko Zec 
3170b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
318b252313fSGleb Smirnoff 	args.pa_version = PFIL_VERSION;
319b252313fSGleb Smirnoff 	args.pa_flags = PFIL_IN | PFIL_OUT;
320b252313fSGleb Smirnoff 	args.pa_type = PFIL_TYPE_IP4;
321b252313fSGleb Smirnoff 	args.pa_headname = PFIL_INET_NAME;
322b252313fSGleb Smirnoff 	V_inet_pfil_head = pfil_head_register(&args);
3230b4b0b0fSJulian Elischer 
324ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
325ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
326ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
327ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
328ef91a976SAndrey V. Elsukov 		    __func__);
329ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
330ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
331ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
332ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
333ef91a976SAndrey V. Elsukov 		    __func__);
334ef91a976SAndrey V. Elsukov 
3351ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
336484149deSBjoern A. Zeeb #ifdef VIMAGE
337484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
338484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
339484149deSBjoern A. Zeeb #ifdef	RSS
340484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
341484149deSBjoern A. Zeeb #endif
3421ed81b73SMarko Zec 		return;
343484149deSBjoern A. Zeeb 	}
344484149deSBjoern A. Zeeb #endif
3451ed81b73SMarko Zec 
346f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34702410549SRobert Watson 	if (pr == NULL)
348db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
349db09bef3SAndre Oppermann 
350db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
351df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
352df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
353db09bef3SAndre Oppermann 	/*
354db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
355db09bef3SAndre Oppermann 	 * in ip_protox[].
356db09bef3SAndre Oppermann 	 */
357f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
358f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
359df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
360db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
361db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
362db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
363df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
364db09bef3SAndre Oppermann 		}
365194a213eSAndrey A. Chernov 
366d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
367b8bc95cdSAdrian Chadd #ifdef	RSS
368b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
369b8bc95cdSAdrian Chadd #endif
370df8bae1dSRodney W. Grimes }
371df8bae1dSRodney W. Grimes 
3729802380eSBjoern A. Zeeb #ifdef VIMAGE
3733f58662dSBjoern A. Zeeb static void
3743f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3759802380eSBjoern A. Zeeb {
376ef91a976SAndrey V. Elsukov 	int error;
3774d3dfd45SMikolaj Golub 
378484149deSBjoern A. Zeeb #ifdef	RSS
379484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
380484149deSBjoern A. Zeeb #endif
381484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
382484149deSBjoern A. Zeeb 
383b252313fSGleb Smirnoff 	pfil_head_unregister(V_inet_pfil_head);
384ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
385ef91a976SAndrey V. Elsukov 	if (error != 0) {
386ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
387ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
388ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
389ef91a976SAndrey V. Elsukov 	}
390ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
391ef91a976SAndrey V. Elsukov 	if (error != 0) {
392ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
393ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
394ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
395ef91a976SAndrey V. Elsukov 	}
39689856f7eSBjoern A. Zeeb 
39789856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
39889856f7eSBjoern A. Zeeb 	in_ifscrub_all();
39989856f7eSBjoern A. Zeeb 
40089856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
401b1d63265SAlexander V. Chernikov 	rib_flush_routes_family(AF_INET);
4029802380eSBjoern A. Zeeb 
403e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4041dbefcc0SGleb Smirnoff 	ipreass_destroy();
40589856f7eSBjoern A. Zeeb 
40689856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
40789856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4089802380eSBjoern A. Zeeb }
4093f58662dSBjoern A. Zeeb 
4103f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4119802380eSBjoern A. Zeeb #endif
4129802380eSBjoern A. Zeeb 
413b8bc95cdSAdrian Chadd #ifdef	RSS
414b8bc95cdSAdrian Chadd /*
415b8bc95cdSAdrian Chadd  * IP direct input routine.
416b8bc95cdSAdrian Chadd  *
417b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
418b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
419b8bc95cdSAdrian Chadd  */
420b8bc95cdSAdrian Chadd void
421b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
422b8bc95cdSAdrian Chadd {
423b8bc95cdSAdrian Chadd 	struct ip *ip;
424b8bc95cdSAdrian Chadd 	int hlen;
425b8bc95cdSAdrian Chadd 
426b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
427b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
428b8bc95cdSAdrian Chadd 
429fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
430fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
431fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
432fcf59617SAndrey V. Elsukov 			return;
433fcf59617SAndrey V. Elsukov 	}
434fcf59617SAndrey V. Elsukov #endif /* IPSEC */
435b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
436b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
437b8bc95cdSAdrian Chadd 	return;
438b8bc95cdSAdrian Chadd }
439b8bc95cdSAdrian Chadd #endif
440b8bc95cdSAdrian Chadd 
4414d2e3692SLuigi Rizzo /*
442df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
443df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
444df8bae1dSRodney W. Grimes  */
445c67b1d17SGarrett Wollman void
446c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
447df8bae1dSRodney W. Grimes {
44865634ae7SWojciech Macek 	MROUTER_RLOCK_TRACKER;
4499188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4505da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
451ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4520aade26eSRobert Watson 	struct ifnet *ifp;
45394df3271SGleb Smirnoff 	int hlen = 0;
45421d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
45502c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
456f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
45794df3271SGleb Smirnoff 	bool strong_es;
458b715f178SLuigi Rizzo 
459fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
460b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
461db40007dSAndrew R. Reiter 
462ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46376ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
46476ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4652b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
46653be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4678f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4689b932e9eSAndre Oppermann 		goto ours;
4692b25acc1SLuigi Rizzo 	}
4702b25acc1SLuigi Rizzo 
47186425c62SRobert Watson 	IPSTAT_INC(ips_total);
47258938916SGarrett Wollman 
4730359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < sizeof(struct ip)))
47458938916SGarrett Wollman 		goto tooshort;
47558938916SGarrett Wollman 
4760359e7a5SMateusz Guzik 	if (m->m_len < sizeof(struct ip)) {
4770359e7a5SMateusz Guzik 		m = m_pullup(m, sizeof(struct ip));
4780359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
47986425c62SRobert Watson 			IPSTAT_INC(ips_toosmall);
480c67b1d17SGarrett Wollman 			return;
481df8bae1dSRodney W. Grimes 		}
4820359e7a5SMateusz Guzik 	}
483df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
48458938916SGarrett Wollman 
4850359e7a5SMateusz Guzik 	if (__predict_false(ip->ip_v != IPVERSION)) {
48686425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
487df8bae1dSRodney W. Grimes 		goto bad;
488df8bae1dSRodney W. Grimes 	}
48958938916SGarrett Wollman 
49053be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
4910359e7a5SMateusz Guzik 	if (__predict_false(hlen < sizeof(struct ip))) {	/* minimum header length */
49286425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
493df8bae1dSRodney W. Grimes 		goto bad;
494df8bae1dSRodney W. Grimes 	}
495df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4960359e7a5SMateusz Guzik 		m = m_pullup(m, hlen);
4970359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
49886425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
499c67b1d17SGarrett Wollman 			return;
500df8bae1dSRodney W. Grimes 		}
501df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
502df8bae1dSRodney W. Grimes 	}
50333841545SHajimu UMEMOTO 
50457f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
50557f60867SMark Johnston 
5066c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
5070aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5086c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
5096c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
5100aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
51186425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
51233841545SHajimu UMEMOTO 			goto bad;
51333841545SHajimu UMEMOTO 		}
51433841545SHajimu UMEMOTO 	}
51533841545SHajimu UMEMOTO 
516db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
517db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
518db4f9cc7SJonathan Lemon 	} else {
51958938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
52047c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
52158938916SGarrett Wollman 		} else {
52247c861ecSBrian Somers 			sum = in_cksum(m, hlen);
52358938916SGarrett Wollman 		}
524db4f9cc7SJonathan Lemon 	}
5250359e7a5SMateusz Guzik 	if (__predict_false(sum)) {
52686425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
527df8bae1dSRodney W. Grimes 		goto bad;
528df8bae1dSRodney W. Grimes 	}
529df8bae1dSRodney W. Grimes 
53002b199f1SMax Laier #ifdef ALTQ
53102b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
53202b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
53302b199f1SMax Laier 		return;
53402b199f1SMax Laier #endif
53502b199f1SMax Laier 
53621d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
5370359e7a5SMateusz Guzik 	if (__predict_false(ip_len < hlen)) {
53886425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
539df8bae1dSRodney W. Grimes 		goto bad;
540df8bae1dSRodney W. Grimes 	}
541df8bae1dSRodney W. Grimes 
542df8bae1dSRodney W. Grimes 	/*
543df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
544df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
545df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
546df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
547df8bae1dSRodney W. Grimes 	 */
5480359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < ip_len)) {
54958938916SGarrett Wollman tooshort:
55086425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
551df8bae1dSRodney W. Grimes 		goto bad;
552df8bae1dSRodney W. Grimes 	}
55321d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
554df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55521d172a3SGleb Smirnoff 			m->m_len = ip_len;
55621d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
557df8bae1dSRodney W. Grimes 		} else
55821d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
559df8bae1dSRodney W. Grimes 	}
560b8bc95cdSAdrian Chadd 
561ad9f4d6aSAndrey V. Elsukov 	/*
562ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
563*f389439fSBjoern A. Zeeb 	 * ip_tryforward() may generate redirects these days.
564*f389439fSBjoern A. Zeeb 	 * XXX the logic below falling through to normal processing
565*f389439fSBjoern A. Zeeb 	 * if redirects are required should be revisited as well.
566ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
567ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
568ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
569ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
570ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
571ad9f4d6aSAndrey V. Elsukov 	 */
5728ad114c0SGeorge V. Neville-Neil 	if (V_ipforwarding != 0
573fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
574fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
575fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
576ad9f4d6aSAndrey V. Elsukov #endif
577ad9f4d6aSAndrey V. Elsukov 	    ) {
578*f389439fSBjoern A. Zeeb 		/*
579*f389439fSBjoern A. Zeeb 		 * ip_dooptions() was run so we can ignore the source route (or
580*f389439fSBjoern A. Zeeb 		 * any IP options case) case for redirects in ip_tryforward().
581*f389439fSBjoern A. Zeeb 		 */
582ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
58333872124SGeorge V. Neville-Neil 			return;
584ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
585ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
586ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
587ad9f4d6aSAndrey V. Elsukov 			goto ours;
588ad9f4d6aSAndrey V. Elsukov 		}
589ad9f4d6aSAndrey V. Elsukov 	}
590fcf59617SAndrey V. Elsukov 
591fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
59214dd6717SSam Leffler 	/*
593ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
59414dd6717SSam Leffler 	 */
595fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
596fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
597c21fd232SAndre Oppermann 			goto passin;
598ad9f4d6aSAndrey V. Elsukov #endif
599fcf59617SAndrey V. Elsukov 
600c4ac87eaSDarren Reed 	/*
601134ea224SSam Leffler 	 * Run through list of hooks for input packets.
602f51f805fSSam Leffler 	 *
603f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
604f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
605f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
606c4ac87eaSDarren Reed 	 */
607c21fd232SAndre Oppermann 
608c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
609b252313fSGleb Smirnoff 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
610c21fd232SAndre Oppermann 		goto passin;
611c21fd232SAndre Oppermann 
612f51f805fSSam Leffler 	odst = ip->ip_dst;
613b252313fSGleb Smirnoff 	if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
614b252313fSGleb Smirnoff 	    PFIL_PASS)
615beec8214SDarren Reed 		return;
616134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
617c4ac87eaSDarren Reed 		return;
6189b932e9eSAndre Oppermann 
619c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
62002c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6219b932e9eSAndre Oppermann 
6229b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6239b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6249b932e9eSAndre Oppermann 		goto ours;
6259b932e9eSAndre Oppermann 	}
626ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
627de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
628099dd043SAndre Oppermann 			/*
629ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
630ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
631ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
632099dd043SAndre Oppermann 			 */
633ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
634099dd043SAndre Oppermann 			return;
635099dd043SAndre Oppermann 		}
636ffdbf9daSAndrey V. Elsukov 	}
637c21fd232SAndre Oppermann passin:
63821d172a3SGleb Smirnoff 
63921d172a3SGleb Smirnoff 	/*
640df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
641df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
642df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
643df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
644df8bae1dSRodney W. Grimes 	 */
6459b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
646c67b1d17SGarrett Wollman 		return;
647df8bae1dSRodney W. Grimes 
648f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
649f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
650f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
651f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
652f0068c4aSGarrett Wollman 	 * grabbing the packet.
653f0068c4aSGarrett Wollman          */
6540359e7a5SMateusz Guzik 	if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on)
655f0068c4aSGarrett Wollman 		goto ours;
656f0068c4aSGarrett Wollman 
657df8bae1dSRodney W. Grimes 	/*
658df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
659cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
660cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
661cc766e04SGarrett Wollman 	 * with it).
662df8bae1dSRodney W. Grimes 	 */
663d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
664cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
665cc766e04SGarrett Wollman 		goto ours;
666cc766e04SGarrett Wollman 
6677538a9a0SJonathan Lemon 	/*
668823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
669823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
67094df3271SGleb Smirnoff 	 * strong ES model) with a list of additional predicates:
67194df3271SGleb Smirnoff 	 * - if IP forwarding is disabled
67294df3271SGleb Smirnoff 	 * - the packet is not locally generated
67394df3271SGleb Smirnoff 	 * - the packet is not subject to 'ipfw fwd'
67494df3271SGleb Smirnoff 	 * - Interface is not running CARP. If the packet got here, we already
67594df3271SGleb Smirnoff 	 *   checked it with carp_iamatch() and carp_forus().
676823db0e9SDon Lewis 	 */
67794df3271SGleb Smirnoff 	strong_es = V_ip_strong_es && (V_ipforwarding == 0) &&
67881674f12SGleb Smirnoff 	    ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
67954bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
680823db0e9SDon Lewis 
681ca925d9cSJonathan Lemon 	/*
682ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
683ca925d9cSJonathan Lemon 	 */
684c8ee75f2SGleb Smirnoff 	CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
68594df3271SGleb Smirnoff 		if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr)
68694df3271SGleb Smirnoff 			continue;
68794df3271SGleb Smirnoff 
688f9e354dfSJulian Elischer 		/*
68994df3271SGleb Smirnoff 		 * net.inet.ip.rfc1122_strong_es: the address matches, verify
69094df3271SGleb Smirnoff 		 * that the packet arrived via the correct interface.
691f9e354dfSJulian Elischer 		 */
69294df3271SGleb Smirnoff 		if (__predict_false(strong_es && ia->ia_ifp != ifp)) {
69394df3271SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
69494df3271SGleb Smirnoff 			goto bad;
695ca925d9cSJonathan Lemon 		}
69694df3271SGleb Smirnoff 
6972ce85919SGleb Smirnoff 		/*
6982ce85919SGleb Smirnoff 		 * net.inet.ip.source_address_validation: drop incoming
6992ce85919SGleb Smirnoff 		 * packets that pretend to be ours.
7002ce85919SGleb Smirnoff 		 */
7012ce85919SGleb Smirnoff 		if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) &&
7022ce85919SGleb Smirnoff 		    __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) {
7032ce85919SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
7042ce85919SGleb Smirnoff 			goto bad;
7052ce85919SGleb Smirnoff 		}
7062ce85919SGleb Smirnoff 
70794df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
70894df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
70994df3271SGleb Smirnoff 		goto ours;
7108c0fec80SRobert Watson 	}
7112d9cfabaSRobert Watson 
712823db0e9SDon Lewis 	/*
713ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
714ca925d9cSJonathan Lemon 	 *
715ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
716ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
717ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
718ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
719823db0e9SDon Lewis 	 */
72081674f12SGleb Smirnoff 	if (ifp->if_flags & IFF_BROADCAST) {
721d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
722ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
723ca925d9cSJonathan Lemon 				continue;
724ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
725df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7260aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7277caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7287caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7297caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
730df8bae1dSRodney W. Grimes 				goto ours;
7310aade26eSRobert Watson 			}
7320ac40133SBrian Somers #ifdef BOOTP_COMPAT
7330aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7347caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7357caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7367caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
737ca925d9cSJonathan Lemon 				goto ours;
7380aade26eSRobert Watson 			}
7390ac40133SBrian Somers #endif
740df8bae1dSRodney W. Grimes 		}
74119e5b0a7SRobert Watson 		ia = NULL;
742df8bae1dSRodney W. Grimes 	}
743df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
74465634ae7SWojciech Macek 		MROUTER_RLOCK();
74503b0505bSZhenlei Huang 		/*
74603b0505bSZhenlei Huang 		 * RFC 3927 2.7: Do not forward multicast packets from
74703b0505bSZhenlei Huang 		 * IN_LINKLOCAL.
74803b0505bSZhenlei Huang 		 */
7493d846e48SZhenlei Huang 		if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
750df8bae1dSRodney W. Grimes 			/*
751df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
752df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
753df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
754df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
755df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
756df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
757df8bae1dSRodney W. Grimes 			 */
7580aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
75965634ae7SWojciech Macek 				MROUTER_RUNLOCK();
76086425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
761df8bae1dSRodney W. Grimes 				m_freem(m);
762c67b1d17SGarrett Wollman 				return;
763df8bae1dSRodney W. Grimes 			}
764df8bae1dSRodney W. Grimes 
765df8bae1dSRodney W. Grimes 			/*
76611612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
767df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
768df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
769df8bae1dSRodney W. Grimes 			 */
77065634ae7SWojciech Macek 			if (ip->ip_p == IPPROTO_IGMP) {
77165634ae7SWojciech Macek 				MROUTER_RUNLOCK();
772df8bae1dSRodney W. Grimes 				goto ours;
77365634ae7SWojciech Macek 			}
77486425c62SRobert Watson 			IPSTAT_INC(ips_forward);
775df8bae1dSRodney W. Grimes 		}
77665634ae7SWojciech Macek 		MROUTER_RUNLOCK();
777df8bae1dSRodney W. Grimes 		/*
778d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
779d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
780d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
781df8bae1dSRodney W. Grimes 		 */
782df8bae1dSRodney W. Grimes 		goto ours;
783df8bae1dSRodney W. Grimes 	}
784df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
785df8bae1dSRodney W. Grimes 		goto ours;
786df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
787df8bae1dSRodney W. Grimes 		goto ours;
78803b0505bSZhenlei Huang 	/* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */
7893d846e48SZhenlei Huang 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
7903d846e48SZhenlei Huang 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
7913d846e48SZhenlei Huang 		IPSTAT_INC(ips_cantforward);
7923d846e48SZhenlei Huang 		m_freem(m);
7933d846e48SZhenlei Huang 		return;
7943d846e48SZhenlei Huang 	}
795df8bae1dSRodney W. Grimes 
7966a800098SYoshinobu Inoue 	/*
797df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
798df8bae1dSRodney W. Grimes 	 */
799603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
80086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
801df8bae1dSRodney W. Grimes 		m_freem(m);
802546f251bSChris D. Faulhaber 	} else {
8039b932e9eSAndre Oppermann 		ip_forward(m, dchg);
804546f251bSChris D. Faulhaber 	}
805c67b1d17SGarrett Wollman 	return;
806df8bae1dSRodney W. Grimes 
807df8bae1dSRodney W. Grimes ours:
808d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
809d0ebc0d2SYaroslav Tykhiy 	/*
810d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
811d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
812d0ebc0d2SYaroslav Tykhiy 	 */
8137caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
814d0ebc0d2SYaroslav Tykhiy 		return;
815d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
816d0ebc0d2SYaroslav Tykhiy 
81763f8d699SJordan K. Hubbard 	/*
818b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
819ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
820df8bae1dSRodney W. Grimes 	 */
8218f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
822aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
823f0cada84SAndre Oppermann 		m = ip_reass(m);
824f0cada84SAndre Oppermann 		if (m == NULL)
825c67b1d17SGarrett Wollman 			return;
8266a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8277e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
82853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
829f0cada84SAndre Oppermann 	}
830f0cada84SAndre Oppermann 
831fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
832fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
833fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
834fcf59617SAndrey V. Elsukov 			return;
835fcf59617SAndrey V. Elsukov 	}
836b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
83733841545SHajimu UMEMOTO 
838df8bae1dSRodney W. Grimes 	/*
839df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
840df8bae1dSRodney W. Grimes 	 */
84186425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8429b932e9eSAndre Oppermann 
8438f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
844c67b1d17SGarrett Wollman 	return;
845df8bae1dSRodney W. Grimes bad:
846df8bae1dSRodney W. Grimes 	m_freem(m);
847c67b1d17SGarrett Wollman }
848c67b1d17SGarrett Wollman 
849c67b1d17SGarrett Wollman /*
850df8bae1dSRodney W. Grimes  * IP timer processing;
851df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
852df8bae1dSRodney W. Grimes  * queue, discard it.
853df8bae1dSRodney W. Grimes  */
854df8bae1dSRodney W. Grimes void
855f2565d68SRobert Watson ip_slowtimo(void)
856df8bae1dSRodney W. Grimes {
8578b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
858df8bae1dSRodney W. Grimes 
8595ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8608b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8618b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8621dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8638b615593SMarko Zec 		CURVNET_RESTORE();
8648b615593SMarko Zec 	}
8655ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
866df8bae1dSRodney W. Grimes }
867df8bae1dSRodney W. Grimes 
8689802380eSBjoern A. Zeeb void
8699802380eSBjoern A. Zeeb ip_drain(void)
8709802380eSBjoern A. Zeeb {
8719802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8729802380eSBjoern A. Zeeb 
8739802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8749802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8759802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8761dbefcc0SGleb Smirnoff 		ipreass_drain();
8778b615593SMarko Zec 		CURVNET_RESTORE();
8788b615593SMarko Zec 	}
8795ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
880df8bae1dSRodney W. Grimes }
881df8bae1dSRodney W. Grimes 
882df8bae1dSRodney W. Grimes /*
883de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
884de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
885de38924dSAndre Oppermann  */
886de38924dSAndre Oppermann int
8871b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
888de38924dSAndre Oppermann {
889de38924dSAndre Oppermann 	struct protosw *pr;
890de38924dSAndre Oppermann 
891de38924dSAndre Oppermann 	/* Sanity checks. */
8921b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
893de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
894de38924dSAndre Oppermann 
895de38924dSAndre Oppermann 	/*
896de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
897de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
898de38924dSAndre Oppermann 	 */
899de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
900de38924dSAndre Oppermann 	if (pr == NULL)
901de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
902de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
903de38924dSAndre Oppermann 		return (EEXIST);
904de38924dSAndre Oppermann 
905de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
906de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
907de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
908de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
909de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
910de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
911de38924dSAndre Oppermann 			return (0);
912de38924dSAndre Oppermann 		}
913de38924dSAndre Oppermann 	}
914de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
915de38924dSAndre Oppermann }
916de38924dSAndre Oppermann 
917de38924dSAndre Oppermann int
9181b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
919de38924dSAndre Oppermann {
920de38924dSAndre Oppermann 	struct protosw *pr;
921de38924dSAndre Oppermann 
922de38924dSAndre Oppermann 	/* Sanity checks. */
9231b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
924de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
925de38924dSAndre Oppermann 
926de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
927de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
928de38924dSAndre Oppermann 	if (pr == NULL)
929de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
930de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
931de38924dSAndre Oppermann 		return (ENOENT);
932de38924dSAndre Oppermann 
933de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
934de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
935de38924dSAndre Oppermann 	return (0);
936de38924dSAndre Oppermann }
937de38924dSAndre Oppermann 
938df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
939df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
940df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
941df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
942df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
943fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
9443b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
945df8bae1dSRodney W. Grimes };
946df8bae1dSRodney W. Grimes 
947df8bae1dSRodney W. Grimes /*
948df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
949df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
950df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
951df8bae1dSRodney W. Grimes  * of codes and types.
952df8bae1dSRodney W. Grimes  *
953df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
954df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
955df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
956df8bae1dSRodney W. Grimes  * protocol deal with that.
957df8bae1dSRodney W. Grimes  *
958df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
959df8bae1dSRodney W. Grimes  * via a source route.
960df8bae1dSRodney W. Grimes  */
9619b932e9eSAndre Oppermann void
9629b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
963df8bae1dSRodney W. Grimes {
9642b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
965efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
966df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
967d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9689b932e9eSAndre Oppermann 	struct in_addr dest;
969b835b6feSBjoern A. Zeeb 	struct route ro;
9704043ee3cSAlexander V. Chernikov 	uint32_t flowid;
971c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9723efc3014SJulian Elischer 
973b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
974b8a6e03fSGleb Smirnoff 
9759b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
97686425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
977df8bae1dSRodney W. Grimes 		m_freem(m);
978df8bae1dSRodney W. Grimes 		return;
979df8bae1dSRodney W. Grimes 	}
980fcf59617SAndrey V. Elsukov 	if (
981fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
982fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
983fcf59617SAndrey V. Elsukov #endif
984fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
985fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9868922ddbeSAndrey V. Elsukov 		return;
9878922ddbeSAndrey V. Elsukov 	}
988df8bae1dSRodney W. Grimes 
989d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
990d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
991d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
992d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
993d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
9944043ee3cSAlexander V. Chernikov 	flowid = m->m_pkthdr.flowid;
9954043ee3cSAlexander V. Chernikov 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
996983066f0SAlexander V. Chernikov 	if (ro.ro_nh != NULL) {
997983066f0SAlexander V. Chernikov 		ia = ifatoia(ro.ro_nh->nh_ifa);
99856844a62SErmal Luçi 	} else
99956844a62SErmal Luçi 		ia = NULL;
1000df8bae1dSRodney W. Grimes 	/*
1001bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1002bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1003bfef7ed4SIan Dowse 	 *
10044d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
10054d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
10064d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
10074d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
10084d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
10094d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
10104d2e3692SLuigi Rizzo 	 *
1011c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
1012bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1013bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1014bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1015df8bae1dSRodney W. Grimes 	 */
1016dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1017eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10189967cafcSSam Leffler 		/*
10199967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10209967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10219967cafcSSam Leffler 		 * be conservative and just discard the copy since
10229967cafcSSam Leffler 		 * code below may some day want the tags.
10239967cafcSSam Leffler 		 */
10249967cafcSSam Leffler 		m_free(mcopy);
10259967cafcSSam Leffler 		mcopy = NULL;
10269967cafcSSam Leffler 	}
1027bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10288f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1029e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1030bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1031bfef7ed4SIan Dowse 	}
103204287599SRuslan Ermilov #ifdef IPSTEALTH
1033fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
103404287599SRuslan Ermilov #endif
103504287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1036fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1037fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1038fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1039fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1040d16a2e47SMark Johnston 			RO_NHFREE(&ro);
1041fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1042fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1043fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1044b8a6e03fSGleb Smirnoff 			return;
104504287599SRuslan Ermilov 		}
1046fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1047fcf59617SAndrey V. Elsukov 	}
1048fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1049df8bae1dSRodney W. Grimes 	/*
1050df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1051df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1052df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1053df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1054df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1055df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1056df8bae1dSRodney W. Grimes 	 */
10579b932e9eSAndre Oppermann 	dest.s_addr = 0;
1058efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1059efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
1060983066f0SAlexander V. Chernikov 		struct nhop_object *nh;
106102c1c707SAndre Oppermann 
1062983066f0SAlexander V. Chernikov 		nh = ro.ro_nh;
106302c1c707SAndre Oppermann 
1064983066f0SAlexander V. Chernikov 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
1065983066f0SAlexander V. Chernikov 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
1066df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1067df8bae1dSRodney W. Grimes 
1068983066f0SAlexander V. Chernikov 			if (nh_ia != NULL &&
1069983066f0SAlexander V. Chernikov 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
1070df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1071df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1072df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
107362e1a437SZhenlei Huang 				if (nh->nh_flags & NHF_GATEWAY) {
107462e1a437SZhenlei Huang 				    if (nh->gw_sa.sa_family == AF_INET)
107562e1a437SZhenlei Huang 					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
107662e1a437SZhenlei Huang 				    else /* Do not redirect in case gw is AF_INET6 */
107762e1a437SZhenlei Huang 					type = 0;
107862e1a437SZhenlei Huang 				} else
107962e1a437SZhenlei Huang 					dest.s_addr = ip->ip_dst.s_addr;
1080df8bae1dSRodney W. Grimes 			}
1081df8bae1dSRodney W. Grimes 		}
108202c1c707SAndre Oppermann 	}
1083df8bae1dSRodney W. Grimes 
1084b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1085b835b6feSBjoern A. Zeeb 
1086983066f0SAlexander V. Chernikov 	if (error == EMSGSIZE && ro.ro_nh)
1087983066f0SAlexander V. Chernikov 		mtu = ro.ro_nh->nh_mtu;
1088983066f0SAlexander V. Chernikov 	RO_NHFREE(&ro);
1089b835b6feSBjoern A. Zeeb 
1090df8bae1dSRodney W. Grimes 	if (error)
109186425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1092df8bae1dSRodney W. Grimes 	else {
109386425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1094df8bae1dSRodney W. Grimes 		if (type)
109586425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1096df8bae1dSRodney W. Grimes 		else {
10979188b4a1SAndre Oppermann 			if (mcopy)
1098df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1099b8a6e03fSGleb Smirnoff 			return;
1100df8bae1dSRodney W. Grimes 		}
1101df8bae1dSRodney W. Grimes 	}
11024f6c66ccSMatt Macy 	if (mcopy == NULL)
1103b8a6e03fSGleb Smirnoff 		return;
11044f6c66ccSMatt Macy 
1105df8bae1dSRodney W. Grimes 	switch (error) {
1106df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1107df8bae1dSRodney W. Grimes 		/* type, code set above */
1108df8bae1dSRodney W. Grimes 		break;
1109df8bae1dSRodney W. Grimes 
1110efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1111df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1112df8bae1dSRodney W. Grimes 	case ENETDOWN:
1113df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1114df8bae1dSRodney W. Grimes 	default:
1115df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1116df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1117df8bae1dSRodney W. Grimes 		break;
1118df8bae1dSRodney W. Grimes 
1119df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1120df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1121df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11229b932e9eSAndre Oppermann 		/*
1123b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1124b835b6feSBjoern A. Zeeb 		 * interface MTU.
1125ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1126ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1127ab48768bSAndre Oppermann 		 * current packet size.
11289b932e9eSAndre Oppermann 		 */
1129b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1130b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1131b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1132b835b6feSBjoern A. Zeeb 		} else {
1133ab48768bSAndre Oppermann 			if (ia != NULL)
1134c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1135ab48768bSAndre Oppermann 			else
11368f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1137ab48768bSAndre Oppermann 		}
113886425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1139df8bae1dSRodney W. Grimes 		break;
1140df8bae1dSRodney W. Grimes 
1141df8bae1dSRodney W. Grimes 	case ENOBUFS:
11423a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11433a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1144b8a6e03fSGleb Smirnoff 		return;
1145df8bae1dSRodney W. Grimes 	}
1146c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1147df8bae1dSRodney W. Grimes }
1148df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when the socket pointed to by `sp' has SO_TIMESTAMP set
 * in so_options and its so_ts_clock equals `ct'; otherwise 0.
 *
 * Both arguments are parenthesized so that expressions such as `&so' or
 * `a ? b : c' expand correctly.  `sp' is evaluated twice, so it must not
 * have side effects.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1151339efd75SMaxim Sobolev 
115282c23ebaSBill Fenner void
1153f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1154f2565d68SRobert Watson     struct mbuf *m)
115582c23ebaSBill Fenner {
115606193f0bSKonstantin Belousov 	bool stamped;
11578b615593SMarko Zec 
115806193f0bSKonstantin Belousov 	stamped = false;
1159339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1160339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
116106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
116206193f0bSKonstantin Belousov 		struct timespec ts1;
1163be8a62e8SPoul-Henning Kamp 
116406193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
116506193f0bSKonstantin Belousov 		    M_TSTMP)) {
116606193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
116706193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
116806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
116906193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
117006193f0bSKonstantin Belousov 		} else {
1171be8a62e8SPoul-Henning Kamp 			bintime(&bt);
117206193f0bSKonstantin Belousov 		}
1173be8a62e8SPoul-Henning Kamp 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1174be8a62e8SPoul-Henning Kamp 		    SCM_BINTIME, SOL_SOCKET);
117506193f0bSKonstantin Belousov 		if (*mp != NULL) {
1176be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
117706193f0bSKonstantin Belousov 			stamped = true;
117806193f0bSKonstantin Belousov 		}
1179be8a62e8SPoul-Henning Kamp 	}
1180339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
118106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
1182c012cfe6SEd Maste 		struct timespec ts1;
118382c23ebaSBill Fenner 		struct timeval tv;
118482c23ebaSBill Fenner 
118506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
118606193f0bSKonstantin Belousov 		    M_TSTMP)) {
118706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
118806193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
118906193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
119006193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
119106193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
119206193f0bSKonstantin Belousov 		} else {
1193339efd75SMaxim Sobolev 			microtime(&tv);
119406193f0bSKonstantin Belousov 		}
119582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
119682c23ebaSBill Fenner 		    SCM_TIMESTAMP, SOL_SOCKET);
119706193f0bSKonstantin Belousov 		if (*mp != NULL) {
119882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
119906193f0bSKonstantin Belousov 			stamped = true;
120006193f0bSKonstantin Belousov 		}
1201339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
120206193f0bSKonstantin Belousov 		struct bintime boottimebin;
120306193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1204339efd75SMaxim Sobolev 
120506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
120606193f0bSKonstantin Belousov 		    M_TSTMP)) {
120706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
120806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
120906193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
12106040822cSAlan Somers 			timespecadd(&ts, &ts1, &ts);
121106193f0bSKonstantin Belousov 		} else {
1212339efd75SMaxim Sobolev 			nanotime(&ts);
121306193f0bSKonstantin Belousov 		}
1214339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1215339efd75SMaxim Sobolev 		    SCM_REALTIME, SOL_SOCKET);
121606193f0bSKonstantin Belousov 		if (*mp != NULL) {
1217339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
121806193f0bSKonstantin Belousov 			stamped = true;
121906193f0bSKonstantin Belousov 		}
1220339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1221339efd75SMaxim Sobolev 		struct timespec ts;
1222339efd75SMaxim Sobolev 
122306193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
122406193f0bSKonstantin Belousov 		    M_TSTMP))
122506193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
122606193f0bSKonstantin Belousov 		else
1227339efd75SMaxim Sobolev 			nanouptime(&ts);
1228339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1229339efd75SMaxim Sobolev 		    SCM_MONOTONIC, SOL_SOCKET);
123006193f0bSKonstantin Belousov 		if (*mp != NULL) {
123106193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
123206193f0bSKonstantin Belousov 			stamped = true;
123306193f0bSKonstantin Belousov 		}
123406193f0bSKonstantin Belousov 	}
123506193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
123606193f0bSKonstantin Belousov 	    M_TSTMP)) {
123706193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
123806193f0bSKonstantin Belousov 
123906193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
124006193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
124106193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
124206193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
124306193f0bSKonstantin Belousov 		*mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO,
124406193f0bSKonstantin Belousov 		    SOL_SOCKET);
124506193f0bSKonstantin Belousov 		if (*mp != NULL)
1246339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1247be8a62e8SPoul-Henning Kamp 	}
124882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
124982c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
125082c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
125182c23ebaSBill Fenner 		if (*mp)
125282c23ebaSBill Fenner 			mp = &(*mp)->m_next;
125382c23ebaSBill Fenner 	}
12544957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
12554957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
12564957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
12574957466bSMatthew N. Dodd 		if (*mp)
12584957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
12594957466bSMatthew N. Dodd 	}
126082c23ebaSBill Fenner #ifdef notyet
126182c23ebaSBill Fenner 	/* XXX
126282c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
126382c23ebaSBill Fenner 	 * than they already were.
126482c23ebaSBill Fenner 	 */
126582c23ebaSBill Fenner 	/* options were tossed already */
126682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
126782c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
126882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
126982c23ebaSBill Fenner 		if (*mp)
127082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
127182c23ebaSBill Fenner 	}
127282c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
127382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1274e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
127582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
127682c23ebaSBill Fenner 		if (*mp)
127782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
127882c23ebaSBill Fenner 	}
127982c23ebaSBill Fenner #endif
128082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1281d314ad7bSJulian Elischer 		struct ifnet *ifp;
1282d314ad7bSJulian Elischer 		struct sdlbuf {
128382c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1284d314ad7bSJulian Elischer 			u_char	pad[32];
1285d314ad7bSJulian Elischer 		} sdlbuf;
1286d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1287d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
128882c23ebaSBill Fenner 
1289db0ac6deSCy Schubert 		if ((ifp = m->m_pkthdr.rcvif)) {
12904a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1291d314ad7bSJulian Elischer 			/*
1292d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1293d314ad7bSJulian Elischer 			 */
129446f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
129546f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1296d314ad7bSJulian Elischer 				goto makedummy;
1297d314ad7bSJulian Elischer 			}
1298d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1299d314ad7bSJulian Elischer 		} else {
1300d314ad7bSJulian Elischer makedummy:
130146f2df9cSSergey Kandaurov 			sdl2->sdl_len =
130246f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1303d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1304d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1305d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1306d314ad7bSJulian Elischer 		}
1307d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
130882c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
130982c23ebaSBill Fenner 		if (*mp)
131082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
131182c23ebaSBill Fenner 	}
13123cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
13133cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
13143cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
13153cca425bSMichael Tuexen 		if (*mp)
13163cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
13173cca425bSMichael Tuexen 	}
13189d3ddf43SAdrian Chadd 
13199d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
13209d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13219d3ddf43SAdrian Chadd 
13229d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13239d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13249d3ddf43SAdrian Chadd 
13259d3ddf43SAdrian Chadd 		/*
13269d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
13279d3ddf43SAdrian Chadd 		 * other - don't populate both?
13289d3ddf43SAdrian Chadd 		 */
13299d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
13309d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
13319d3ddf43SAdrian Chadd 		if (*mp)
13329d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13339d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
13349d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
13359d3ddf43SAdrian Chadd 		if (*mp)
13369d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13379d3ddf43SAdrian Chadd 	}
13389d3ddf43SAdrian Chadd 
13399d3ddf43SAdrian Chadd #ifdef	RSS
13409d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
13419d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13429d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
13439d3ddf43SAdrian Chadd 
13449d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13459d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13469d3ddf43SAdrian Chadd 
13479d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
13489d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
13499d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
13509d3ddf43SAdrian Chadd 			if (*mp)
13519d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
13529d3ddf43SAdrian Chadd 		}
13539d3ddf43SAdrian Chadd 	}
13549d3ddf43SAdrian Chadd #endif
135582c23ebaSBill Fenner }
135682c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
13635f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on);
136482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
136582cea7e6SBjoern A. Zeeb 
136682cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
136782cea7e6SBjoern A. Zeeb 
1368df8bae1dSRodney W. Grimes int
1369f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1370f0068c4aSGarrett Wollman {
13718b615593SMarko Zec 
1372f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1373f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1374f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1375f0068c4aSGarrett Wollman 
1376603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1377f0068c4aSGarrett Wollman 		return EADDRINUSE;
1378f0068c4aSGarrett Wollman 
1379603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13801c5de19aSGarrett Wollman 	/*
13811c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13821c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13831c5de19aSGarrett Wollman 	 */
1384603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1385603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1386603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13871c5de19aSGarrett Wollman 	}
1388f0068c4aSGarrett Wollman 
1389f0068c4aSGarrett Wollman 	return 0;
1390f0068c4aSGarrett Wollman }
1391f0068c4aSGarrett Wollman 
1392f0068c4aSGarrett Wollman int
1393f0068c4aSGarrett Wollman ip_rsvp_done(void)
1394f0068c4aSGarrett Wollman {
13958b615593SMarko Zec 
1396603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13971c5de19aSGarrett Wollman 	/*
13981c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13991c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
14001c5de19aSGarrett Wollman 	 */
1401603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1402603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1403603724d3SBjoern A. Zeeb 		V_rsvp_on--;
14041c5de19aSGarrett Wollman 	}
1405f0068c4aSGarrett Wollman 	return 0;
1406f0068c4aSGarrett Wollman }
1407bbb4330bSLuigi Rizzo 
14088f5a8818SKevin Lo int
14098f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1410bbb4330bSLuigi Rizzo {
14118f5a8818SKevin Lo 	struct mbuf *m;
14128f5a8818SKevin Lo 
14138f5a8818SKevin Lo 	m = *mp;
14148f5a8818SKevin Lo 	*mp = NULL;
14158b615593SMarko Zec 
1416bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
14178f5a8818SKevin Lo 		*mp = m;
14188f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
14198f5a8818SKevin Lo 		return (IPPROTO_DONE);
1420bbb4330bSLuigi Rizzo 	}
1421bbb4330bSLuigi Rizzo 
1422bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1423bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1424bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1425bbb4330bSLuigi Rizzo 	 */
1426bbb4330bSLuigi Rizzo 
1427603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1428bbb4330bSLuigi Rizzo 		m_freem(m);
14298f5a8818SKevin Lo 		return (IPPROTO_DONE);
1430bbb4330bSLuigi Rizzo 	}
1431bbb4330bSLuigi Rizzo 
1432603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
14338f5a8818SKevin Lo 		*mp = m;
14348f5a8818SKevin Lo 		rip_input(mp, offp, proto);
14358f5a8818SKevin Lo 		return (IPPROTO_DONE);
1436bbb4330bSLuigi Rizzo 	}
1437bbb4330bSLuigi Rizzo 	/* Drop the packet */
1438bbb4330bSLuigi Rizzo 	m_freem(m);
14398f5a8818SKevin Lo 	return (IPPROTO_DONE);
1440bbb4330bSLuigi Rizzo }
1441