xref: /freebsd/sys/netinet/ip_input.c (revision 983066f05bacda3e18aae456e56feb00f70c0b55)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4033553d6eSBjoern A. Zeeb #include "opt_route.h"
41b8bc95cdSAdrian Chadd #include "opt_rss.h"
4274a9466cSGary Palmer 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47b715f178SLuigi Rizzo #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/domain.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
50df8bae1dSRodney W. Grimes #include <sys/socket.h>
51df8bae1dSRodney W. Grimes #include <sys/time.h>
52df8bae1dSRodney W. Grimes #include <sys/kernel.h>
53385195c0SMarko Zec #include <sys/lock.h>
54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
55385195c0SMarko Zec #include <sys/rwlock.h>
5657f60867SMark Johnston #include <sys/sdt.h>
571025071fSGarrett Wollman #include <sys/syslog.h>
58b5e8ce9fSBruce Evans #include <sys/sysctl.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <net/if.h>
619494d596SBrooks Davis #include <net/if_types.h>
62d314ad7bSJulian Elischer #include <net/if_var.h>
6382c23ebaSBill Fenner #include <net/if_dl.h>
64b252313fSGleb Smirnoff #include <net/pfil.h>
65df8bae1dSRodney W. Grimes #include <net/route.h>
66*983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68b2bdc62aSAdrian Chadd #include <net/rss_config.h>
694b79449eSBjoern A. Zeeb #include <net/vnet.h>
70df8bae1dSRodney W. Grimes 
71df8bae1dSRodney W. Grimes #include <netinet/in.h>
7257f60867SMark Johnston #include <netinet/in_kdtrace.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
74b5e8ce9fSBruce Evans #include <netinet/in_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip.h>
76*983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
79eddfbb76SRobert Watson #include <netinet/ip_fw.h>
80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
81ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8258938916SGarrett Wollman #include <machine/in_cksum.h>
83a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
85df8bae1dSRodney W. Grimes 
86fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
87fcf59617SAndrey V. Elsukov 
88f0068c4aSGarrett Wollman #include <sys/socketvar.h>
896ddbf1e2SGary Palmer 
90aed55708SRobert Watson #include <security/mac/mac_framework.h>
91aed55708SRobert Watson 
/*
 * Compile-time check that struct ip is exactly 20 bytes.  The check is
 * only emitted when the CTASSERT macro is available.
 */
#if defined(CTASSERT)
CTASSERT(sizeof(struct ip) == 20);
#endif
95d2035ffbSEd Maste 
/*
 * Prototypes for the IP reassembly routines, which are implemented in
 * ip_reass.c.  ipreass_destroy() is only declared for VIMAGE kernels.
 */
void	ipreass_init(void);
void	ipreass_drain(void);
void	ipreass_slowtimo(void);
#ifdef VIMAGE
void	ipreass_destroy(void);
#endif
1031dbefcc0SGleb Smirnoff 
104cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock;
105cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
106f0068c4aSGarrett Wollman 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10882cea7e6SBjoern A. Zeeb 
10982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1106df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
111eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1128b615593SMarko Zec     "Enable IP forwarding between interfaces");
1130312fbe9SPoul-Henning Kamp 
1145f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ipsendredirects) = 1;	/* XXX */
11582cea7e6SBjoern A. Zeeb #define	V_ipsendredirects	VNET(ipsendredirects)
1166df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
117eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1188b615593SMarko Zec     "Enable sending IP redirects");
1190312fbe9SPoul-Henning Kamp 
120823db0e9SDon Lewis /*
121823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
122823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
123a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
124823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1253f67c834SDon Lewis  *
126a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
127a8f12100SDon Lewis  * to translate the destination address to another local interface.
1283f67c834SDon Lewis  *
1293f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1303f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1313f67c834SDon Lewis  * packets for those addresses are received.
132823db0e9SDon Lewis  */
1335f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_checkinterface);
13482cea7e6SBjoern A. Zeeb #define	V_ip_checkinterface	VNET(ip_checkinterface)
1356df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
136eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1378b615593SMarko Zec     "Verify packet arrives on correct interface");
138b3e95d4eSJonathan Lemon 
139b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head);	/* Packet filter hooks */
140df8bae1dSRodney W. Grimes 
141d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
142d4b5cae4SRobert Watson 	.nh_name = "ip",
143d4b5cae4SRobert Watson 	.nh_handler = ip_input,
144d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
145b8bc95cdSAdrian Chadd #ifdef	RSS
1462527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
147b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
148b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
149b8bc95cdSAdrian Chadd #else
150d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
151b8bc95cdSAdrian Chadd #endif
152d4b5cae4SRobert Watson };
153ca925d9cSJonathan Lemon 
154b8bc95cdSAdrian Chadd #ifdef	RSS
155b8bc95cdSAdrian Chadd /*
156b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
157b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
158b8bc95cdSAdrian Chadd  *
159b8bc95cdSAdrian Chadd  * It should likely have something that assert it
160b8bc95cdSAdrian Chadd  * actually has valid flow details.
161b8bc95cdSAdrian Chadd  */
162b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
163b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
164b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
165b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
166499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
167b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
168b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
169b8bc95cdSAdrian Chadd };
170b8bc95cdSAdrian Chadd #endif
171b8bc95cdSAdrian Chadd 
172df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
173f0ffb944SJulian Elischer extern	struct protosw inetsw[];
174df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
17582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17782cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
178ca925d9cSJonathan Lemon 
/* The "mtu" sysctl exists only when IPCTL_DEFMTU is defined. */
#if defined(IPCTL_DEFMTU)
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0,
    "Default MTU");
#endif

/* Per-VNET stealth-mode switch; present only in IPSTEALTH kernels. */
#if defined(IPSTEALTH)
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
190eddfbb76SRobert Watson 
191315e3e38SRobert Watson /*
1925da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1935923c293SGleb Smirnoff  */
1945da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1955da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1965da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1975da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1985923c293SGleb Smirnoff 
1995923c293SGleb Smirnoff #ifdef VIMAGE
2005da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2015923c293SGleb Smirnoff #endif /* VIMAGE */
2025923c293SGleb Smirnoff 
2035923c293SGleb Smirnoff /*
204315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2055923c293SGleb Smirnoff  * into ipstat treated as an array.
206315e3e38SRobert Watson  */
207315e3e38SRobert Watson void
208315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
209315e3e38SRobert Watson {
210315e3e38SRobert Watson 
2115da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
212315e3e38SRobert Watson }
213315e3e38SRobert Watson 
214315e3e38SRobert Watson void
215315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
216315e3e38SRobert Watson {
217315e3e38SRobert Watson 
2185da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
219315e3e38SRobert Watson }
220315e3e38SRobert Watson 
221d4b5cae4SRobert Watson static int
222d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
223d4b5cae4SRobert Watson {
224d4b5cae4SRobert Watson 	int error, qlimit;
225d4b5cae4SRobert Watson 
226d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
227d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
228d4b5cae4SRobert Watson 	if (error || !req->newptr)
229d4b5cae4SRobert Watson 		return (error);
230d4b5cae4SRobert Watson 	if (qlimit < 1)
231d4b5cae4SRobert Watson 		return (EINVAL);
232d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
233d4b5cae4SRobert Watson }
234d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
2357029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
2367029da5cSPawel Biernacki     sysctl_netinet_intr_queue_maxlen, "I",
237d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
238d4b5cae4SRobert Watson 
239d4b5cae4SRobert Watson static int
240d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
241d4b5cae4SRobert Watson {
242d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
243d4b5cae4SRobert Watson 	int error, qdrops;
244d4b5cae4SRobert Watson 
245d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
246d4b5cae4SRobert Watson 	qdrops = qdrops_long;
247d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
248d4b5cae4SRobert Watson 	if (error || !req->newptr)
249d4b5cae4SRobert Watson 		return (error);
250d4b5cae4SRobert Watson 	if (qdrops != 0)
251d4b5cae4SRobert Watson 		return (EINVAL);
252d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
253d4b5cae4SRobert Watson 	return (0);
254d4b5cae4SRobert Watson }
255d4b5cae4SRobert Watson 
256d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
2577029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
2587029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_queue_drops, "I",
259d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
260d4b5cae4SRobert Watson 
261b8bc95cdSAdrian Chadd #ifdef	RSS
262b8bc95cdSAdrian Chadd static int
263b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
264b8bc95cdSAdrian Chadd {
265b8bc95cdSAdrian Chadd 	int error, qlimit;
266b8bc95cdSAdrian Chadd 
267b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
268b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
269b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
270b8bc95cdSAdrian Chadd 		return (error);
271b8bc95cdSAdrian Chadd 	if (qlimit < 1)
272b8bc95cdSAdrian Chadd 		return (EINVAL);
273b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
274b8bc95cdSAdrian Chadd }
2757faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2767029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
2777029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2787faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
279b8bc95cdSAdrian Chadd 
280b8bc95cdSAdrian Chadd static int
281b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
282b8bc95cdSAdrian Chadd {
283b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
284b8bc95cdSAdrian Chadd 	int error, qdrops;
285b8bc95cdSAdrian Chadd 
286b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
287b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
288b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
289b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
290b8bc95cdSAdrian Chadd 		return (error);
291b8bc95cdSAdrian Chadd 	if (qdrops != 0)
292b8bc95cdSAdrian Chadd 		return (EINVAL);
293b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
294b8bc95cdSAdrian Chadd 	return (0);
295b8bc95cdSAdrian Chadd }
296b8bc95cdSAdrian Chadd 
2977faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
2987029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
2997029da5cSPawel Biernacki     sysctl_netinet_intr_direct_queue_drops, "I",
300b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
301b8bc95cdSAdrian Chadd #endif	/* RSS */
302b8bc95cdSAdrian Chadd 
303df8bae1dSRodney W. Grimes /*
304df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
305df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
306df8bae1dSRodney W. Grimes  */
307df8bae1dSRodney W. Grimes void
308f2565d68SRobert Watson ip_init(void)
309df8bae1dSRodney W. Grimes {
310b252313fSGleb Smirnoff 	struct pfil_head_args args;
311f2565d68SRobert Watson 	struct protosw *pr;
312f2565d68SRobert Watson 	int i;
313df8bae1dSRodney W. Grimes 
314d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
315603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3161ed81b73SMarko Zec 
3171ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3181dbefcc0SGleb Smirnoff 	ipreass_init();
3191ed81b73SMarko Zec 
3200b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
321b252313fSGleb Smirnoff 	args.pa_version = PFIL_VERSION;
322b252313fSGleb Smirnoff 	args.pa_flags = PFIL_IN | PFIL_OUT;
323b252313fSGleb Smirnoff 	args.pa_type = PFIL_TYPE_IP4;
324b252313fSGleb Smirnoff 	args.pa_headname = PFIL_INET_NAME;
325b252313fSGleb Smirnoff 	V_inet_pfil_head = pfil_head_register(&args);
3260b4b0b0fSJulian Elischer 
327ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
328ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
329ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
330ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
331ef91a976SAndrey V. Elsukov 		    __func__);
332ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
333ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
334ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
335ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
336ef91a976SAndrey V. Elsukov 		    __func__);
337ef91a976SAndrey V. Elsukov 
3381ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
339484149deSBjoern A. Zeeb #ifdef VIMAGE
340484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
341484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
342484149deSBjoern A. Zeeb #ifdef	RSS
343484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
344484149deSBjoern A. Zeeb #endif
3451ed81b73SMarko Zec 		return;
346484149deSBjoern A. Zeeb 	}
347484149deSBjoern A. Zeeb #endif
3481ed81b73SMarko Zec 
349f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
35002410549SRobert Watson 	if (pr == NULL)
351db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
352db09bef3SAndre Oppermann 
353db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
354df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
355df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
356db09bef3SAndre Oppermann 	/*
357db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
358db09bef3SAndre Oppermann 	 * in ip_protox[].
359db09bef3SAndre Oppermann 	 */
360f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
361f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
362df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
363db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
364db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
365db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
366df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
367db09bef3SAndre Oppermann 		}
368194a213eSAndrey A. Chernov 
369d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
370b8bc95cdSAdrian Chadd #ifdef	RSS
371b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
372b8bc95cdSAdrian Chadd #endif
373df8bae1dSRodney W. Grimes }
374df8bae1dSRodney W. Grimes 
3759802380eSBjoern A. Zeeb #ifdef VIMAGE
3763f58662dSBjoern A. Zeeb static void
3773f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3789802380eSBjoern A. Zeeb {
37989856f7eSBjoern A. Zeeb 	struct ifnet *ifp;
380ef91a976SAndrey V. Elsukov 	int error;
3814d3dfd45SMikolaj Golub 
382484149deSBjoern A. Zeeb #ifdef	RSS
383484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
384484149deSBjoern A. Zeeb #endif
385484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
386484149deSBjoern A. Zeeb 
387b252313fSGleb Smirnoff 	pfil_head_unregister(V_inet_pfil_head);
388ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
389ef91a976SAndrey V. Elsukov 	if (error != 0) {
390ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
391ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
392ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
393ef91a976SAndrey V. Elsukov 	}
394ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
395ef91a976SAndrey V. Elsukov 	if (error != 0) {
396ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
397ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
398ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
399ef91a976SAndrey V. Elsukov 	}
40089856f7eSBjoern A. Zeeb 
40189856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
40289856f7eSBjoern A. Zeeb 	in_ifscrub_all();
40389856f7eSBjoern A. Zeeb 
40489856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
40589856f7eSBjoern A. Zeeb 	IFNET_RLOCK();
4064f6c66ccSMatt Macy 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
40789856f7eSBjoern A. Zeeb 		rt_flushifroutes_af(ifp, AF_INET);
40889856f7eSBjoern A. Zeeb 	IFNET_RUNLOCK();
4099802380eSBjoern A. Zeeb 
410e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4111dbefcc0SGleb Smirnoff 	ipreass_destroy();
41289856f7eSBjoern A. Zeeb 
41389856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
41489856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4159802380eSBjoern A. Zeeb }
4163f58662dSBjoern A. Zeeb 
4173f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4189802380eSBjoern A. Zeeb #endif
4199802380eSBjoern A. Zeeb 
420b8bc95cdSAdrian Chadd #ifdef	RSS
421b8bc95cdSAdrian Chadd /*
422b8bc95cdSAdrian Chadd  * IP direct input routine.
423b8bc95cdSAdrian Chadd  *
424b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
425b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
426b8bc95cdSAdrian Chadd  */
427b8bc95cdSAdrian Chadd void
428b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
429b8bc95cdSAdrian Chadd {
430b8bc95cdSAdrian Chadd 	struct ip *ip;
431b8bc95cdSAdrian Chadd 	int hlen;
432b8bc95cdSAdrian Chadd 
433b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
434b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
435b8bc95cdSAdrian Chadd 
436fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
437fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
438fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
439fcf59617SAndrey V. Elsukov 			return;
440fcf59617SAndrey V. Elsukov 	}
441fcf59617SAndrey V. Elsukov #endif /* IPSEC */
442b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
443b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
444b8bc95cdSAdrian Chadd 	return;
445b8bc95cdSAdrian Chadd }
446b8bc95cdSAdrian Chadd #endif
447b8bc95cdSAdrian Chadd 
4484d2e3692SLuigi Rizzo /*
449df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
450df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
451df8bae1dSRodney W. Grimes  */
452c67b1d17SGarrett Wollman void
453c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
454df8bae1dSRodney W. Grimes {
4551a5995ccSEugene Grosbein 	struct rm_priotracker in_ifa_tracker;
4569188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4575da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
458ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4590aade26eSRobert Watson 	struct ifnet *ifp;
4609b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
46121d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
46202c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
463f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
464b715f178SLuigi Rizzo 
465fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
466b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
467db40007dSAndrew R. Reiter 
468ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46976ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
47076ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4712b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
47253be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4738f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4749b932e9eSAndre Oppermann 		goto ours;
4752b25acc1SLuigi Rizzo 	}
4762b25acc1SLuigi Rizzo 
47786425c62SRobert Watson 	IPSTAT_INC(ips_total);
47858938916SGarrett Wollman 
47958938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
48058938916SGarrett Wollman 		goto tooshort;
48158938916SGarrett Wollman 
482df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4830b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
48486425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
485c67b1d17SGarrett Wollman 		return;
486df8bae1dSRodney W. Grimes 	}
487df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
48858938916SGarrett Wollman 
48953be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
49086425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
491df8bae1dSRodney W. Grimes 		goto bad;
492df8bae1dSRodney W. Grimes 	}
49358938916SGarrett Wollman 
49453be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
495df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
49686425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
497df8bae1dSRodney W. Grimes 		goto bad;
498df8bae1dSRodney W. Grimes 	}
499df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
5000b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
50186425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
502c67b1d17SGarrett Wollman 			return;
503df8bae1dSRodney W. Grimes 		}
504df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
505df8bae1dSRodney W. Grimes 	}
50633841545SHajimu UMEMOTO 
50757f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
50857f60867SMark Johnston 
5096c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
5100aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5116c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
5126c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
5130aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
51486425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
51533841545SHajimu UMEMOTO 			goto bad;
51633841545SHajimu UMEMOTO 		}
51733841545SHajimu UMEMOTO 	}
51833841545SHajimu UMEMOTO 
519db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
520db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
521db4f9cc7SJonathan Lemon 	} else {
52258938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
52347c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
52458938916SGarrett Wollman 		} else {
52547c861ecSBrian Somers 			sum = in_cksum(m, hlen);
52658938916SGarrett Wollman 		}
527db4f9cc7SJonathan Lemon 	}
52847c861ecSBrian Somers 	if (sum) {
52986425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
530df8bae1dSRodney W. Grimes 		goto bad;
531df8bae1dSRodney W. Grimes 	}
532df8bae1dSRodney W. Grimes 
53302b199f1SMax Laier #ifdef ALTQ
53402b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
53502b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
53602b199f1SMax Laier 		return;
53702b199f1SMax Laier #endif
53802b199f1SMax Laier 
53921d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
54021d172a3SGleb Smirnoff 	if (ip_len < hlen) {
54186425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
542df8bae1dSRodney W. Grimes 		goto bad;
543df8bae1dSRodney W. Grimes 	}
544df8bae1dSRodney W. Grimes 
545df8bae1dSRodney W. Grimes 	/*
546df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
547df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
548df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
549df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
550df8bae1dSRodney W. Grimes 	 */
55121d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
55258938916SGarrett Wollman tooshort:
55386425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
554df8bae1dSRodney W. Grimes 		goto bad;
555df8bae1dSRodney W. Grimes 	}
55621d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
557df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55821d172a3SGleb Smirnoff 			m->m_len = ip_len;
55921d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
560df8bae1dSRodney W. Grimes 		} else
56121d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
562df8bae1dSRodney W. Grimes 	}
563b8bc95cdSAdrian Chadd 
564ad9f4d6aSAndrey V. Elsukov 	/*
565ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
56662484790SAndrey V. Elsukov 	 * ip_tryforward() does not generate redirects, so fall
56762484790SAndrey V. Elsukov 	 * through to normal processing if redirects are required.
568ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
569ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
570ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
571ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
572ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
573ad9f4d6aSAndrey V. Elsukov 	 */
57462484790SAndrey V. Elsukov 	if (V_ipforwarding != 0 && V_ipsendredirects == 0
575fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
576fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
577fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
578ad9f4d6aSAndrey V. Elsukov #endif
579ad9f4d6aSAndrey V. Elsukov 	    ) {
580ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
58133872124SGeorge V. Neville-Neil 			return;
582ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
583ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
584ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
585ad9f4d6aSAndrey V. Elsukov 			goto ours;
586ad9f4d6aSAndrey V. Elsukov 		}
587ad9f4d6aSAndrey V. Elsukov 	}
588fcf59617SAndrey V. Elsukov 
589fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
59014dd6717SSam Leffler 	/*
591ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
59214dd6717SSam Leffler 	 */
593fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
594fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
595c21fd232SAndre Oppermann 			goto passin;
596ad9f4d6aSAndrey V. Elsukov #endif
597fcf59617SAndrey V. Elsukov 
598c4ac87eaSDarren Reed 	/*
599134ea224SSam Leffler 	 * Run through list of hooks for input packets.
600f51f805fSSam Leffler 	 *
601f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
602f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
603f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
604c4ac87eaSDarren Reed 	 */
605c21fd232SAndre Oppermann 
606c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
607b252313fSGleb Smirnoff 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
608c21fd232SAndre Oppermann 		goto passin;
609c21fd232SAndre Oppermann 
610f51f805fSSam Leffler 	odst = ip->ip_dst;
611b252313fSGleb Smirnoff 	if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
612b252313fSGleb Smirnoff 	    PFIL_PASS)
613beec8214SDarren Reed 		return;
614134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
615c4ac87eaSDarren Reed 		return;
6169b932e9eSAndre Oppermann 
617c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
61802c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6190aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
6209b932e9eSAndre Oppermann 
6219b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6229b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6239b932e9eSAndre Oppermann 		goto ours;
6249b932e9eSAndre Oppermann 	}
625ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
626de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
627099dd043SAndre Oppermann 			/*
628ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
629ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
630ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
631099dd043SAndre Oppermann 			 */
632ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
633099dd043SAndre Oppermann 			return;
634099dd043SAndre Oppermann 		}
635ffdbf9daSAndrey V. Elsukov 	}
636c21fd232SAndre Oppermann passin:
63721d172a3SGleb Smirnoff 
63821d172a3SGleb Smirnoff 	/*
639df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
640df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
641df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
642df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
643df8bae1dSRodney W. Grimes 	 */
6449b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
645c67b1d17SGarrett Wollman 		return;
646df8bae1dSRodney W. Grimes 
647f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
648f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
649f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
650f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
651f0068c4aSGarrett Wollman 	 * grabbing the packet.
652f0068c4aSGarrett Wollman          */
653603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
654f0068c4aSGarrett Wollman 		goto ours;
655f0068c4aSGarrett Wollman 
656df8bae1dSRodney W. Grimes 	/*
657df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
658cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
659cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
660cc766e04SGarrett Wollman 	 * with it).
661df8bae1dSRodney W. Grimes 	 */
662d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
663cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
664cc766e04SGarrett Wollman 		goto ours;
665cc766e04SGarrett Wollman 
6667538a9a0SJonathan Lemon 	/*
667823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
668823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
669823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
670e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
671e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6723f67c834SDon Lewis 	 *
6733f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6743f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6753f67c834SDon Lewis 	 *
676a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6773f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6783f67c834SDon Lewis 	 * the packets are received.
679a9771948SGleb Smirnoff 	 *
680a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
681a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
682a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
683823db0e9SDon Lewis 	 */
684603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6850aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
68654bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
687823db0e9SDon Lewis 
688ca925d9cSJonathan Lemon 	/*
689ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
690ca925d9cSJonathan Lemon 	 */
6911a5995ccSEugene Grosbein 	IN_IFADDR_RLOCK(&in_ifa_tracker);
6929b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
693f9e354dfSJulian Elischer 		/*
694823db0e9SDon Lewis 		 * If the address matches, verify that the packet
695823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
696823db0e9SDon Lewis 		 * enabled.
697f9e354dfSJulian Elischer 		 */
6989b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6998c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
7007caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7017caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
7027caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
7031a5995ccSEugene Grosbein 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
704ed1ff184SJulian Elischer 			goto ours;
705ca925d9cSJonathan Lemon 		}
7068c0fec80SRobert Watson 	}
7071a5995ccSEugene Grosbein 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
7082d9cfabaSRobert Watson 
709823db0e9SDon Lewis 	/*
710ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
711ca925d9cSJonathan Lemon 	 *
712ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
713ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
714ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
715ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
716823db0e9SDon Lewis 	 */
7170aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
718d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
719ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
720ca925d9cSJonathan Lemon 				continue;
721ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
722df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7230aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7247caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7257caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7267caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
727df8bae1dSRodney W. Grimes 				goto ours;
7280aade26eSRobert Watson 			}
7290ac40133SBrian Somers #ifdef BOOTP_COMPAT
7300aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7317caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7327caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7337caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
734ca925d9cSJonathan Lemon 				goto ours;
7350aade26eSRobert Watson 			}
7360ac40133SBrian Somers #endif
737df8bae1dSRodney W. Grimes 		}
73819e5b0a7SRobert Watson 		ia = NULL;
739df8bae1dSRodney W. Grimes 	}
740f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
741f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
74286425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
743f8429ca2SBruce M Simpson 		m_freem(m);
744f8429ca2SBruce M Simpson 		return;
745f8429ca2SBruce M Simpson 	}
746df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
747603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
748df8bae1dSRodney W. Grimes 			/*
749df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
750df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
751df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
752df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
753df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
754df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
755df8bae1dSRodney W. Grimes 			 */
7560aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
75786425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
758df8bae1dSRodney W. Grimes 				m_freem(m);
759c67b1d17SGarrett Wollman 				return;
760df8bae1dSRodney W. Grimes 			}
761df8bae1dSRodney W. Grimes 
762df8bae1dSRodney W. Grimes 			/*
76311612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
764df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
765df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
766df8bae1dSRodney W. Grimes 			 */
767df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
768df8bae1dSRodney W. Grimes 				goto ours;
76986425c62SRobert Watson 			IPSTAT_INC(ips_forward);
770df8bae1dSRodney W. Grimes 		}
771df8bae1dSRodney W. Grimes 		/*
772d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
773d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
774d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
775df8bae1dSRodney W. Grimes 		 */
776df8bae1dSRodney W. Grimes 		goto ours;
777df8bae1dSRodney W. Grimes 	}
778df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
779df8bae1dSRodney W. Grimes 		goto ours;
780df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
781df8bae1dSRodney W. Grimes 		goto ours;
782df8bae1dSRodney W. Grimes 
7836a800098SYoshinobu Inoue 	/*
784df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
785df8bae1dSRodney W. Grimes 	 */
786603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
78786425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
788df8bae1dSRodney W. Grimes 		m_freem(m);
789546f251bSChris D. Faulhaber 	} else {
7909b932e9eSAndre Oppermann 		ip_forward(m, dchg);
791546f251bSChris D. Faulhaber 	}
792c67b1d17SGarrett Wollman 	return;
793df8bae1dSRodney W. Grimes 
794df8bae1dSRodney W. Grimes ours:
795d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
796d0ebc0d2SYaroslav Tykhiy 	/*
797d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
798d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
799d0ebc0d2SYaroslav Tykhiy 	 */
8007caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
801d0ebc0d2SYaroslav Tykhiy 		return;
802d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
803d0ebc0d2SYaroslav Tykhiy 
80463f8d699SJordan K. Hubbard 	/*
805b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
806ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
807df8bae1dSRodney W. Grimes 	 */
8088f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
809aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
810f0cada84SAndre Oppermann 		m = ip_reass(m);
811f0cada84SAndre Oppermann 		if (m == NULL)
812c67b1d17SGarrett Wollman 			return;
8136a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8147e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
81553be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
816f0cada84SAndre Oppermann 	}
817f0cada84SAndre Oppermann 
818fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
819fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
820fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
821fcf59617SAndrey V. Elsukov 			return;
822fcf59617SAndrey V. Elsukov 	}
823b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
82433841545SHajimu UMEMOTO 
825df8bae1dSRodney W. Grimes 	/*
826df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
827df8bae1dSRodney W. Grimes 	 */
82886425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8299b932e9eSAndre Oppermann 
8308f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
831c67b1d17SGarrett Wollman 	return;
832df8bae1dSRodney W. Grimes bad:
833df8bae1dSRodney W. Grimes 	m_freem(m);
834c67b1d17SGarrett Wollman }
835c67b1d17SGarrett Wollman 
836c67b1d17SGarrett Wollman /*
837df8bae1dSRodney W. Grimes  * IP timer processing;
838df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
839df8bae1dSRodney W. Grimes  * queue, discard it.
840df8bae1dSRodney W. Grimes  */
841df8bae1dSRodney W. Grimes void
842f2565d68SRobert Watson ip_slowtimo(void)
843df8bae1dSRodney W. Grimes {
8448b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
845df8bae1dSRodney W. Grimes 
8465ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8478b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8488b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8491dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8508b615593SMarko Zec 		CURVNET_RESTORE();
8518b615593SMarko Zec 	}
8525ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
853df8bae1dSRodney W. Grimes }
854df8bae1dSRodney W. Grimes 
8559802380eSBjoern A. Zeeb void
8569802380eSBjoern A. Zeeb ip_drain(void)
8579802380eSBjoern A. Zeeb {
8589802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8599802380eSBjoern A. Zeeb 
8609802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8619802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8629802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8631dbefcc0SGleb Smirnoff 		ipreass_drain();
8648b615593SMarko Zec 		CURVNET_RESTORE();
8658b615593SMarko Zec 	}
8665ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
867df8bae1dSRodney W. Grimes }
868df8bae1dSRodney W. Grimes 
869df8bae1dSRodney W. Grimes /*
870de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
871de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
872de38924dSAndre Oppermann  */
873de38924dSAndre Oppermann int
8741b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
875de38924dSAndre Oppermann {
876de38924dSAndre Oppermann 	struct protosw *pr;
877de38924dSAndre Oppermann 
878de38924dSAndre Oppermann 	/* Sanity checks. */
8791b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
880de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
881de38924dSAndre Oppermann 
882de38924dSAndre Oppermann 	/*
883de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
884de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
885de38924dSAndre Oppermann 	 */
886de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
887de38924dSAndre Oppermann 	if (pr == NULL)
888de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
889de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
890de38924dSAndre Oppermann 		return (EEXIST);
891de38924dSAndre Oppermann 
892de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
893de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
894de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
895de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
896de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
897de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
898de38924dSAndre Oppermann 			return (0);
899de38924dSAndre Oppermann 		}
900de38924dSAndre Oppermann 	}
901de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
902de38924dSAndre Oppermann }
903de38924dSAndre Oppermann 
904de38924dSAndre Oppermann int
9051b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
906de38924dSAndre Oppermann {
907de38924dSAndre Oppermann 	struct protosw *pr;
908de38924dSAndre Oppermann 
909de38924dSAndre Oppermann 	/* Sanity checks. */
9101b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
911de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
912de38924dSAndre Oppermann 
913de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
914de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
915de38924dSAndre Oppermann 	if (pr == NULL)
916de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
917de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
918de38924dSAndre Oppermann 		return (ENOENT);
919de38924dSAndre Oppermann 
920de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
921de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
922de38924dSAndre Oppermann 	return (0);
923de38924dSAndre Oppermann }
924de38924dSAndre Oppermann 
/*
 * Table of errno values with PRC_NCMDS slots.  The number in front of
 * each entry is its array index; slots past the last listed entry are
 * zero-initialized by the language.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 */ 0,
	/*  1 */ 0,
	/*  2 */ 0,
	/*  3 */ 0,
	/*  4 */ 0,
	/*  5 */ EMSGSIZE,
	/*  6 */ EHOSTDOWN,
	/*  7 */ EHOSTUNREACH,
	/*  8 */ EHOSTUNREACH,
	/*  9 */ EHOSTUNREACH,
	/* 10 */ ECONNREFUSED,
	/* 11 */ ECONNREFUSED,
	/* 12 */ EMSGSIZE,
	/* 13 */ EHOSTUNREACH,
	/* 14 */ 0,
	/* 15 */ 0,
	/* 16 */ 0,
	/* 17 */ 0,
	/* 18 */ EHOSTUNREACH,
	/* 19 */ 0,
	/* 20 */ ENOPROTOOPT,
	/* 21 */ ECONNREFUSED
};
933df8bae1dSRodney W. Grimes 
934df8bae1dSRodney W. Grimes /*
935df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
936df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
937df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
938df8bae1dSRodney W. Grimes  * of codes and types.
939df8bae1dSRodney W. Grimes  *
940df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
941df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
942df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
943df8bae1dSRodney W. Grimes  * protocol deal with that.
944df8bae1dSRodney W. Grimes  *
945df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
946df8bae1dSRodney W. Grimes  * via a source route.
947df8bae1dSRodney W. Grimes  */
9489b932e9eSAndre Oppermann void
9499b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
950df8bae1dSRodney W. Grimes {
9512b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
952efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
953df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
954d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9559b932e9eSAndre Oppermann 	struct in_addr dest;
956b835b6feSBjoern A. Zeeb 	struct route ro;
957c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9583efc3014SJulian Elischer 
959b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
960b8a6e03fSGleb Smirnoff 
9619b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
96286425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
963df8bae1dSRodney W. Grimes 		m_freem(m);
964df8bae1dSRodney W. Grimes 		return;
965df8bae1dSRodney W. Grimes 	}
966fcf59617SAndrey V. Elsukov 	if (
967fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
968fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
969fcf59617SAndrey V. Elsukov #endif
970fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
971fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9728922ddbeSAndrey V. Elsukov 		return;
9738922ddbeSAndrey V. Elsukov 	}
974df8bae1dSRodney W. Grimes 
975d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
976d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
977d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
978d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
979d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
980d14122b0SErmal Luçi #ifdef RADIX_MPATH
981d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
982d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
983d14122b0SErmal Luçi 	    M_GETFIB(m));
984d14122b0SErmal Luçi #else
985*983066f0SAlexander V. Chernikov 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF,
986*983066f0SAlexander V. Chernikov 	    m->m_pkthdr.flowid);
987d14122b0SErmal Luçi #endif
988*983066f0SAlexander V. Chernikov 	if (ro.ro_nh != NULL) {
989*983066f0SAlexander V. Chernikov 		ia = ifatoia(ro.ro_nh->nh_ifa);
99056844a62SErmal Luçi 	} else
99156844a62SErmal Luçi 		ia = NULL;
992df8bae1dSRodney W. Grimes 	/*
993bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
994bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
995bfef7ed4SIan Dowse 	 *
9964d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9974d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9984d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9994d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
10004d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
10014d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
10024d2e3692SLuigi Rizzo 	 *
1003c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
1004bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1005bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1006bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1007df8bae1dSRodney W. Grimes 	 */
1008dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1009eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10109967cafcSSam Leffler 		/*
10119967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10129967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10139967cafcSSam Leffler 		 * be conservative and just discard the copy since
10149967cafcSSam Leffler 		 * code below may some day want the tags.
10159967cafcSSam Leffler 		 */
10169967cafcSSam Leffler 		m_free(mcopy);
10179967cafcSSam Leffler 		mcopy = NULL;
10189967cafcSSam Leffler 	}
1019bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10208f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1021e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1022bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1023bfef7ed4SIan Dowse 	}
102404287599SRuslan Ermilov #ifdef IPSTEALTH
1025fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
102604287599SRuslan Ermilov #endif
102704287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1028fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1029fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1030fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1031fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1032fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1033fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1034fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1035b8a6e03fSGleb Smirnoff 			return;
103604287599SRuslan Ermilov 		}
1037fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1038fcf59617SAndrey V. Elsukov 	}
1039fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1040df8bae1dSRodney W. Grimes 	/*
1041df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1042df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1043df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1044df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1045df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1046df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1047df8bae1dSRodney W. Grimes 	 */
10489b932e9eSAndre Oppermann 	dest.s_addr = 0;
1049efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1050efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
1051*983066f0SAlexander V. Chernikov 		struct nhop_object *nh;
105202c1c707SAndre Oppermann 
1053*983066f0SAlexander V. Chernikov 		nh = ro.ro_nh;
105402c1c707SAndre Oppermann 
1055*983066f0SAlexander V. Chernikov 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
1056*983066f0SAlexander V. Chernikov 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
1057df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1058df8bae1dSRodney W. Grimes 
1059*983066f0SAlexander V. Chernikov 			if (nh_ia != NULL &&
1060*983066f0SAlexander V. Chernikov 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
1061*983066f0SAlexander V. Chernikov 				if (nh->nh_flags & NHF_GATEWAY)
1062*983066f0SAlexander V. Chernikov 					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
1063df8bae1dSRodney W. Grimes 				else
10649b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1065df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1066df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1067df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1068df8bae1dSRodney W. Grimes 			}
1069df8bae1dSRodney W. Grimes 		}
107002c1c707SAndre Oppermann 	}
1071df8bae1dSRodney W. Grimes 
1072b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1073b835b6feSBjoern A. Zeeb 
1074*983066f0SAlexander V. Chernikov 	if (error == EMSGSIZE && ro.ro_nh)
1075*983066f0SAlexander V. Chernikov 		mtu = ro.ro_nh->nh_mtu;
1076*983066f0SAlexander V. Chernikov 	RO_NHFREE(&ro);
1077b835b6feSBjoern A. Zeeb 
1078df8bae1dSRodney W. Grimes 	if (error)
107986425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1080df8bae1dSRodney W. Grimes 	else {
108186425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1082df8bae1dSRodney W. Grimes 		if (type)
108386425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1084df8bae1dSRodney W. Grimes 		else {
10859188b4a1SAndre Oppermann 			if (mcopy)
1086df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1087b8a6e03fSGleb Smirnoff 			return;
1088df8bae1dSRodney W. Grimes 		}
1089df8bae1dSRodney W. Grimes 	}
10904f6c66ccSMatt Macy 	if (mcopy == NULL)
1091b8a6e03fSGleb Smirnoff 		return;
10924f6c66ccSMatt Macy 
1093df8bae1dSRodney W. Grimes 
1094df8bae1dSRodney W. Grimes 	switch (error) {
1095df8bae1dSRodney W. Grimes 
1096df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1097df8bae1dSRodney W. Grimes 		/* type, code set above */
1098df8bae1dSRodney W. Grimes 		break;
1099df8bae1dSRodney W. Grimes 
1100efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1101df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1102df8bae1dSRodney W. Grimes 	case ENETDOWN:
1103df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1104df8bae1dSRodney W. Grimes 	default:
1105df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1106df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1107df8bae1dSRodney W. Grimes 		break;
1108df8bae1dSRodney W. Grimes 
1109df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1110df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1111df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11129b932e9eSAndre Oppermann 		/*
1113b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1114b835b6feSBjoern A. Zeeb 		 * interface MTU.
1115ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1116ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1117ab48768bSAndre Oppermann 		 * current packet size.
11189b932e9eSAndre Oppermann 		 */
1119b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1120b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1121b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1122b835b6feSBjoern A. Zeeb 		} else {
1123ab48768bSAndre Oppermann 			if (ia != NULL)
1124c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1125ab48768bSAndre Oppermann 			else
11268f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1127ab48768bSAndre Oppermann 		}
112886425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1129df8bae1dSRodney W. Grimes 		break;
1130df8bae1dSRodney W. Grimes 
1131df8bae1dSRodney W. Grimes 	case ENOBUFS:
11323a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11333a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1134b8a6e03fSGleb Smirnoff 		return;
1135df8bae1dSRodney W. Grimes 	}
1136c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1137df8bae1dSRodney W. Grimes }
1138df8bae1dSRodney W. Grimes 
/*
 * CHECK_SO_CT(sp, ct): 1 when the socket pointed to by `sp' has
 * SO_TIMESTAMP set in so_options and its so_ts_clock equals `ct',
 * otherwise 0.  Both arguments are parenthesized so that arbitrary
 * expressions can be passed; `sp' may be evaluated twice.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1141339efd75SMaxim Sobolev 
114282c23ebaSBill Fenner void
1143f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1144f2565d68SRobert Watson     struct mbuf *m)
114582c23ebaSBill Fenner {
114606193f0bSKonstantin Belousov 	bool stamped;
11478b615593SMarko Zec 
114806193f0bSKonstantin Belousov 	stamped = false;
1149339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1150339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
115106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
115206193f0bSKonstantin Belousov 		struct timespec ts1;
1153be8a62e8SPoul-Henning Kamp 
115406193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
115506193f0bSKonstantin Belousov 		    M_TSTMP)) {
115606193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
115706193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
115806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
115906193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
116006193f0bSKonstantin Belousov 		} else {
1161be8a62e8SPoul-Henning Kamp 			bintime(&bt);
116206193f0bSKonstantin Belousov 		}
1163be8a62e8SPoul-Henning Kamp 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1164be8a62e8SPoul-Henning Kamp 		    SCM_BINTIME, SOL_SOCKET);
116506193f0bSKonstantin Belousov 		if (*mp != NULL) {
1166be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
116706193f0bSKonstantin Belousov 			stamped = true;
116806193f0bSKonstantin Belousov 		}
1169be8a62e8SPoul-Henning Kamp 	}
1170339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
117106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
1172c012cfe6SEd Maste 		struct timespec ts1;
117382c23ebaSBill Fenner 		struct timeval tv;
117482c23ebaSBill Fenner 
117506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
117606193f0bSKonstantin Belousov 		    M_TSTMP)) {
117706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
117806193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
117906193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
118006193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
118106193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
118206193f0bSKonstantin Belousov 		} else {
1183339efd75SMaxim Sobolev 			microtime(&tv);
118406193f0bSKonstantin Belousov 		}
118582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
118682c23ebaSBill Fenner 		    SCM_TIMESTAMP, SOL_SOCKET);
118706193f0bSKonstantin Belousov 		if (*mp != NULL) {
118882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118906193f0bSKonstantin Belousov 			stamped = true;
119006193f0bSKonstantin Belousov 		}
1191339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
119206193f0bSKonstantin Belousov 		struct bintime boottimebin;
119306193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1194339efd75SMaxim Sobolev 
119506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
119606193f0bSKonstantin Belousov 		    M_TSTMP)) {
119706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
119806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
119906193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
12006040822cSAlan Somers 			timespecadd(&ts, &ts1, &ts);
120106193f0bSKonstantin Belousov 		} else {
1202339efd75SMaxim Sobolev 			nanotime(&ts);
120306193f0bSKonstantin Belousov 		}
1204339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1205339efd75SMaxim Sobolev 		    SCM_REALTIME, SOL_SOCKET);
120606193f0bSKonstantin Belousov 		if (*mp != NULL) {
1207339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
120806193f0bSKonstantin Belousov 			stamped = true;
120906193f0bSKonstantin Belousov 		}
1210339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1211339efd75SMaxim Sobolev 		struct timespec ts;
1212339efd75SMaxim Sobolev 
121306193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
121406193f0bSKonstantin Belousov 		    M_TSTMP))
121506193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
121606193f0bSKonstantin Belousov 		else
1217339efd75SMaxim Sobolev 			nanouptime(&ts);
1218339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1219339efd75SMaxim Sobolev 		    SCM_MONOTONIC, SOL_SOCKET);
122006193f0bSKonstantin Belousov 		if (*mp != NULL) {
122106193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
122206193f0bSKonstantin Belousov 			stamped = true;
122306193f0bSKonstantin Belousov 		}
122406193f0bSKonstantin Belousov 	}
122506193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
122606193f0bSKonstantin Belousov 	    M_TSTMP)) {
122706193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
122806193f0bSKonstantin Belousov 
122906193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
123006193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
123106193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
123206193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
123306193f0bSKonstantin Belousov 		*mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO,
123406193f0bSKonstantin Belousov 		    SOL_SOCKET);
123506193f0bSKonstantin Belousov 		if (*mp != NULL)
1236339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1237be8a62e8SPoul-Henning Kamp 	}
123882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
123982c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
124082c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
124182c23ebaSBill Fenner 		if (*mp)
124282c23ebaSBill Fenner 			mp = &(*mp)->m_next;
124382c23ebaSBill Fenner 	}
12444957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
12454957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
12464957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
12474957466bSMatthew N. Dodd 		if (*mp)
12484957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
12494957466bSMatthew N. Dodd 	}
125082c23ebaSBill Fenner #ifdef notyet
125182c23ebaSBill Fenner 	/* XXX
125282c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
125382c23ebaSBill Fenner 	 * than they already were.
125482c23ebaSBill Fenner 	 */
125582c23ebaSBill Fenner 	/* options were tossed already */
125682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
125782c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
125882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
125982c23ebaSBill Fenner 		if (*mp)
126082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
126182c23ebaSBill Fenner 	}
126282c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
126382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1264e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
126582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
126682c23ebaSBill Fenner 		if (*mp)
126782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
126882c23ebaSBill Fenner 	}
126982c23ebaSBill Fenner #endif
127082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1271d314ad7bSJulian Elischer 		struct ifnet *ifp;
1272d314ad7bSJulian Elischer 		struct sdlbuf {
127382c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1274d314ad7bSJulian Elischer 			u_char	pad[32];
1275d314ad7bSJulian Elischer 		} sdlbuf;
1276d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1277d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
127882c23ebaSBill Fenner 
127946f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
128046f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12814a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1282d314ad7bSJulian Elischer 			/*
1283d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1284d314ad7bSJulian Elischer 			 */
128546f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
128646f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1287d314ad7bSJulian Elischer 				goto makedummy;
1288d314ad7bSJulian Elischer 			}
1289d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1290d314ad7bSJulian Elischer 		} else {
1291d314ad7bSJulian Elischer makedummy:
129246f2df9cSSergey Kandaurov 			sdl2->sdl_len =
129346f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1294d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1295d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1296d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1297d314ad7bSJulian Elischer 		}
1298d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
129982c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
130082c23ebaSBill Fenner 		if (*mp)
130182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
130282c23ebaSBill Fenner 	}
13033cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
13043cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
13053cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
13063cca425bSMichael Tuexen 		if (*mp)
13073cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
13083cca425bSMichael Tuexen 	}
13099d3ddf43SAdrian Chadd 
13109d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
13119d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13129d3ddf43SAdrian Chadd 
13139d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13149d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13159d3ddf43SAdrian Chadd 
13169d3ddf43SAdrian Chadd 		/*
13179d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
13189d3ddf43SAdrian Chadd 		 * other - don't populate both?
13199d3ddf43SAdrian Chadd 		 */
13209d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
13219d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
13229d3ddf43SAdrian Chadd 		if (*mp)
13239d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13249d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
13259d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
13269d3ddf43SAdrian Chadd 		if (*mp)
13279d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13289d3ddf43SAdrian Chadd 	}
13299d3ddf43SAdrian Chadd 
13309d3ddf43SAdrian Chadd #ifdef	RSS
13319d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
13329d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13339d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
13349d3ddf43SAdrian Chadd 
13359d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13369d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13379d3ddf43SAdrian Chadd 
13389d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
13399d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
13409d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
13419d3ddf43SAdrian Chadd 			if (*mp)
13429d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
13439d3ddf43SAdrian Chadd 		}
13449d3ddf43SAdrian Chadd 	}
13459d3ddf43SAdrian Chadd #endif
134682c23ebaSBill Fenner }
134782c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
13545f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on);
135582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
135682cea7e6SBjoern A. Zeeb 
135782cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
135882cea7e6SBjoern A. Zeeb 
1359df8bae1dSRodney W. Grimes int
1360f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1361f0068c4aSGarrett Wollman {
13628b615593SMarko Zec 
1363f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1364f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1365f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1366f0068c4aSGarrett Wollman 
1367603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1368f0068c4aSGarrett Wollman 		return EADDRINUSE;
1369f0068c4aSGarrett Wollman 
1370603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13711c5de19aSGarrett Wollman 	/*
13721c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13731c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13741c5de19aSGarrett Wollman 	 */
1375603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1376603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1377603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13781c5de19aSGarrett Wollman 	}
1379f0068c4aSGarrett Wollman 
1380f0068c4aSGarrett Wollman 	return 0;
1381f0068c4aSGarrett Wollman }
1382f0068c4aSGarrett Wollman 
1383f0068c4aSGarrett Wollman int
1384f0068c4aSGarrett Wollman ip_rsvp_done(void)
1385f0068c4aSGarrett Wollman {
13868b615593SMarko Zec 
1387603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13881c5de19aSGarrett Wollman 	/*
13891c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13901c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13911c5de19aSGarrett Wollman 	 */
1392603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1393603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1394603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13951c5de19aSGarrett Wollman 	}
1396f0068c4aSGarrett Wollman 	return 0;
1397f0068c4aSGarrett Wollman }
1398bbb4330bSLuigi Rizzo 
13998f5a8818SKevin Lo int
14008f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1401bbb4330bSLuigi Rizzo {
14028f5a8818SKevin Lo 	struct mbuf *m;
14038f5a8818SKevin Lo 
14048f5a8818SKevin Lo 	m = *mp;
14058f5a8818SKevin Lo 	*mp = NULL;
14068b615593SMarko Zec 
1407bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
14088f5a8818SKevin Lo 		*mp = m;
14098f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
14108f5a8818SKevin Lo 		return (IPPROTO_DONE);
1411bbb4330bSLuigi Rizzo 	}
1412bbb4330bSLuigi Rizzo 
1413bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1414bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1415bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1416bbb4330bSLuigi Rizzo 	 */
1417bbb4330bSLuigi Rizzo 
1418603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1419bbb4330bSLuigi Rizzo 		m_freem(m);
14208f5a8818SKevin Lo 		return (IPPROTO_DONE);
1421bbb4330bSLuigi Rizzo 	}
1422bbb4330bSLuigi Rizzo 
1423603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
14248f5a8818SKevin Lo 		*mp = m;
14258f5a8818SKevin Lo 		rip_input(mp, offp, proto);
14268f5a8818SKevin Lo 		return (IPPROTO_DONE);
1427bbb4330bSLuigi Rizzo 	}
1428bbb4330bSLuigi Rizzo 	/* Drop the packet */
1429bbb4330bSLuigi Rizzo 	m_freem(m);
14308f5a8818SKevin Lo 	return (IPPROTO_DONE);
1431bbb4330bSLuigi Rizzo }
1432