xref: /freebsd/sys/netinet/ip_input.c (revision 126f8248ccfee8e9252a8b449694b8619738599f)
1c398230bSWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
15fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
31df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
32df8bae1dSRodney W. Grimes  */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
38*126f8248SMichael Tuexen #include "opt_inet.h"
3927108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
406a800098SYoshinobu Inoue #include "opt_ipsec.h"
4133553d6eSBjoern A. Zeeb #include "opt_route.h"
42b8bc95cdSAdrian Chadd #include "opt_rss.h"
4378b1fc05SGleb Smirnoff #include "opt_sctp.h"
4474a9466cSGary Palmer 
45df8bae1dSRodney W. Grimes #include <sys/param.h>
46df8bae1dSRodney W. Grimes #include <sys/systm.h>
47ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
48df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
49b715f178SLuigi Rizzo #include <sys/malloc.h>
50df8bae1dSRodney W. Grimes #include <sys/domain.h>
51df8bae1dSRodney W. Grimes #include <sys/protosw.h>
52df8bae1dSRodney W. Grimes #include <sys/socket.h>
53df8bae1dSRodney W. Grimes #include <sys/time.h>
54df8bae1dSRodney W. Grimes #include <sys/kernel.h>
55385195c0SMarko Zec #include <sys/lock.h>
56cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
57385195c0SMarko Zec #include <sys/rwlock.h>
5857f60867SMark Johnston #include <sys/sdt.h>
591025071fSGarrett Wollman #include <sys/syslog.h>
60b5e8ce9fSBruce Evans #include <sys/sysctl.h>
61df8bae1dSRodney W. Grimes 
62df8bae1dSRodney W. Grimes #include <net/if.h>
639494d596SBrooks Davis #include <net/if_types.h>
64d314ad7bSJulian Elischer #include <net/if_var.h>
6582c23ebaSBill Fenner #include <net/if_dl.h>
66b252313fSGleb Smirnoff #include <net/pfil.h>
67df8bae1dSRodney W. Grimes #include <net/route.h>
68983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
69748e0b0aSGarrett Wollman #include <net/netisr.h>
70b2bdc62aSAdrian Chadd #include <net/rss_config.h>
714b79449eSBjoern A. Zeeb #include <net/vnet.h>
72df8bae1dSRodney W. Grimes 
73df8bae1dSRodney W. Grimes #include <netinet/in.h>
7457f60867SMark Johnston #include <netinet/in_kdtrace.h>
75df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
76b5e8ce9fSBruce Evans #include <netinet/in_var.h>
77df8bae1dSRodney W. Grimes #include <netinet/ip.h>
78983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
79df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
80df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
8178b1fc05SGleb Smirnoff #include <netinet/ip_encap.h>
82eddfbb76SRobert Watson #include <netinet/ip_fw.h>
83df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
8478b1fc05SGleb Smirnoff #include <netinet/igmp_var.h>
85ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8658938916SGarrett Wollman #include <machine/in_cksum.h>
87a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
88b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
8978b1fc05SGleb Smirnoff #ifdef SCTP
9078b1fc05SGleb Smirnoff #include <netinet/sctp_var.h>
9178b1fc05SGleb Smirnoff #endif
92df8bae1dSRodney W. Grimes 
93fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
94fcf59617SAndrey V. Elsukov 
95f0068c4aSGarrett Wollman #include <sys/socketvar.h>
966ddbf1e2SGary Palmer 
97aed55708SRobert Watson #include <security/mac/mac_framework.h>
98aed55708SRobert Watson 
/*
 * ip_input() uses sizeof(struct ip) as the minimum header length, so make
 * sure the structure is exactly 20 bytes.
 */
#ifdef CTASSERT
CTASSERT(sizeof(struct ip) == 20);
#endif

/*
 * IP reassembly entry points; they are implemented in ip_reass.c.
 * ipreass_destroy() is only needed when VNETs can be torn down.
 */
extern void	ipreass_init(void);
extern void	ipreass_vnet_init(void);
#ifdef VIMAGE
extern void	ipreass_destroy(void);
#endif
1091dbefcc0SGleb Smirnoff 
11082cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
11182cea7e6SBjoern A. Zeeb 
11282cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1136df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
114eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1158b615593SMarko Zec     "Enable IP forwarding between interfaces");
1160312fbe9SPoul-Henning Kamp 
1178ad114c0SGeorge V. Neville-Neil /*
1188ad114c0SGeorge V. Neville-Neil  * Respond with an ICMP host redirect when we forward a packet out of
1198ad114c0SGeorge V. Neville-Neil  * the same interface on which it was received.  See RFC 792.
1208ad114c0SGeorge V. Neville-Neil  */
1218ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1;
1226df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
123eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1248b615593SMarko Zec     "Enable sending IP redirects");
1250312fbe9SPoul-Henning Kamp 
12694df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false;
12794df3271SGleb Smirnoff #define	V_ip_strong_es	VNET(ip_strong_es)
12894df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es,
12994df3271SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false,
13094df3271SGleb Smirnoff     "Packet's IP destination address must match address on arrival interface");
131b3e95d4eSJonathan Lemon 
1322ce85919SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_sav) = true;
1332ce85919SGleb Smirnoff #define	V_ip_sav	VNET(ip_sav)
1342ce85919SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation,
1352ce85919SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true,
1362ce85919SGleb Smirnoff     "Drop incoming packets with source address that is a local address");
1372ce85919SGleb Smirnoff 
138b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head);	/* Packet filter hooks */
139df8bae1dSRodney W. Grimes 
140d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
141d4b5cae4SRobert Watson 	.nh_name = "ip",
142d4b5cae4SRobert Watson 	.nh_handler = ip_input,
143d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
144b8bc95cdSAdrian Chadd #ifdef	RSS
1452527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
146b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
147b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
148b8bc95cdSAdrian Chadd #else
149d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
150b8bc95cdSAdrian Chadd #endif
151d4b5cae4SRobert Watson };
152ca925d9cSJonathan Lemon 
153b8bc95cdSAdrian Chadd #ifdef	RSS
154b8bc95cdSAdrian Chadd /*
155b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
156b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
157b8bc95cdSAdrian Chadd  *
158b8bc95cdSAdrian Chadd  * It should likely have something that assert it
159b8bc95cdSAdrian Chadd  * actually has valid flow details.
160b8bc95cdSAdrian Chadd  */
161b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
162b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
163b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
164b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
165499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
166b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
167b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
168b8bc95cdSAdrian Chadd };
169b8bc95cdSAdrian Chadd #endif
170b8bc95cdSAdrian Chadd 
17178b1fc05SGleb Smirnoff ipproto_input_t		*ip_protox[IPPROTO_MAX] = {
17278b1fc05SGleb Smirnoff 			    [0 ... IPPROTO_MAX - 1] = rip_input };
17378b1fc05SGleb Smirnoff ipproto_ctlinput_t	*ip_ctlprotox[IPPROTO_MAX] = {
17478b1fc05SGleb Smirnoff 			    [0 ... IPPROTO_MAX - 1] = rip_ctlinput };
17578b1fc05SGleb Smirnoff 
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17782cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17882cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
179ca925d9cSJonathan Lemon 
180c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */
181c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr)));
182c8ee75f2SGleb Smirnoff 
1830312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1840312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1853d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1860312fbe9SPoul-Henning Kamp #endif
1870312fbe9SPoul-Henning Kamp 
1881b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth);
1906df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
191eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
192eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
1931b968362SDag-Erling Smørgrav #endif
194eddfbb76SRobert Watson 
195315e3e38SRobert Watson /*
1965da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1975923c293SGleb Smirnoff  */
1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1995da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
2005da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
2015da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
2025923c293SGleb Smirnoff 
2035923c293SGleb Smirnoff #ifdef VIMAGE
2045da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2055923c293SGleb Smirnoff #endif /* VIMAGE */
2065923c293SGleb Smirnoff 
2075923c293SGleb Smirnoff /*
208315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2095923c293SGleb Smirnoff  * into ipstat treated as an array.
210315e3e38SRobert Watson  */
211315e3e38SRobert Watson void
212315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
213315e3e38SRobert Watson {
214315e3e38SRobert Watson 
2155da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
216315e3e38SRobert Watson }
217315e3e38SRobert Watson 
218315e3e38SRobert Watson void
219315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
220315e3e38SRobert Watson {
221315e3e38SRobert Watson 
2225da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
223315e3e38SRobert Watson }
224315e3e38SRobert Watson 
225d4b5cae4SRobert Watson static int
226d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
227d4b5cae4SRobert Watson {
228d4b5cae4SRobert Watson 	int error, qlimit;
229d4b5cae4SRobert Watson 
230d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
231d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
232d4b5cae4SRobert Watson 	if (error || !req->newptr)
233d4b5cae4SRobert Watson 		return (error);
234d4b5cae4SRobert Watson 	if (qlimit < 1)
235d4b5cae4SRobert Watson 		return (EINVAL);
236d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
237d4b5cae4SRobert Watson }
238d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
2397029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
2407029da5cSPawel Biernacki     sysctl_netinet_intr_queue_maxlen, "I",
241d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
242d4b5cae4SRobert Watson 
243d4b5cae4SRobert Watson static int
244d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
245d4b5cae4SRobert Watson {
246d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
247d4b5cae4SRobert Watson 	int error, qdrops;
248d4b5cae4SRobert Watson 
249d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
250d4b5cae4SRobert Watson 	qdrops = qdrops_long;
251d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
252d4b5cae4SRobert Watson 	if (error || !req->newptr)
253d4b5cae4SRobert Watson 		return (error);
254d4b5cae4SRobert Watson 	if (qdrops != 0)
255d4b5cae4SRobert Watson 		return (EINVAL);
256d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
257d4b5cae4SRobert Watson 	return (0);
258d4b5cae4SRobert Watson }
259d4b5cae4SRobert Watson 
260d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
2617029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
2627029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_queue_drops, "I",
263d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
264d4b5cae4SRobert Watson 
265b8bc95cdSAdrian Chadd #ifdef	RSS
266b8bc95cdSAdrian Chadd static int
267b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
268b8bc95cdSAdrian Chadd {
269b8bc95cdSAdrian Chadd 	int error, qlimit;
270b8bc95cdSAdrian Chadd 
271b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
272b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
273b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
274b8bc95cdSAdrian Chadd 		return (error);
275b8bc95cdSAdrian Chadd 	if (qlimit < 1)
276b8bc95cdSAdrian Chadd 		return (EINVAL);
277b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
278b8bc95cdSAdrian Chadd }
2797faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2807029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
2817029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2827faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
283b8bc95cdSAdrian Chadd 
284b8bc95cdSAdrian Chadd static int
285b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
286b8bc95cdSAdrian Chadd {
287b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
288b8bc95cdSAdrian Chadd 	int error, qdrops;
289b8bc95cdSAdrian Chadd 
290b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
291b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
292b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
293b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
294b8bc95cdSAdrian Chadd 		return (error);
295b8bc95cdSAdrian Chadd 	if (qdrops != 0)
296b8bc95cdSAdrian Chadd 		return (EINVAL);
297b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
298b8bc95cdSAdrian Chadd 	return (0);
299b8bc95cdSAdrian Chadd }
300b8bc95cdSAdrian Chadd 
3017faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
3027029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
3037029da5cSPawel Biernacki     sysctl_netinet_intr_direct_queue_drops, "I",
304b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
305b8bc95cdSAdrian Chadd #endif	/* RSS */
306b8bc95cdSAdrian Chadd 
307df8bae1dSRodney W. Grimes /*
308df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
309df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
310df8bae1dSRodney W. Grimes  */
31189128ff3SGleb Smirnoff static void
31289128ff3SGleb Smirnoff ip_vnet_init(void *arg __unused)
313df8bae1dSRodney W. Grimes {
314b252313fSGleb Smirnoff 	struct pfil_head_args args;
315df8bae1dSRodney W. Grimes 
316d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
317603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3181ed81b73SMarko Zec 
3191ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
320aea0cd04SGleb Smirnoff 	ipreass_vnet_init();
3211ed81b73SMarko Zec 
3220b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
323b252313fSGleb Smirnoff 	args.pa_version = PFIL_VERSION;
324b252313fSGleb Smirnoff 	args.pa_flags = PFIL_IN | PFIL_OUT;
325b252313fSGleb Smirnoff 	args.pa_type = PFIL_TYPE_IP4;
326b252313fSGleb Smirnoff 	args.pa_headname = PFIL_INET_NAME;
327b252313fSGleb Smirnoff 	V_inet_pfil_head = pfil_head_register(&args);
3280b4b0b0fSJulian Elischer 
329ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
330ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
331ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
332ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
333ef91a976SAndrey V. Elsukov 		    __func__);
334ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
335ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
336ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
337ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
338ef91a976SAndrey V. Elsukov 		    __func__);
339ef91a976SAndrey V. Elsukov 
340484149deSBjoern A. Zeeb #ifdef VIMAGE
341484149deSBjoern A. Zeeb 	netisr_register_vnet(&ip_nh);
342484149deSBjoern A. Zeeb #ifdef	RSS
343484149deSBjoern A. Zeeb 	netisr_register_vnet(&ip_direct_nh);
344484149deSBjoern A. Zeeb #endif
345484149deSBjoern A. Zeeb #endif
34689128ff3SGleb Smirnoff }
34789128ff3SGleb Smirnoff VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
34889128ff3SGleb Smirnoff     ip_vnet_init, NULL);
34989128ff3SGleb Smirnoff 
35089128ff3SGleb Smirnoff static void
35189128ff3SGleb Smirnoff ip_init(const void *unused __unused)
35289128ff3SGleb Smirnoff {
3531ed81b73SMarko Zec 
354aea0cd04SGleb Smirnoff 	ipreass_init();
355aea0cd04SGleb Smirnoff 
356db09bef3SAndre Oppermann 	/*
35778b1fc05SGleb Smirnoff 	 * Register statically compiled protocols, that are unlikely to
35878b1fc05SGleb Smirnoff 	 * ever become dynamic.
359db09bef3SAndre Oppermann 	 */
36078b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_ICMP, icmp_input, NULL);
36178b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IGMP, igmp_input, NULL);
36278b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_RSVP, rsvp_input, NULL);
36378b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IPV4, encap4_input, NULL);
36478b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_MOBILE, encap4_input, NULL);
36578b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_ETHERIP, encap4_input, NULL);
36678b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_GRE, encap4_input, NULL);
36778b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IPV6, encap4_input, NULL);
36878b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_PIM, encap4_input, NULL);
36978b1fc05SGleb Smirnoff #ifdef SCTP	/* XXX: has a loadable & static version */
37078b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput);
37178b1fc05SGleb Smirnoff #endif
372194a213eSAndrey A. Chernov 
373d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
374b8bc95cdSAdrian Chadd #ifdef	RSS
375b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
376b8bc95cdSAdrian Chadd #endif
377df8bae1dSRodney W. Grimes }
37889128ff3SGleb Smirnoff SYSINIT(ip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_init, NULL);
379df8bae1dSRodney W. Grimes 
#ifdef VIMAGE
/*
 * Per-VNET IPv4 teardown.
 *
 * Unregisters the netisr handlers for this VNET, then the packet filter
 * head and the IPsec helper hook heads (a failed deregistration is only
 * reported), and finally removes IPv4 addresses and routes, the
 * reassembly state and the interface address hash table.
 */
static void
ip_destroy(void *unused __unused)
{
	int error;

#ifdef	RSS
	netisr_unregister_vnet(&ip_direct_nh);
#endif
	netisr_unregister_vnet(&ip_nh);

	pfil_head_unregister(V_inet_pfil_head);

	if ((error = hhook_head_deregister(
	    V_ipsec_hhh_in[HHOOK_IPSEC_INET])) != 0) {
		printf("%s: WARNING: unable to deregister input helper hook "
		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
		    "error %d returned\n", __func__, error);
	}
	if ((error = hhook_head_deregister(
	    V_ipsec_hhh_out[HHOOK_IPSEC_INET])) != 0) {
		printf("%s: WARNING: unable to deregister output helper hook "
		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
		    "error %d returned\n", __func__, error);
	}

	/* Remove the IPv4 addresses from all interfaces. */
	in_ifscrub_all();

	/* Make sure the IPv4 routes are gone as well. */
	rib_flush_routes_family(AF_INET);

	/* Destroy IP reassembly queue. */
	ipreass_destroy();

	/* Cleanup in_ifaddr hash table; should be empty. */
	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
}

VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
#endif
4209802380eSBjoern A. Zeeb 
#ifdef	RSS
/*
 * IP direct input routine.
 *
 * Called when reinjecting completed fragments, where all of the previous
 * checking and book-keeping has already been done.  When IPsec is
 * compiled in and enabled the packet is offered to IPSEC_INPUT() first,
 * and a nonzero result ends processing here.  Otherwise the packet is
 * counted as delivered and handed to the ip_protox[] handler for its
 * protocol.
 */
void
ip_direct_input(struct mbuf *m)
{
	struct ip *ip;
	int hlen;

	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;		/* header length in bytes */

#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	if (IPSEC_ENABLED(ipv4) &&
	    IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
		return;
#endif /* IPSEC */
	IPSTAT_INC(ips_delivered);
	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
}
#endif
447b8bc95cdSAdrian Chadd 
4484d2e3692SLuigi Rizzo /*
449df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
450df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
451df8bae1dSRodney W. Grimes  */
452c67b1d17SGarrett Wollman void
453c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
454df8bae1dSRodney W. Grimes {
4559188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4565da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
457ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4580aade26eSRobert Watson 	struct ifnet *ifp;
45994df3271SGleb Smirnoff 	int hlen = 0;
46021d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
46102c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
462f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
46394df3271SGleb Smirnoff 	bool strong_es;
464b715f178SLuigi Rizzo 
465fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
466b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
467db40007dSAndrew R. Reiter 
468ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46976ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
47076ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4712b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
47253be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4738f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4749b932e9eSAndre Oppermann 		goto ours;
4752b25acc1SLuigi Rizzo 	}
4762b25acc1SLuigi Rizzo 
47786425c62SRobert Watson 	IPSTAT_INC(ips_total);
47858938916SGarrett Wollman 
4790359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < sizeof(struct ip)))
48058938916SGarrett Wollman 		goto tooshort;
48158938916SGarrett Wollman 
4820359e7a5SMateusz Guzik 	if (m->m_len < sizeof(struct ip)) {
4830359e7a5SMateusz Guzik 		m = m_pullup(m, sizeof(struct ip));
4840359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
48586425c62SRobert Watson 			IPSTAT_INC(ips_toosmall);
486c67b1d17SGarrett Wollman 			return;
487df8bae1dSRodney W. Grimes 		}
4880359e7a5SMateusz Guzik 	}
489df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
49058938916SGarrett Wollman 
4910359e7a5SMateusz Guzik 	if (__predict_false(ip->ip_v != IPVERSION)) {
49286425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
493df8bae1dSRodney W. Grimes 		goto bad;
494df8bae1dSRodney W. Grimes 	}
49558938916SGarrett Wollman 
49653be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
4970359e7a5SMateusz Guzik 	if (__predict_false(hlen < sizeof(struct ip))) {	/* minimum header length */
49886425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
499df8bae1dSRodney W. Grimes 		goto bad;
500df8bae1dSRodney W. Grimes 	}
501df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
5020359e7a5SMateusz Guzik 		m = m_pullup(m, hlen);
5030359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
50486425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
505c67b1d17SGarrett Wollman 			return;
506df8bae1dSRodney W. Grimes 		}
507df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
508df8bae1dSRodney W. Grimes 	}
50933841545SHajimu UMEMOTO 
51057f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
51157f60867SMark Johnston 
5126c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
5130aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5146c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
5156c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
5160aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
51786425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
51833841545SHajimu UMEMOTO 			goto bad;
51933841545SHajimu UMEMOTO 		}
52033841545SHajimu UMEMOTO 	}
52133841545SHajimu UMEMOTO 
522db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
523db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
524db4f9cc7SJonathan Lemon 	} else {
52558938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
52647c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
52758938916SGarrett Wollman 		} else {
52847c861ecSBrian Somers 			sum = in_cksum(m, hlen);
52958938916SGarrett Wollman 		}
530db4f9cc7SJonathan Lemon 	}
5310359e7a5SMateusz Guzik 	if (__predict_false(sum)) {
53286425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
533df8bae1dSRodney W. Grimes 		goto bad;
534df8bae1dSRodney W. Grimes 	}
535df8bae1dSRodney W. Grimes 
53621d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
5370359e7a5SMateusz Guzik 	if (__predict_false(ip_len < hlen)) {
53886425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
539df8bae1dSRodney W. Grimes 		goto bad;
540df8bae1dSRodney W. Grimes 	}
541df8bae1dSRodney W. Grimes 
542df8bae1dSRodney W. Grimes 	/*
543df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
544df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
545df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
546df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
547df8bae1dSRodney W. Grimes 	 */
5480359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < ip_len)) {
54958938916SGarrett Wollman tooshort:
55086425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
551df8bae1dSRodney W. Grimes 		goto bad;
552df8bae1dSRodney W. Grimes 	}
55321d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
554df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55521d172a3SGleb Smirnoff 			m->m_len = ip_len;
55621d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
557df8bae1dSRodney W. Grimes 		} else
55821d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
559df8bae1dSRodney W. Grimes 	}
560b8bc95cdSAdrian Chadd 
561ad9f4d6aSAndrey V. Elsukov 	/*
562ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
563f389439fSBjoern A. Zeeb 	 * ip_tryforward() may generate redirects these days.
564f389439fSBjoern A. Zeeb 	 * XXX the logic below falling through to normal processing
565f389439fSBjoern A. Zeeb 	 * if redirects are required should be revisited as well.
566ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
567ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
568ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
569ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
570ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
571ad9f4d6aSAndrey V. Elsukov 	 */
5728ad114c0SGeorge V. Neville-Neil 	if (V_ipforwarding != 0
573fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
574fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
575fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
576ad9f4d6aSAndrey V. Elsukov #endif
577ad9f4d6aSAndrey V. Elsukov 	    ) {
578f389439fSBjoern A. Zeeb 		/*
579f389439fSBjoern A. Zeeb 		 * ip_dooptions() was run so we can ignore the source route (or
580f389439fSBjoern A. Zeeb 		 * any IP options case) case for redirects in ip_tryforward().
581f389439fSBjoern A. Zeeb 		 */
582ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
58333872124SGeorge V. Neville-Neil 			return;
584ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
585ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
586ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
587ad9f4d6aSAndrey V. Elsukov 			goto ours;
588ad9f4d6aSAndrey V. Elsukov 		}
589ad9f4d6aSAndrey V. Elsukov 	}
590fcf59617SAndrey V. Elsukov 
591fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
59214dd6717SSam Leffler 	/*
593ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
59414dd6717SSam Leffler 	 */
595fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
596fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
597c21fd232SAndre Oppermann 			goto passin;
598ad9f4d6aSAndrey V. Elsukov #endif
599fcf59617SAndrey V. Elsukov 
600c4ac87eaSDarren Reed 	/*
601134ea224SSam Leffler 	 * Run through list of hooks for input packets.
602f51f805fSSam Leffler 	 *
603f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
604f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
605f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
606c4ac87eaSDarren Reed 	 */
607c21fd232SAndre Oppermann 
608c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
609b252313fSGleb Smirnoff 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
610c21fd232SAndre Oppermann 		goto passin;
611c21fd232SAndre Oppermann 
612f51f805fSSam Leffler 	odst = ip->ip_dst;
613dda6376bSMateusz Guzik 	if (pfil_mbuf_in(V_inet_pfil_head, &m, ifp, NULL) !=
614b252313fSGleb Smirnoff 	    PFIL_PASS)
615beec8214SDarren Reed 		return;
616134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
617c4ac87eaSDarren Reed 		return;
6189b932e9eSAndre Oppermann 
619c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
62002c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6219b932e9eSAndre Oppermann 
6229b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6239b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6249b932e9eSAndre Oppermann 		goto ours;
6259b932e9eSAndre Oppermann 	}
626ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
627de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
628099dd043SAndre Oppermann 			/*
629ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
630ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
631ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
632099dd043SAndre Oppermann 			 */
633ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
634099dd043SAndre Oppermann 			return;
635099dd043SAndre Oppermann 		}
636ffdbf9daSAndrey V. Elsukov 	}
637c21fd232SAndre Oppermann passin:
63821d172a3SGleb Smirnoff 
63921d172a3SGleb Smirnoff 	/*
640df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
641df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
642df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
643df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
644df8bae1dSRodney W. Grimes 	 */
6459b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
646c67b1d17SGarrett Wollman 		return;
647df8bae1dSRodney W. Grimes 
648f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
649f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
650f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
651f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
652f0068c4aSGarrett Wollman 	 * grabbing the packet.
653f0068c4aSGarrett Wollman          */
6540359e7a5SMateusz Guzik 	if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on)
655f0068c4aSGarrett Wollman 		goto ours;
656f0068c4aSGarrett Wollman 
657df8bae1dSRodney W. Grimes 	/*
658df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
659cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
660cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
661cc766e04SGarrett Wollman 	 * with it).
662df8bae1dSRodney W. Grimes 	 */
663d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
664cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
665cc766e04SGarrett Wollman 		goto ours;
666cc766e04SGarrett Wollman 
6677538a9a0SJonathan Lemon 	/*
668823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
669823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
67094df3271SGleb Smirnoff 	 * strong ES model) with a list of additional predicates:
67194df3271SGleb Smirnoff 	 * - if IP forwarding is disabled
67294df3271SGleb Smirnoff 	 * - the packet is not locally generated
67394df3271SGleb Smirnoff 	 * - the packet is not subject to 'ipfw fwd'
67494df3271SGleb Smirnoff 	 * - Interface is not running CARP. If the packet got here, we already
67594df3271SGleb Smirnoff 	 *   checked it with carp_iamatch() and carp_forus().
676823db0e9SDon Lewis 	 */
67794df3271SGleb Smirnoff 	strong_es = V_ip_strong_es && (V_ipforwarding == 0) &&
67881674f12SGleb Smirnoff 	    ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
67954bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
680823db0e9SDon Lewis 
681ca925d9cSJonathan Lemon 	/*
682ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
683ca925d9cSJonathan Lemon 	 */
684c8ee75f2SGleb Smirnoff 	CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
68594df3271SGleb Smirnoff 		if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr)
68694df3271SGleb Smirnoff 			continue;
68794df3271SGleb Smirnoff 
688f9e354dfSJulian Elischer 		/*
68994df3271SGleb Smirnoff 		 * net.inet.ip.rfc1122_strong_es: the address matches, verify
69094df3271SGleb Smirnoff 		 * that the packet arrived via the correct interface.
691f9e354dfSJulian Elischer 		 */
69294df3271SGleb Smirnoff 		if (__predict_false(strong_es && ia->ia_ifp != ifp)) {
69394df3271SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
69494df3271SGleb Smirnoff 			goto bad;
695ca925d9cSJonathan Lemon 		}
69694df3271SGleb Smirnoff 
6972ce85919SGleb Smirnoff 		/*
6982ce85919SGleb Smirnoff 		 * net.inet.ip.source_address_validation: drop incoming
6992ce85919SGleb Smirnoff 		 * packets that pretend to be ours.
7002ce85919SGleb Smirnoff 		 */
7012ce85919SGleb Smirnoff 		if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) &&
7022ce85919SGleb Smirnoff 		    __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) {
7032ce85919SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
7042ce85919SGleb Smirnoff 			goto bad;
7052ce85919SGleb Smirnoff 		}
7062ce85919SGleb Smirnoff 
70794df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
70894df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
70994df3271SGleb Smirnoff 		goto ours;
7108c0fec80SRobert Watson 	}
7112d9cfabaSRobert Watson 
712823db0e9SDon Lewis 	/*
713ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
714ca925d9cSJonathan Lemon 	 *
715ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
716ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
717ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
718ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
719823db0e9SDon Lewis 	 */
72081674f12SGleb Smirnoff 	if (ifp->if_flags & IFF_BROADCAST) {
721d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
722ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
723ca925d9cSJonathan Lemon 				continue;
724ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
725df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7260aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7277caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7287caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7297caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
730df8bae1dSRodney W. Grimes 				goto ours;
7310aade26eSRobert Watson 			}
7320ac40133SBrian Somers #ifdef BOOTP_COMPAT
7330aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7347caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7357caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7367caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
737ca925d9cSJonathan Lemon 				goto ours;
7380aade26eSRobert Watson 			}
7390ac40133SBrian Somers #endif
740df8bae1dSRodney W. Grimes 		}
74119e5b0a7SRobert Watson 		ia = NULL;
742df8bae1dSRodney W. Grimes 	}
743df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
74403b0505bSZhenlei Huang 		/*
74503b0505bSZhenlei Huang 		 * RFC 3927 2.7: Do not forward multicast packets from
74603b0505bSZhenlei Huang 		 * IN_LINKLOCAL.
74703b0505bSZhenlei Huang 		 */
7483d846e48SZhenlei Huang 		if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
749df8bae1dSRodney W. Grimes 			/*
750df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
751df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
752df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
753df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
754df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
755df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
756df8bae1dSRodney W. Grimes 			 */
7570aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
75886425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
759df8bae1dSRodney W. Grimes 				m_freem(m);
760c67b1d17SGarrett Wollman 				return;
761df8bae1dSRodney W. Grimes 			}
762df8bae1dSRodney W. Grimes 
763df8bae1dSRodney W. Grimes 			/*
76411612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
765df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
766df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
767df8bae1dSRodney W. Grimes 			 */
76865634ae7SWojciech Macek 			if (ip->ip_p == IPPROTO_IGMP) {
769df8bae1dSRodney W. Grimes 				goto ours;
77065634ae7SWojciech Macek 			}
77186425c62SRobert Watson 			IPSTAT_INC(ips_forward);
772df8bae1dSRodney W. Grimes 		}
773df8bae1dSRodney W. Grimes 		/*
774d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
775d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
776d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
777df8bae1dSRodney W. Grimes 		 */
778df8bae1dSRodney W. Grimes 		goto ours;
779df8bae1dSRodney W. Grimes 	}
780df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
781df8bae1dSRodney W. Grimes 		goto ours;
782df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
783df8bae1dSRodney W. Grimes 		goto ours;
78403b0505bSZhenlei Huang 	/* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */
7853d846e48SZhenlei Huang 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
7863d846e48SZhenlei Huang 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
7873d846e48SZhenlei Huang 		IPSTAT_INC(ips_cantforward);
7883d846e48SZhenlei Huang 		m_freem(m);
7893d846e48SZhenlei Huang 		return;
7903d846e48SZhenlei Huang 	}
791df8bae1dSRodney W. Grimes 
7926a800098SYoshinobu Inoue 	/*
793df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
794df8bae1dSRodney W. Grimes 	 */
795603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
79686425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
797df8bae1dSRodney W. Grimes 		m_freem(m);
798546f251bSChris D. Faulhaber 	} else {
7999b932e9eSAndre Oppermann 		ip_forward(m, dchg);
800546f251bSChris D. Faulhaber 	}
801c67b1d17SGarrett Wollman 	return;
802df8bae1dSRodney W. Grimes 
803df8bae1dSRodney W. Grimes ours:
804d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
805d0ebc0d2SYaroslav Tykhiy 	/*
806d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
807d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
808d0ebc0d2SYaroslav Tykhiy 	 */
8097caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
810d0ebc0d2SYaroslav Tykhiy 		return;
811d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
812d0ebc0d2SYaroslav Tykhiy 
81363f8d699SJordan K. Hubbard 	/*
814b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
815ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
816df8bae1dSRodney W. Grimes 	 */
8178f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
818aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
819f0cada84SAndre Oppermann 		m = ip_reass(m);
820f0cada84SAndre Oppermann 		if (m == NULL)
821c67b1d17SGarrett Wollman 			return;
8226a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8237e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
82453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
825f0cada84SAndre Oppermann 	}
826f0cada84SAndre Oppermann 
827fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
828fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
829fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
830fcf59617SAndrey V. Elsukov 			return;
831fcf59617SAndrey V. Elsukov 	}
832b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
83333841545SHajimu UMEMOTO 
834df8bae1dSRodney W. Grimes 	/*
835df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
836df8bae1dSRodney W. Grimes 	 */
83786425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8389b932e9eSAndre Oppermann 
83978b1fc05SGleb Smirnoff 	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
840c67b1d17SGarrett Wollman 	return;
841df8bae1dSRodney W. Grimes bad:
842df8bae1dSRodney W. Grimes 	m_freem(m);
843c67b1d17SGarrett Wollman }
844c67b1d17SGarrett Wollman 
845de38924dSAndre Oppermann int
84678b1fc05SGleb Smirnoff ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl)
847de38924dSAndre Oppermann {
848de38924dSAndre Oppermann 
84978b1fc05SGleb Smirnoff 	MPASS(proto > 0);
850de38924dSAndre Oppermann 
851de38924dSAndre Oppermann 	/*
852de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
85378b1fc05SGleb Smirnoff 	 * already.  An index pointing to rip_input() is unused.
854de38924dSAndre Oppermann 	 */
85578b1fc05SGleb Smirnoff 	if (ip_protox[proto] == rip_input) {
85678b1fc05SGleb Smirnoff 		ip_protox[proto] = input;
85778b1fc05SGleb Smirnoff 		ip_ctlprotox[proto] = ctl;
858de38924dSAndre Oppermann 		return (0);
85978b1fc05SGleb Smirnoff 	} else
86078b1fc05SGleb Smirnoff 		return (EEXIST);
861de38924dSAndre Oppermann }
862de38924dSAndre Oppermann 
863de38924dSAndre Oppermann int
86478b1fc05SGleb Smirnoff ipproto_unregister(uint8_t proto)
865de38924dSAndre Oppermann {
866de38924dSAndre Oppermann 
86778b1fc05SGleb Smirnoff 	MPASS(proto > 0);
868de38924dSAndre Oppermann 
86978b1fc05SGleb Smirnoff 	if (ip_protox[proto] != rip_input) {
87078b1fc05SGleb Smirnoff 		ip_protox[proto] = rip_input;
87178b1fc05SGleb Smirnoff 		ip_ctlprotox[proto] = rip_ctlinput;
872de38924dSAndre Oppermann 		return (0);
87378b1fc05SGleb Smirnoff 	} else
87478b1fc05SGleb Smirnoff 		return (ENOENT);
875de38924dSAndre Oppermann }
876de38924dSAndre Oppermann 
877df8bae1dSRodney W. Grimes /*
878df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
879df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
880df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
881df8bae1dSRodney W. Grimes  * of codes and types.
882df8bae1dSRodney W. Grimes  *
883df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
884df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
885df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
886df8bae1dSRodney W. Grimes  * protocol deal with that.
887df8bae1dSRodney W. Grimes  *
888df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
889df8bae1dSRodney W. Grimes  * via a source route.
890df8bae1dSRodney W. Grimes  */
8919b932e9eSAndre Oppermann void
8929b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
893df8bae1dSRodney W. Grimes {
8942b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
895efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
896df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
897d14122b0SErmal Luçi 	struct sockaddr_in *sin;
8989b932e9eSAndre Oppermann 	struct in_addr dest;
899b835b6feSBjoern A. Zeeb 	struct route ro;
9004043ee3cSAlexander V. Chernikov 	uint32_t flowid;
901c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9023efc3014SJulian Elischer 
903b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
904b8a6e03fSGleb Smirnoff 
9059b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
90686425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
907df8bae1dSRodney W. Grimes 		m_freem(m);
908df8bae1dSRodney W. Grimes 		return;
909df8bae1dSRodney W. Grimes 	}
910fcf59617SAndrey V. Elsukov 	if (
911fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
912fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
913fcf59617SAndrey V. Elsukov #endif
914fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
915fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9168922ddbeSAndrey V. Elsukov 		return;
9178922ddbeSAndrey V. Elsukov 	}
918df8bae1dSRodney W. Grimes 
919d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
920d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
921d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
922d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
923d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
9244043ee3cSAlexander V. Chernikov 	flowid = m->m_pkthdr.flowid;
9254043ee3cSAlexander V. Chernikov 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
926983066f0SAlexander V. Chernikov 	if (ro.ro_nh != NULL) {
927983066f0SAlexander V. Chernikov 		ia = ifatoia(ro.ro_nh->nh_ifa);
92856844a62SErmal Luçi 	} else
92956844a62SErmal Luçi 		ia = NULL;
930df8bae1dSRodney W. Grimes 	/*
931bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
932bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
933bfef7ed4SIan Dowse 	 *
9344d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9354d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9364d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9374d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9384d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9394d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9404d2e3692SLuigi Rizzo 	 *
941c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
942bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
943bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
944bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
945df8bae1dSRodney W. Grimes 	 */
946dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
947eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
9489967cafcSSam Leffler 		/*
9499967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
9509967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
9519967cafcSSam Leffler 		 * be conservative and just discard the copy since
9529967cafcSSam Leffler 		 * code below may some day want the tags.
9539967cafcSSam Leffler 		 */
9549967cafcSSam Leffler 		m_free(mcopy);
9559967cafcSSam Leffler 		mcopy = NULL;
9569967cafcSSam Leffler 	}
957bfef7ed4SIan Dowse 	if (mcopy != NULL) {
9588f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
959e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
960bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
961bfef7ed4SIan Dowse 	}
96204287599SRuslan Ermilov #ifdef IPSTEALTH
963fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
96404287599SRuslan Ermilov #endif
96504287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
966fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
967fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
968fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
969fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
970d16a2e47SMark Johnston 			RO_NHFREE(&ro);
971fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
972fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
973fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
974b8a6e03fSGleb Smirnoff 			return;
97504287599SRuslan Ermilov 		}
976fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
977fcf59617SAndrey V. Elsukov 	}
978fcf59617SAndrey V. Elsukov #endif /* IPSEC */
979df8bae1dSRodney W. Grimes 	/*
980df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
981df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
982df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
983df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
984df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
985df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
986df8bae1dSRodney W. Grimes 	 */
9879b932e9eSAndre Oppermann 	dest.s_addr = 0;
988efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
989efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
990983066f0SAlexander V. Chernikov 		struct nhop_object *nh;
99102c1c707SAndre Oppermann 
992983066f0SAlexander V. Chernikov 		nh = ro.ro_nh;
99302c1c707SAndre Oppermann 
994983066f0SAlexander V. Chernikov 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
995983066f0SAlexander V. Chernikov 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
996df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
997df8bae1dSRodney W. Grimes 
998983066f0SAlexander V. Chernikov 			if (nh_ia != NULL &&
999983066f0SAlexander V. Chernikov 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
1000df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1001df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1002df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
100362e1a437SZhenlei Huang 				if (nh->nh_flags & NHF_GATEWAY) {
100462e1a437SZhenlei Huang 				    if (nh->gw_sa.sa_family == AF_INET)
100562e1a437SZhenlei Huang 					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
100662e1a437SZhenlei Huang 				    else /* Do not redirect in case gw is AF_INET6 */
100762e1a437SZhenlei Huang 					type = 0;
100862e1a437SZhenlei Huang 				} else
100962e1a437SZhenlei Huang 					dest.s_addr = ip->ip_dst.s_addr;
1010df8bae1dSRodney W. Grimes 			}
1011df8bae1dSRodney W. Grimes 		}
101202c1c707SAndre Oppermann 	}
1013df8bae1dSRodney W. Grimes 
1014b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1015b835b6feSBjoern A. Zeeb 
1016983066f0SAlexander V. Chernikov 	if (error == EMSGSIZE && ro.ro_nh)
1017983066f0SAlexander V. Chernikov 		mtu = ro.ro_nh->nh_mtu;
1018983066f0SAlexander V. Chernikov 	RO_NHFREE(&ro);
1019b835b6feSBjoern A. Zeeb 
1020df8bae1dSRodney W. Grimes 	if (error)
102186425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1022df8bae1dSRodney W. Grimes 	else {
102386425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1024df8bae1dSRodney W. Grimes 		if (type)
102586425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1026df8bae1dSRodney W. Grimes 		else {
10279188b4a1SAndre Oppermann 			if (mcopy)
1028df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1029b8a6e03fSGleb Smirnoff 			return;
1030df8bae1dSRodney W. Grimes 		}
1031df8bae1dSRodney W. Grimes 	}
10324f6c66ccSMatt Macy 	if (mcopy == NULL)
1033b8a6e03fSGleb Smirnoff 		return;
10344f6c66ccSMatt Macy 
1035df8bae1dSRodney W. Grimes 	switch (error) {
1036df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1037df8bae1dSRodney W. Grimes 		/* type, code set above */
1038df8bae1dSRodney W. Grimes 		break;
1039df8bae1dSRodney W. Grimes 
1040efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1041df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1042df8bae1dSRodney W. Grimes 	case ENETDOWN:
1043df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1044df8bae1dSRodney W. Grimes 	default:
1045df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1046df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1047df8bae1dSRodney W. Grimes 		break;
1048df8bae1dSRodney W. Grimes 
1049df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1050df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1051df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
10529b932e9eSAndre Oppermann 		/*
1053b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1054b835b6feSBjoern A. Zeeb 		 * interface MTU.
1055ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1056ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1057ab48768bSAndre Oppermann 		 * current packet size.
10589b932e9eSAndre Oppermann 		 */
1059b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1060b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1061b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1062b835b6feSBjoern A. Zeeb 		} else {
1063ab48768bSAndre Oppermann 			if (ia != NULL)
1064c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1065ab48768bSAndre Oppermann 			else
10668f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1067ab48768bSAndre Oppermann 		}
106886425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1069df8bae1dSRodney W. Grimes 		break;
1070df8bae1dSRodney W. Grimes 
1071df8bae1dSRodney W. Grimes 	case ENOBUFS:
10723a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
10733a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1074b8a6e03fSGleb Smirnoff 		return;
1075df8bae1dSRodney W. Grimes 	}
1076c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1077df8bae1dSRodney W. Grimes }
1078df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when the socket `sp' has SO_TIMESTAMP set in so_options
 * and its so_ts_clock equals `ct'; evaluates to 0 otherwise.
 *
 * Both arguments are parenthesized so that arbitrary expressions (e.g.
 * `&so' or `cond ? a : b') expand with the intended precedence.  Note
 * that `sp' is evaluated twice.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1081339efd75SMaxim Sobolev 
108282c23ebaSBill Fenner void
1083f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1084f2565d68SRobert Watson     struct mbuf *m)
108582c23ebaSBill Fenner {
108606193f0bSKonstantin Belousov 	bool stamped;
10878b615593SMarko Zec 
108806193f0bSKonstantin Belousov 	stamped = false;
1089339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1090339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
109106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
109206193f0bSKonstantin Belousov 		struct timespec ts1;
1093be8a62e8SPoul-Henning Kamp 
109406193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
109506193f0bSKonstantin Belousov 		    M_TSTMP)) {
109606193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
109706193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
109806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
109906193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
110006193f0bSKonstantin Belousov 		} else {
1101be8a62e8SPoul-Henning Kamp 			bintime(&bt);
110206193f0bSKonstantin Belousov 		}
1103b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&bt, sizeof(bt), SCM_BINTIME,
1104b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
110506193f0bSKonstantin Belousov 		if (*mp != NULL) {
1106be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
110706193f0bSKonstantin Belousov 			stamped = true;
110806193f0bSKonstantin Belousov 		}
1109be8a62e8SPoul-Henning Kamp 	}
1110339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
111106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
1112c012cfe6SEd Maste 		struct timespec ts1;
111382c23ebaSBill Fenner 		struct timeval tv;
111482c23ebaSBill Fenner 
111506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
111606193f0bSKonstantin Belousov 		    M_TSTMP)) {
111706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
111806193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
111906193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
112006193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
112106193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
112206193f0bSKonstantin Belousov 		} else {
1123339efd75SMaxim Sobolev 			microtime(&tv);
112406193f0bSKonstantin Belousov 		}
1125b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP,
1126b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
112706193f0bSKonstantin Belousov 		if (*mp != NULL) {
112882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
112906193f0bSKonstantin Belousov 			stamped = true;
113006193f0bSKonstantin Belousov 		}
1131339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
113206193f0bSKonstantin Belousov 		struct bintime boottimebin;
113306193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1134339efd75SMaxim Sobolev 
113506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
113606193f0bSKonstantin Belousov 		    M_TSTMP)) {
113706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
113806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
113906193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
11406040822cSAlan Somers 			timespecadd(&ts, &ts1, &ts);
114106193f0bSKonstantin Belousov 		} else {
1142339efd75SMaxim Sobolev 			nanotime(&ts);
114306193f0bSKonstantin Belousov 		}
1144b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ts, sizeof(ts), SCM_REALTIME,
1145b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
114606193f0bSKonstantin Belousov 		if (*mp != NULL) {
1147339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
114806193f0bSKonstantin Belousov 			stamped = true;
114906193f0bSKonstantin Belousov 		}
1150339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1151339efd75SMaxim Sobolev 		struct timespec ts;
1152339efd75SMaxim Sobolev 
115306193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
115406193f0bSKonstantin Belousov 		    M_TSTMP))
115506193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
115606193f0bSKonstantin Belousov 		else
1157339efd75SMaxim Sobolev 			nanouptime(&ts);
1158b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ts, sizeof(ts), SCM_MONOTONIC,
1159b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
116006193f0bSKonstantin Belousov 		if (*mp != NULL) {
116106193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
116206193f0bSKonstantin Belousov 			stamped = true;
116306193f0bSKonstantin Belousov 		}
116406193f0bSKonstantin Belousov 	}
116506193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
116606193f0bSKonstantin Belousov 	    M_TSTMP)) {
116706193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
116806193f0bSKonstantin Belousov 
116906193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
117006193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
117106193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
117206193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
1173b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&sti, sizeof(sti), SCM_TIME_INFO,
1174b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
117506193f0bSKonstantin Belousov 		if (*mp != NULL)
1176339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1177be8a62e8SPoul-Henning Kamp 	}
117882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
1179b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr),
1180b46667c6SGleb Smirnoff 		    IP_RECVDSTADDR, IPPROTO_IP, M_NOWAIT);
118182c23ebaSBill Fenner 		if (*mp)
118282c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118382c23ebaSBill Fenner 	}
11844957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
1185b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_ttl, sizeof(u_char), IP_RECVTTL,
1186b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
11874957466bSMatthew N. Dodd 		if (*mp)
11884957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
11894957466bSMatthew N. Dodd 	}
119082c23ebaSBill Fenner #ifdef notyet
119182c23ebaSBill Fenner 	/* XXX
119282c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
119382c23ebaSBill Fenner 	 * than they already were.
119482c23ebaSBill Fenner 	 */
119582c23ebaSBill Fenner 	/* options were tossed already */
119682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
1197b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(opts_deleted_above,
1198b46667c6SGleb Smirnoff 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, M_NOWAIT);
119982c23ebaSBill Fenner 		if (*mp)
120082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120182c23ebaSBill Fenner 	}
120282c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
120382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1204b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(ip_srcroute(m), sizeof(struct in_addr),
1205b46667c6SGleb Smirnoff 		    IP_RECVRETOPTS, IPPROTO_IP, M_NOWAIT);
120682c23ebaSBill Fenner 		if (*mp)
120782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120882c23ebaSBill Fenner 	}
120982c23ebaSBill Fenner #endif
121082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1211d314ad7bSJulian Elischer 		struct ifnet *ifp;
1212d314ad7bSJulian Elischer 		struct sdlbuf {
121382c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1214d314ad7bSJulian Elischer 			u_char	pad[32];
1215d314ad7bSJulian Elischer 		} sdlbuf;
1216d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1217d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
121882c23ebaSBill Fenner 
1219db0ac6deSCy Schubert 		if ((ifp = m->m_pkthdr.rcvif)) {
12204a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1221d314ad7bSJulian Elischer 			/*
1222d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1223d314ad7bSJulian Elischer 			 */
122446f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
122546f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1226d314ad7bSJulian Elischer 				goto makedummy;
1227d314ad7bSJulian Elischer 			}
1228d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1229d314ad7bSJulian Elischer 		} else {
1230d314ad7bSJulian Elischer makedummy:
123146f2df9cSSergey Kandaurov 			sdl2->sdl_len =
123246f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1233d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1234d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1235d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1236d314ad7bSJulian Elischer 		}
1237b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(sdl2, sdl2->sdl_len, IP_RECVIF,
1238b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
123982c23ebaSBill Fenner 		if (*mp)
124082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
124182c23ebaSBill Fenner 	}
12423cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
1243b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_tos, sizeof(u_char), IP_RECVTOS,
1244b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
12453cca425bSMichael Tuexen 		if (*mp)
12463cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12473cca425bSMichael Tuexen 	}
12489d3ddf43SAdrian Chadd 
12499d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12509d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12519d3ddf43SAdrian Chadd 
12529d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12539d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12549d3ddf43SAdrian Chadd 
12559d3ddf43SAdrian Chadd 		/*
12569d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
12579d3ddf43SAdrian Chadd 		 * other - don't populate both?
12589d3ddf43SAdrian Chadd 		 */
1259b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&flowid, sizeof(uint32_t), IP_FLOWID,
1260b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
12619d3ddf43SAdrian Chadd 		if (*mp)
12629d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
1263b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&flow_type, sizeof(uint32_t),
1264b46667c6SGleb Smirnoff 		    IP_FLOWTYPE, IPPROTO_IP, M_NOWAIT);
12659d3ddf43SAdrian Chadd 		if (*mp)
12669d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12679d3ddf43SAdrian Chadd 	}
12689d3ddf43SAdrian Chadd 
12699d3ddf43SAdrian Chadd #ifdef	RSS
12709d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
12719d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12729d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
12739d3ddf43SAdrian Chadd 
12749d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12759d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12769d3ddf43SAdrian Chadd 
12779d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
1278b46667c6SGleb Smirnoff 			*mp = sbcreatecontrol(&rss_bucketid, sizeof(uint32_t),
1279b46667c6SGleb Smirnoff 			    IP_RSSBUCKETID, IPPROTO_IP, M_NOWAIT);
12809d3ddf43SAdrian Chadd 			if (*mp)
12819d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
12829d3ddf43SAdrian Chadd 		}
12839d3ddf43SAdrian Chadd 	}
12849d3ddf43SAdrian Chadd #endif
128582c23ebaSBill Fenner }
128682c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
12935f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on);
129482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
129582cea7e6SBjoern A. Zeeb 
129682cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
129782cea7e6SBjoern A. Zeeb 
1298df8bae1dSRodney W. Grimes int
1299f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1300f0068c4aSGarrett Wollman {
13018b615593SMarko Zec 
1302603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1303f0068c4aSGarrett Wollman 		return EADDRINUSE;
1304f0068c4aSGarrett Wollman 
1305603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13061c5de19aSGarrett Wollman 	/*
13071c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13081c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13091c5de19aSGarrett Wollman 	 */
1310603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1311603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1312603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13131c5de19aSGarrett Wollman 	}
1314f0068c4aSGarrett Wollman 
1315f0068c4aSGarrett Wollman 	return 0;
1316f0068c4aSGarrett Wollman }
1317f0068c4aSGarrett Wollman 
1318f0068c4aSGarrett Wollman int
1319f0068c4aSGarrett Wollman ip_rsvp_done(void)
1320f0068c4aSGarrett Wollman {
13218b615593SMarko Zec 
1322603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13231c5de19aSGarrett Wollman 	/*
13241c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13251c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13261c5de19aSGarrett Wollman 	 */
1327603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1328603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1329603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13301c5de19aSGarrett Wollman 	}
1331f0068c4aSGarrett Wollman 	return 0;
1332f0068c4aSGarrett Wollman }
1333bbb4330bSLuigi Rizzo 
13348f5a8818SKevin Lo int
13358f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1336bbb4330bSLuigi Rizzo {
13378f5a8818SKevin Lo 	struct mbuf *m;
13388f5a8818SKevin Lo 
13398f5a8818SKevin Lo 	m = *mp;
13408f5a8818SKevin Lo 	*mp = NULL;
13418b615593SMarko Zec 
1342bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13438f5a8818SKevin Lo 		*mp = m;
13448f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13458f5a8818SKevin Lo 		return (IPPROTO_DONE);
1346bbb4330bSLuigi Rizzo 	}
1347bbb4330bSLuigi Rizzo 
1348bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1349bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1350bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1351bbb4330bSLuigi Rizzo 	 */
1352bbb4330bSLuigi Rizzo 
1353603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1354bbb4330bSLuigi Rizzo 		m_freem(m);
13558f5a8818SKevin Lo 		return (IPPROTO_DONE);
1356bbb4330bSLuigi Rizzo 	}
1357bbb4330bSLuigi Rizzo 
1358603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
13598f5a8818SKevin Lo 		*mp = m;
13608f5a8818SKevin Lo 		rip_input(mp, offp, proto);
13618f5a8818SKevin Lo 		return (IPPROTO_DONE);
1362bbb4330bSLuigi Rizzo 	}
1363bbb4330bSLuigi Rizzo 	/* Drop the packet */
1364bbb4330bSLuigi Rizzo 	m_freem(m);
13658f5a8818SKevin Lo 	return (IPPROTO_DONE);
1366bbb4330bSLuigi Rizzo }
1367