xref: /freebsd/sys/netinet/ip_input.c (revision 78b1fc05b20504ed13aeeb4a5b47443246cabaeb)
1c398230bSWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
15fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
31df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
32df8bae1dSRodney W. Grimes  */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4033553d6eSBjoern A. Zeeb #include "opt_route.h"
41b8bc95cdSAdrian Chadd #include "opt_rss.h"
42*78b1fc05SGleb Smirnoff #include "opt_sctp.h"
4374a9466cSGary Palmer 
44df8bae1dSRodney W. Grimes #include <sys/param.h>
45df8bae1dSRodney W. Grimes #include <sys/systm.h>
46ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
47df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
48b715f178SLuigi Rizzo #include <sys/malloc.h>
49df8bae1dSRodney W. Grimes #include <sys/domain.h>
50df8bae1dSRodney W. Grimes #include <sys/protosw.h>
51df8bae1dSRodney W. Grimes #include <sys/socket.h>
52df8bae1dSRodney W. Grimes #include <sys/time.h>
53df8bae1dSRodney W. Grimes #include <sys/kernel.h>
54385195c0SMarko Zec #include <sys/lock.h>
55cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
56385195c0SMarko Zec #include <sys/rwlock.h>
5757f60867SMark Johnston #include <sys/sdt.h>
581025071fSGarrett Wollman #include <sys/syslog.h>
59b5e8ce9fSBruce Evans #include <sys/sysctl.h>
60df8bae1dSRodney W. Grimes 
61df8bae1dSRodney W. Grimes #include <net/if.h>
629494d596SBrooks Davis #include <net/if_types.h>
63d314ad7bSJulian Elischer #include <net/if_var.h>
6482c23ebaSBill Fenner #include <net/if_dl.h>
65b252313fSGleb Smirnoff #include <net/pfil.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
68748e0b0aSGarrett Wollman #include <net/netisr.h>
69b2bdc62aSAdrian Chadd #include <net/rss_config.h>
704b79449eSBjoern A. Zeeb #include <net/vnet.h>
71df8bae1dSRodney W. Grimes 
72df8bae1dSRodney W. Grimes #include <netinet/in.h>
7357f60867SMark Johnston #include <netinet/in_kdtrace.h>
74df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
75b5e8ce9fSBruce Evans #include <netinet/in_var.h>
76df8bae1dSRodney W. Grimes #include <netinet/ip.h>
77983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
78df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
79df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
80*78b1fc05SGleb Smirnoff #include <netinet/ip_encap.h>
81eddfbb76SRobert Watson #include <netinet/ip_fw.h>
82df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
83*78b1fc05SGleb Smirnoff #include <netinet/igmp_var.h>
84ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8558938916SGarrett Wollman #include <machine/in_cksum.h>
86a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
87b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
88*78b1fc05SGleb Smirnoff #ifdef SCTP
89*78b1fc05SGleb Smirnoff #include <netinet/sctp_var.h>
90*78b1fc05SGleb Smirnoff #endif
91df8bae1dSRodney W. Grimes 
92fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
93fcf59617SAndrey V. Elsukov 
94f0068c4aSGarrett Wollman #include <sys/socketvar.h>
956ddbf1e2SGary Palmer 
96aed55708SRobert Watson #include <security/mac/mac_framework.h>
97aed55708SRobert Watson 
98d2035ffbSEd Maste #ifdef CTASSERT
99d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20);
100d2035ffbSEd Maste #endif
101d2035ffbSEd Maste 
/*
 * Prototypes for the IP reassembly entry points.  The implementations
 * live in ip_reass.c; this file only calls them.
 */
void	ipreass_init(void);
void	ipreass_drain(void);
void	ipreass_slowtimo(void);
#ifdef VIMAGE
/* Teardown hook, only declared for VIMAGE kernels. */
void	ipreass_destroy(void);
#endif
1091dbefcc0SGleb Smirnoff 
11082cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
11182cea7e6SBjoern A. Zeeb 
11282cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1136df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
114eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1158b615593SMarko Zec     "Enable IP forwarding between interfaces");
1160312fbe9SPoul-Henning Kamp 
1178ad114c0SGeorge V. Neville-Neil /*
1188ad114c0SGeorge V. Neville-Neil  * Respond with an ICMP host redirect when we forward a packet out of
1198ad114c0SGeorge V. Neville-Neil  * the same interface on which it was received.  See RFC 792.
1208ad114c0SGeorge V. Neville-Neil  */
1218ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1;
1226df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
123eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1248b615593SMarko Zec     "Enable sending IP redirects");
1250312fbe9SPoul-Henning Kamp 
12694df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false;
12794df3271SGleb Smirnoff #define	V_ip_strong_es	VNET(ip_strong_es)
12894df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es,
12994df3271SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false,
13094df3271SGleb Smirnoff     "Packet's IP destination address must match address on arrival interface");
131b3e95d4eSJonathan Lemon 
1322ce85919SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_sav) = true;
1332ce85919SGleb Smirnoff #define	V_ip_sav	VNET(ip_sav)
1342ce85919SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation,
1352ce85919SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true,
1362ce85919SGleb Smirnoff     "Drop incoming packets with source address that is a local address");
1372ce85919SGleb Smirnoff 
/*
 * Per-VNET packet filter hook head for IPv4.  It is filled in by
 * ip_vnet_init() and consulted for inbound packets in ip_input().
 */
VNET_DEFINE(pfil_head_t, inet_pfil_head);
139df8bae1dSRodney W. Grimes 
140d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
141d4b5cae4SRobert Watson 	.nh_name = "ip",
142d4b5cae4SRobert Watson 	.nh_handler = ip_input,
143d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
144b8bc95cdSAdrian Chadd #ifdef	RSS
1452527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
146b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
147b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
148b8bc95cdSAdrian Chadd #else
149d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
150b8bc95cdSAdrian Chadd #endif
151d4b5cae4SRobert Watson };
152ca925d9cSJonathan Lemon 
153b8bc95cdSAdrian Chadd #ifdef	RSS
154b8bc95cdSAdrian Chadd /*
155b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
156b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
157b8bc95cdSAdrian Chadd  *
158b8bc95cdSAdrian Chadd  * It should likely have something that assert it
159b8bc95cdSAdrian Chadd  * actually has valid flow details.
160b8bc95cdSAdrian Chadd  */
161b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
162b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
163b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
164b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
165499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
166b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
167b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
168b8bc95cdSAdrian Chadd };
169b8bc95cdSAdrian Chadd #endif
170b8bc95cdSAdrian Chadd 
171*78b1fc05SGleb Smirnoff ipproto_input_t		*ip_protox[IPPROTO_MAX] = {
172*78b1fc05SGleb Smirnoff 			    [0 ... IPPROTO_MAX - 1] = rip_input };
173*78b1fc05SGleb Smirnoff ipproto_ctlinput_t	*ip_ctlprotox[IPPROTO_MAX] = {
174*78b1fc05SGleb Smirnoff 			    [0 ... IPPROTO_MAX - 1] = rip_ctlinput };
175*78b1fc05SGleb Smirnoff 
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17782cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17882cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
179ca925d9cSJonathan Lemon 
180c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */
181c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr)));
182c8ee75f2SGleb Smirnoff 
#ifdef IPCTL_DEFMTU
/* Only compiled when IPCTL_DEFMTU is defined; exports ip_mtu as "mtu". */
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0,
    "Default MTU");
#endif
1870312fbe9SPoul-Henning Kamp 
#ifdef IPSTEALTH
/*
 * Per-VNET "stealth" switch, only present in IPSTEALTH kernels.  Per
 * its sysctl description, it disables TTL decrementation on forwarding.
 */
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
194eddfbb76SRobert Watson 
195315e3e38SRobert Watson /*
1965da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1975923c293SGleb Smirnoff  */
1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1995da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
2005da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
2015da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
2025923c293SGleb Smirnoff 
2035923c293SGleb Smirnoff #ifdef VIMAGE
2045da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2055923c293SGleb Smirnoff #endif /* VIMAGE */
2065923c293SGleb Smirnoff 
2075923c293SGleb Smirnoff /*
208315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2095923c293SGleb Smirnoff  * into ipstat treated as an array.
210315e3e38SRobert Watson  */
211315e3e38SRobert Watson void
212315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
213315e3e38SRobert Watson {
214315e3e38SRobert Watson 
2155da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
216315e3e38SRobert Watson }
217315e3e38SRobert Watson 
218315e3e38SRobert Watson void
219315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
220315e3e38SRobert Watson {
221315e3e38SRobert Watson 
2225da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
223315e3e38SRobert Watson }
224315e3e38SRobert Watson 
225d4b5cae4SRobert Watson static int
226d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
227d4b5cae4SRobert Watson {
228d4b5cae4SRobert Watson 	int error, qlimit;
229d4b5cae4SRobert Watson 
230d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
231d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
232d4b5cae4SRobert Watson 	if (error || !req->newptr)
233d4b5cae4SRobert Watson 		return (error);
234d4b5cae4SRobert Watson 	if (qlimit < 1)
235d4b5cae4SRobert Watson 		return (EINVAL);
236d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
237d4b5cae4SRobert Watson }
238d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
2397029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
2407029da5cSPawel Biernacki     sysctl_netinet_intr_queue_maxlen, "I",
241d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
242d4b5cae4SRobert Watson 
243d4b5cae4SRobert Watson static int
244d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
245d4b5cae4SRobert Watson {
246d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
247d4b5cae4SRobert Watson 	int error, qdrops;
248d4b5cae4SRobert Watson 
249d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
250d4b5cae4SRobert Watson 	qdrops = qdrops_long;
251d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
252d4b5cae4SRobert Watson 	if (error || !req->newptr)
253d4b5cae4SRobert Watson 		return (error);
254d4b5cae4SRobert Watson 	if (qdrops != 0)
255d4b5cae4SRobert Watson 		return (EINVAL);
256d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
257d4b5cae4SRobert Watson 	return (0);
258d4b5cae4SRobert Watson }
259d4b5cae4SRobert Watson 
260d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
2617029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
2627029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_queue_drops, "I",
263d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
264d4b5cae4SRobert Watson 
265b8bc95cdSAdrian Chadd #ifdef	RSS
266b8bc95cdSAdrian Chadd static int
267b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
268b8bc95cdSAdrian Chadd {
269b8bc95cdSAdrian Chadd 	int error, qlimit;
270b8bc95cdSAdrian Chadd 
271b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
272b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
273b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
274b8bc95cdSAdrian Chadd 		return (error);
275b8bc95cdSAdrian Chadd 	if (qlimit < 1)
276b8bc95cdSAdrian Chadd 		return (EINVAL);
277b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
278b8bc95cdSAdrian Chadd }
2797faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2807029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
2817029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2827faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
283b8bc95cdSAdrian Chadd 
284b8bc95cdSAdrian Chadd static int
285b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
286b8bc95cdSAdrian Chadd {
287b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
288b8bc95cdSAdrian Chadd 	int error, qdrops;
289b8bc95cdSAdrian Chadd 
290b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
291b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
292b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
293b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
294b8bc95cdSAdrian Chadd 		return (error);
295b8bc95cdSAdrian Chadd 	if (qdrops != 0)
296b8bc95cdSAdrian Chadd 		return (EINVAL);
297b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
298b8bc95cdSAdrian Chadd 	return (0);
299b8bc95cdSAdrian Chadd }
300b8bc95cdSAdrian Chadd 
3017faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
3027029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
3037029da5cSPawel Biernacki     sysctl_netinet_intr_direct_queue_drops, "I",
304b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
305b8bc95cdSAdrian Chadd #endif	/* RSS */
306b8bc95cdSAdrian Chadd 
307df8bae1dSRodney W. Grimes /*
308df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
309df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
310df8bae1dSRodney W. Grimes  */
31189128ff3SGleb Smirnoff static void
31289128ff3SGleb Smirnoff ip_vnet_init(void *arg __unused)
313df8bae1dSRodney W. Grimes {
314b252313fSGleb Smirnoff 	struct pfil_head_args args;
315df8bae1dSRodney W. Grimes 
316d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
317603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3181ed81b73SMarko Zec 
3191ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3201dbefcc0SGleb Smirnoff 	ipreass_init();
3211ed81b73SMarko Zec 
3220b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
323b252313fSGleb Smirnoff 	args.pa_version = PFIL_VERSION;
324b252313fSGleb Smirnoff 	args.pa_flags = PFIL_IN | PFIL_OUT;
325b252313fSGleb Smirnoff 	args.pa_type = PFIL_TYPE_IP4;
326b252313fSGleb Smirnoff 	args.pa_headname = PFIL_INET_NAME;
327b252313fSGleb Smirnoff 	V_inet_pfil_head = pfil_head_register(&args);
3280b4b0b0fSJulian Elischer 
329ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
330ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
331ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
332ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
333ef91a976SAndrey V. Elsukov 		    __func__);
334ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
335ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
336ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
337ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
338ef91a976SAndrey V. Elsukov 		    __func__);
339ef91a976SAndrey V. Elsukov 
340484149deSBjoern A. Zeeb #ifdef VIMAGE
341484149deSBjoern A. Zeeb 	netisr_register_vnet(&ip_nh);
342484149deSBjoern A. Zeeb #ifdef	RSS
343484149deSBjoern A. Zeeb 	netisr_register_vnet(&ip_direct_nh);
344484149deSBjoern A. Zeeb #endif
345484149deSBjoern A. Zeeb #endif
34689128ff3SGleb Smirnoff }
34789128ff3SGleb Smirnoff VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
34889128ff3SGleb Smirnoff     ip_vnet_init, NULL);
34989128ff3SGleb Smirnoff 
35089128ff3SGleb Smirnoff static void
35189128ff3SGleb Smirnoff ip_init(const void *unused __unused)
35289128ff3SGleb Smirnoff {
3531ed81b73SMarko Zec 
354db09bef3SAndre Oppermann 	/*
355*78b1fc05SGleb Smirnoff 	 * Register statically compiled protocols, that are unlikely to
356*78b1fc05SGleb Smirnoff 	 * ever become dynamic.
357db09bef3SAndre Oppermann 	 */
358*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_ICMP, icmp_input, NULL);
359*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IGMP, igmp_input, NULL);
360*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_RSVP, rsvp_input, NULL);
361*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IPV4, encap4_input, NULL);
362*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_MOBILE, encap4_input, NULL);
363*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_ETHERIP, encap4_input, NULL);
364*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_GRE, encap4_input, NULL);
365*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_IPV6, encap4_input, NULL);
366*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_PIM, encap4_input, NULL);
367*78b1fc05SGleb Smirnoff #ifdef SCTP	/* XXX: has a loadable & static version */
368*78b1fc05SGleb Smirnoff 	IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput);
369*78b1fc05SGleb Smirnoff #endif
370194a213eSAndrey A. Chernov 
371d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
372b8bc95cdSAdrian Chadd #ifdef	RSS
373b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
374b8bc95cdSAdrian Chadd #endif
375df8bae1dSRodney W. Grimes }
37689128ff3SGleb Smirnoff SYSINIT(ip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_init, NULL);
377df8bae1dSRodney W. Grimes 
3789802380eSBjoern A. Zeeb #ifdef VIMAGE
3793f58662dSBjoern A. Zeeb static void
3803f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3819802380eSBjoern A. Zeeb {
382ef91a976SAndrey V. Elsukov 	int error;
3834d3dfd45SMikolaj Golub 
384484149deSBjoern A. Zeeb #ifdef	RSS
385484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
386484149deSBjoern A. Zeeb #endif
387484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
388484149deSBjoern A. Zeeb 
389b252313fSGleb Smirnoff 	pfil_head_unregister(V_inet_pfil_head);
390ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
391ef91a976SAndrey V. Elsukov 	if (error != 0) {
392ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
393ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
394ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
395ef91a976SAndrey V. Elsukov 	}
396ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
397ef91a976SAndrey V. Elsukov 	if (error != 0) {
398ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
399ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
400ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
401ef91a976SAndrey V. Elsukov 	}
40289856f7eSBjoern A. Zeeb 
40389856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
40489856f7eSBjoern A. Zeeb 	in_ifscrub_all();
40589856f7eSBjoern A. Zeeb 
40689856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
407b1d63265SAlexander V. Chernikov 	rib_flush_routes_family(AF_INET);
4089802380eSBjoern A. Zeeb 
409e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4101dbefcc0SGleb Smirnoff 	ipreass_destroy();
41189856f7eSBjoern A. Zeeb 
41289856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
41389856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4149802380eSBjoern A. Zeeb }
4153f58662dSBjoern A. Zeeb 
4163f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4179802380eSBjoern A. Zeeb #endif
4189802380eSBjoern A. Zeeb 
419b8bc95cdSAdrian Chadd #ifdef	RSS
420b8bc95cdSAdrian Chadd /*
421b8bc95cdSAdrian Chadd  * IP direct input routine.
422b8bc95cdSAdrian Chadd  *
423b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
424b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
425b8bc95cdSAdrian Chadd  */
426b8bc95cdSAdrian Chadd void
427b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
428b8bc95cdSAdrian Chadd {
429b8bc95cdSAdrian Chadd 	struct ip *ip;
430b8bc95cdSAdrian Chadd 	int hlen;
431b8bc95cdSAdrian Chadd 
432b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
433b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
434b8bc95cdSAdrian Chadd 
435fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
436fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
437fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
438fcf59617SAndrey V. Elsukov 			return;
439fcf59617SAndrey V. Elsukov 	}
440fcf59617SAndrey V. Elsukov #endif /* IPSEC */
441b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
442*78b1fc05SGleb Smirnoff 	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
443b8bc95cdSAdrian Chadd }
444b8bc95cdSAdrian Chadd #endif
445b8bc95cdSAdrian Chadd 
4464d2e3692SLuigi Rizzo /*
447df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
448df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
449df8bae1dSRodney W. Grimes  */
450c67b1d17SGarrett Wollman void
451c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
452df8bae1dSRodney W. Grimes {
4539188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4545da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
455ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4560aade26eSRobert Watson 	struct ifnet *ifp;
45794df3271SGleb Smirnoff 	int hlen = 0;
45821d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
45902c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
460f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
46194df3271SGleb Smirnoff 	bool strong_es;
462b715f178SLuigi Rizzo 
463fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
464b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
465db40007dSAndrew R. Reiter 
466ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46776ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
46876ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4692b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
47053be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4718f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4729b932e9eSAndre Oppermann 		goto ours;
4732b25acc1SLuigi Rizzo 	}
4742b25acc1SLuigi Rizzo 
47586425c62SRobert Watson 	IPSTAT_INC(ips_total);
47658938916SGarrett Wollman 
4770359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < sizeof(struct ip)))
47858938916SGarrett Wollman 		goto tooshort;
47958938916SGarrett Wollman 
4800359e7a5SMateusz Guzik 	if (m->m_len < sizeof(struct ip)) {
4810359e7a5SMateusz Guzik 		m = m_pullup(m, sizeof(struct ip));
4820359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
48386425c62SRobert Watson 			IPSTAT_INC(ips_toosmall);
484c67b1d17SGarrett Wollman 			return;
485df8bae1dSRodney W. Grimes 		}
4860359e7a5SMateusz Guzik 	}
487df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
48858938916SGarrett Wollman 
4890359e7a5SMateusz Guzik 	if (__predict_false(ip->ip_v != IPVERSION)) {
49086425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
491df8bae1dSRodney W. Grimes 		goto bad;
492df8bae1dSRodney W. Grimes 	}
49358938916SGarrett Wollman 
49453be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
4950359e7a5SMateusz Guzik 	if (__predict_false(hlen < sizeof(struct ip))) {	/* minimum header length */
49686425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
497df8bae1dSRodney W. Grimes 		goto bad;
498df8bae1dSRodney W. Grimes 	}
499df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
5000359e7a5SMateusz Guzik 		m = m_pullup(m, hlen);
5010359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
50286425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
503c67b1d17SGarrett Wollman 			return;
504df8bae1dSRodney W. Grimes 		}
505df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
506df8bae1dSRodney W. Grimes 	}
50733841545SHajimu UMEMOTO 
50857f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
50957f60867SMark Johnston 
5106c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
5110aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5126c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
5136c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
5140aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
51586425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
51633841545SHajimu UMEMOTO 			goto bad;
51733841545SHajimu UMEMOTO 		}
51833841545SHajimu UMEMOTO 	}
51933841545SHajimu UMEMOTO 
520db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
521db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
522db4f9cc7SJonathan Lemon 	} else {
52358938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
52447c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
52558938916SGarrett Wollman 		} else {
52647c861ecSBrian Somers 			sum = in_cksum(m, hlen);
52758938916SGarrett Wollman 		}
528db4f9cc7SJonathan Lemon 	}
5290359e7a5SMateusz Guzik 	if (__predict_false(sum)) {
53086425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
531df8bae1dSRodney W. Grimes 		goto bad;
532df8bae1dSRodney W. Grimes 	}
533df8bae1dSRodney W. Grimes 
53402b199f1SMax Laier #ifdef ALTQ
53502b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
53602b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
53702b199f1SMax Laier 		return;
53802b199f1SMax Laier #endif
53902b199f1SMax Laier 
54021d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
5410359e7a5SMateusz Guzik 	if (__predict_false(ip_len < hlen)) {
54286425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
543df8bae1dSRodney W. Grimes 		goto bad;
544df8bae1dSRodney W. Grimes 	}
545df8bae1dSRodney W. Grimes 
546df8bae1dSRodney W. Grimes 	/*
547df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
548df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
549df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
550df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
551df8bae1dSRodney W. Grimes 	 */
5520359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < ip_len)) {
55358938916SGarrett Wollman tooshort:
55486425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
555df8bae1dSRodney W. Grimes 		goto bad;
556df8bae1dSRodney W. Grimes 	}
55721d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
558df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55921d172a3SGleb Smirnoff 			m->m_len = ip_len;
56021d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
561df8bae1dSRodney W. Grimes 		} else
56221d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
563df8bae1dSRodney W. Grimes 	}
564b8bc95cdSAdrian Chadd 
565ad9f4d6aSAndrey V. Elsukov 	/*
566ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
567f389439fSBjoern A. Zeeb 	 * ip_tryforward() may generate redirects these days.
568f389439fSBjoern A. Zeeb 	 * XXX the logic below falling through to normal processing
569f389439fSBjoern A. Zeeb 	 * if redirects are required should be revisited as well.
570ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
571ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
572ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
573ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
574ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
575ad9f4d6aSAndrey V. Elsukov 	 */
5768ad114c0SGeorge V. Neville-Neil 	if (V_ipforwarding != 0
577fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
578fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
579fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
580ad9f4d6aSAndrey V. Elsukov #endif
581ad9f4d6aSAndrey V. Elsukov 	    ) {
582f389439fSBjoern A. Zeeb 		/*
583f389439fSBjoern A. Zeeb 		 * ip_dooptions() was run so we can ignore the source route (or
584f389439fSBjoern A. Zeeb 		 * any IP options case) case for redirects in ip_tryforward().
585f389439fSBjoern A. Zeeb 		 */
586ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
58733872124SGeorge V. Neville-Neil 			return;
588ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
589ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
590ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
591ad9f4d6aSAndrey V. Elsukov 			goto ours;
592ad9f4d6aSAndrey V. Elsukov 		}
593ad9f4d6aSAndrey V. Elsukov 	}
594fcf59617SAndrey V. Elsukov 
595fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
59614dd6717SSam Leffler 	/*
597ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
59814dd6717SSam Leffler 	 */
599fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
600fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
601c21fd232SAndre Oppermann 			goto passin;
602ad9f4d6aSAndrey V. Elsukov #endif
603fcf59617SAndrey V. Elsukov 
604c4ac87eaSDarren Reed 	/*
605134ea224SSam Leffler 	 * Run through list of hooks for input packets.
606f51f805fSSam Leffler 	 *
607f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
608f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
609f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
610c4ac87eaSDarren Reed 	 */
611c21fd232SAndre Oppermann 
612c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
613b252313fSGleb Smirnoff 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
614c21fd232SAndre Oppermann 		goto passin;
615c21fd232SAndre Oppermann 
616f51f805fSSam Leffler 	odst = ip->ip_dst;
617b252313fSGleb Smirnoff 	if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
618b252313fSGleb Smirnoff 	    PFIL_PASS)
619beec8214SDarren Reed 		return;
620134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
621c4ac87eaSDarren Reed 		return;
6229b932e9eSAndre Oppermann 
623c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
62402c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6259b932e9eSAndre Oppermann 
6269b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6279b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6289b932e9eSAndre Oppermann 		goto ours;
6299b932e9eSAndre Oppermann 	}
630ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
631de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
632099dd043SAndre Oppermann 			/*
633ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
634ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
635ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
636099dd043SAndre Oppermann 			 */
637ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
638099dd043SAndre Oppermann 			return;
639099dd043SAndre Oppermann 		}
640ffdbf9daSAndrey V. Elsukov 	}
641c21fd232SAndre Oppermann passin:
64221d172a3SGleb Smirnoff 
64321d172a3SGleb Smirnoff 	/*
644df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
645df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
646df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
647df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
648df8bae1dSRodney W. Grimes 	 */
6499b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
650c67b1d17SGarrett Wollman 		return;
651df8bae1dSRodney W. Grimes 
652f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
653f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
654f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
655f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
656f0068c4aSGarrett Wollman 	 * grabbing the packet.
657f0068c4aSGarrett Wollman          */
6580359e7a5SMateusz Guzik 	if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on)
659f0068c4aSGarrett Wollman 		goto ours;
660f0068c4aSGarrett Wollman 
661df8bae1dSRodney W. Grimes 	/*
662df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
663cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
664cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
665cc766e04SGarrett Wollman 	 * with it).
666df8bae1dSRodney W. Grimes 	 */
667d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
668cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
669cc766e04SGarrett Wollman 		goto ours;
670cc766e04SGarrett Wollman 
6717538a9a0SJonathan Lemon 	/*
672823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
673823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
67494df3271SGleb Smirnoff 	 * strong ES model) with a list of additional predicates:
67594df3271SGleb Smirnoff 	 * - if IP forwarding is disabled
67694df3271SGleb Smirnoff 	 * - the packet is not locally generated
67794df3271SGleb Smirnoff 	 * - the packet is not subject to 'ipfw fwd'
67894df3271SGleb Smirnoff 	 * - Interface is not running CARP. If the packet got here, we already
67994df3271SGleb Smirnoff 	 *   checked it with carp_iamatch() and carp_forus().
680823db0e9SDon Lewis 	 */
68194df3271SGleb Smirnoff 	strong_es = V_ip_strong_es && (V_ipforwarding == 0) &&
68281674f12SGleb Smirnoff 	    ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
68354bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
684823db0e9SDon Lewis 
685ca925d9cSJonathan Lemon 	/*
686ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
687ca925d9cSJonathan Lemon 	 */
688c8ee75f2SGleb Smirnoff 	CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
68994df3271SGleb Smirnoff 		if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr)
69094df3271SGleb Smirnoff 			continue;
69194df3271SGleb Smirnoff 
692f9e354dfSJulian Elischer 		/*
69394df3271SGleb Smirnoff 		 * net.inet.ip.rfc1122_strong_es: the address matches, verify
69494df3271SGleb Smirnoff 		 * that the packet arrived via the correct interface.
695f9e354dfSJulian Elischer 		 */
69694df3271SGleb Smirnoff 		if (__predict_false(strong_es && ia->ia_ifp != ifp)) {
69794df3271SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
69894df3271SGleb Smirnoff 			goto bad;
699ca925d9cSJonathan Lemon 		}
70094df3271SGleb Smirnoff 
7012ce85919SGleb Smirnoff 		/*
7022ce85919SGleb Smirnoff 		 * net.inet.ip.source_address_validation: drop incoming
7032ce85919SGleb Smirnoff 		 * packets that pretend to be ours.
7042ce85919SGleb Smirnoff 		 */
7052ce85919SGleb Smirnoff 		if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) &&
7062ce85919SGleb Smirnoff 		    __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) {
7072ce85919SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
7082ce85919SGleb Smirnoff 			goto bad;
7092ce85919SGleb Smirnoff 		}
7102ce85919SGleb Smirnoff 
71194df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
71294df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
71394df3271SGleb Smirnoff 		goto ours;
7148c0fec80SRobert Watson 	}
7152d9cfabaSRobert Watson 
716823db0e9SDon Lewis 	/*
717ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
718ca925d9cSJonathan Lemon 	 *
719ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
720ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
721ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
722ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
723823db0e9SDon Lewis 	 */
72481674f12SGleb Smirnoff 	if (ifp->if_flags & IFF_BROADCAST) {
725d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
726ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
727ca925d9cSJonathan Lemon 				continue;
728ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
729df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7300aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7317caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7327caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7337caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
734df8bae1dSRodney W. Grimes 				goto ours;
7350aade26eSRobert Watson 			}
7360ac40133SBrian Somers #ifdef BOOTP_COMPAT
7370aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7387caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7397caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7407caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
741ca925d9cSJonathan Lemon 				goto ours;
7420aade26eSRobert Watson 			}
7430ac40133SBrian Somers #endif
744df8bae1dSRodney W. Grimes 		}
74519e5b0a7SRobert Watson 		ia = NULL;
746df8bae1dSRodney W. Grimes 	}
747df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
74803b0505bSZhenlei Huang 		/*
74903b0505bSZhenlei Huang 		 * RFC 3927 2.7: Do not forward multicast packets from
75003b0505bSZhenlei Huang 		 * IN_LINKLOCAL.
75103b0505bSZhenlei Huang 		 */
7523d846e48SZhenlei Huang 		if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
753df8bae1dSRodney W. Grimes 			/*
754df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
755df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
756df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
757df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
758df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
759df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
760df8bae1dSRodney W. Grimes 			 */
7610aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
76286425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
763df8bae1dSRodney W. Grimes 				m_freem(m);
764c67b1d17SGarrett Wollman 				return;
765df8bae1dSRodney W. Grimes 			}
766df8bae1dSRodney W. Grimes 
767df8bae1dSRodney W. Grimes 			/*
76811612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
769df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
770df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
771df8bae1dSRodney W. Grimes 			 */
77265634ae7SWojciech Macek 			if (ip->ip_p == IPPROTO_IGMP) {
773df8bae1dSRodney W. Grimes 				goto ours;
77465634ae7SWojciech Macek 			}
77586425c62SRobert Watson 			IPSTAT_INC(ips_forward);
776df8bae1dSRodney W. Grimes 		}
777df8bae1dSRodney W. Grimes 		/*
778d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
779d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
780d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
781df8bae1dSRodney W. Grimes 		 */
782df8bae1dSRodney W. Grimes 		goto ours;
783df8bae1dSRodney W. Grimes 	}
784df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
785df8bae1dSRodney W. Grimes 		goto ours;
786df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
787df8bae1dSRodney W. Grimes 		goto ours;
78803b0505bSZhenlei Huang 	/* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */
7893d846e48SZhenlei Huang 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
7903d846e48SZhenlei Huang 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
7913d846e48SZhenlei Huang 		IPSTAT_INC(ips_cantforward);
7923d846e48SZhenlei Huang 		m_freem(m);
7933d846e48SZhenlei Huang 		return;
7943d846e48SZhenlei Huang 	}
795df8bae1dSRodney W. Grimes 
7966a800098SYoshinobu Inoue 	/*
797df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
798df8bae1dSRodney W. Grimes 	 */
799603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
80086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
801df8bae1dSRodney W. Grimes 		m_freem(m);
802546f251bSChris D. Faulhaber 	} else {
8039b932e9eSAndre Oppermann 		ip_forward(m, dchg);
804546f251bSChris D. Faulhaber 	}
805c67b1d17SGarrett Wollman 	return;
806df8bae1dSRodney W. Grimes 
807df8bae1dSRodney W. Grimes ours:
808d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
809d0ebc0d2SYaroslav Tykhiy 	/*
810d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
811d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
812d0ebc0d2SYaroslav Tykhiy 	 */
8137caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
814d0ebc0d2SYaroslav Tykhiy 		return;
815d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
816d0ebc0d2SYaroslav Tykhiy 
81763f8d699SJordan K. Hubbard 	/*
818b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
819ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
820df8bae1dSRodney W. Grimes 	 */
8218f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
822aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
823f0cada84SAndre Oppermann 		m = ip_reass(m);
824f0cada84SAndre Oppermann 		if (m == NULL)
825c67b1d17SGarrett Wollman 			return;
8266a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8277e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
82853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
829f0cada84SAndre Oppermann 	}
830f0cada84SAndre Oppermann 
831fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
832fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
833fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
834fcf59617SAndrey V. Elsukov 			return;
835fcf59617SAndrey V. Elsukov 	}
836b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
83733841545SHajimu UMEMOTO 
838df8bae1dSRodney W. Grimes 	/*
839df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
840df8bae1dSRodney W. Grimes 	 */
84186425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8429b932e9eSAndre Oppermann 
843*78b1fc05SGleb Smirnoff 	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
844c67b1d17SGarrett Wollman 	return;
845df8bae1dSRodney W. Grimes bad:
846df8bae1dSRodney W. Grimes 	m_freem(m);
847c67b1d17SGarrett Wollman }
848c67b1d17SGarrett Wollman 
849c67b1d17SGarrett Wollman /*
850df8bae1dSRodney W. Grimes  * IP timer processing;
851df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
852df8bae1dSRodney W. Grimes  * queue, discard it.
853df8bae1dSRodney W. Grimes  */
854df8bae1dSRodney W. Grimes void
855f2565d68SRobert Watson ip_slowtimo(void)
856df8bae1dSRodney W. Grimes {
8578b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
858df8bae1dSRodney W. Grimes 
8595ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8608b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8618b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8621dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8638b615593SMarko Zec 		CURVNET_RESTORE();
8648b615593SMarko Zec 	}
8655ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
866df8bae1dSRodney W. Grimes }
867df8bae1dSRodney W. Grimes 
8689802380eSBjoern A. Zeeb void
8699802380eSBjoern A. Zeeb ip_drain(void)
8709802380eSBjoern A. Zeeb {
8719802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8729802380eSBjoern A. Zeeb 
8739802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8749802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8759802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8761dbefcc0SGleb Smirnoff 		ipreass_drain();
8778b615593SMarko Zec 		CURVNET_RESTORE();
8788b615593SMarko Zec 	}
8795ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
880df8bae1dSRodney W. Grimes }
881df8bae1dSRodney W. Grimes 
882de38924dSAndre Oppermann int
883*78b1fc05SGleb Smirnoff ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl)
884de38924dSAndre Oppermann {
885de38924dSAndre Oppermann 
886*78b1fc05SGleb Smirnoff 	MPASS(proto > 0);
887de38924dSAndre Oppermann 
888de38924dSAndre Oppermann 	/*
889de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
890*78b1fc05SGleb Smirnoff 	 * already.  An index pointing to rip_input() is unused.
891de38924dSAndre Oppermann 	 */
892*78b1fc05SGleb Smirnoff 	if (ip_protox[proto] == rip_input) {
893*78b1fc05SGleb Smirnoff 		ip_protox[proto] = input;
894*78b1fc05SGleb Smirnoff 		ip_ctlprotox[proto] = ctl;
895de38924dSAndre Oppermann 		return (0);
896*78b1fc05SGleb Smirnoff 	} else
897*78b1fc05SGleb Smirnoff 		return (EEXIST);
898de38924dSAndre Oppermann }
899de38924dSAndre Oppermann 
900de38924dSAndre Oppermann int
901*78b1fc05SGleb Smirnoff ipproto_unregister(uint8_t proto)
902de38924dSAndre Oppermann {
903de38924dSAndre Oppermann 
904*78b1fc05SGleb Smirnoff 	MPASS(proto > 0);
905de38924dSAndre Oppermann 
906*78b1fc05SGleb Smirnoff 	if (ip_protox[proto] != rip_input) {
907*78b1fc05SGleb Smirnoff 		ip_protox[proto] = rip_input;
908*78b1fc05SGleb Smirnoff 		ip_ctlprotox[proto] = rip_ctlinput;
909de38924dSAndre Oppermann 		return (0);
910*78b1fc05SGleb Smirnoff 	} else
911*78b1fc05SGleb Smirnoff 		return (ENOENT);
912de38924dSAndre Oppermann }
913de38924dSAndre Oppermann 
914df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
915df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
916df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
917df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
918df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
919fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
9203b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
921df8bae1dSRodney W. Grimes };
922df8bae1dSRodney W. Grimes 
923df8bae1dSRodney W. Grimes /*
924df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
925df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
926df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
927df8bae1dSRodney W. Grimes  * of codes and types.
928df8bae1dSRodney W. Grimes  *
929df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
930df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
931df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
932df8bae1dSRodney W. Grimes  * protocol deal with that.
933df8bae1dSRodney W. Grimes  *
934df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
935df8bae1dSRodney W. Grimes  * via a source route.
936df8bae1dSRodney W. Grimes  */
9379b932e9eSAndre Oppermann void
9389b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
939df8bae1dSRodney W. Grimes {
9402b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
941efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
942df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
943d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9449b932e9eSAndre Oppermann 	struct in_addr dest;
945b835b6feSBjoern A. Zeeb 	struct route ro;
9464043ee3cSAlexander V. Chernikov 	uint32_t flowid;
947c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9483efc3014SJulian Elischer 
949b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
950b8a6e03fSGleb Smirnoff 
9519b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
95286425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
953df8bae1dSRodney W. Grimes 		m_freem(m);
954df8bae1dSRodney W. Grimes 		return;
955df8bae1dSRodney W. Grimes 	}
956fcf59617SAndrey V. Elsukov 	if (
957fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
958fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
959fcf59617SAndrey V. Elsukov #endif
960fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
961fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9628922ddbeSAndrey V. Elsukov 		return;
9638922ddbeSAndrey V. Elsukov 	}
964df8bae1dSRodney W. Grimes 
965d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
966d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
967d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
968d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
969d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
9704043ee3cSAlexander V. Chernikov 	flowid = m->m_pkthdr.flowid;
9714043ee3cSAlexander V. Chernikov 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
972983066f0SAlexander V. Chernikov 	if (ro.ro_nh != NULL) {
973983066f0SAlexander V. Chernikov 		ia = ifatoia(ro.ro_nh->nh_ifa);
97456844a62SErmal Luçi 	} else
97556844a62SErmal Luçi 		ia = NULL;
976df8bae1dSRodney W. Grimes 	/*
977bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
978bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
979bfef7ed4SIan Dowse 	 *
9804d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9814d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9824d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9834d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9844d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9854d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9864d2e3692SLuigi Rizzo 	 *
987c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
988bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
989bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
990bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
991df8bae1dSRodney W. Grimes 	 */
992dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
993eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
9949967cafcSSam Leffler 		/*
9959967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
9969967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
9979967cafcSSam Leffler 		 * be conservative and just discard the copy since
9989967cafcSSam Leffler 		 * code below may some day want the tags.
9999967cafcSSam Leffler 		 */
10009967cafcSSam Leffler 		m_free(mcopy);
10019967cafcSSam Leffler 		mcopy = NULL;
10029967cafcSSam Leffler 	}
1003bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10048f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1005e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1006bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1007bfef7ed4SIan Dowse 	}
100804287599SRuslan Ermilov #ifdef IPSTEALTH
1009fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
101004287599SRuslan Ermilov #endif
101104287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1012fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1013fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1014fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1015fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1016d16a2e47SMark Johnston 			RO_NHFREE(&ro);
1017fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1018fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1019fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1020b8a6e03fSGleb Smirnoff 			return;
102104287599SRuslan Ermilov 		}
1022fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1023fcf59617SAndrey V. Elsukov 	}
1024fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1025df8bae1dSRodney W. Grimes 	/*
1026df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1027df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1028df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1029df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1030df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1031df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1032df8bae1dSRodney W. Grimes 	 */
10339b932e9eSAndre Oppermann 	dest.s_addr = 0;
1034efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1035efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
1036983066f0SAlexander V. Chernikov 		struct nhop_object *nh;
103702c1c707SAndre Oppermann 
1038983066f0SAlexander V. Chernikov 		nh = ro.ro_nh;
103902c1c707SAndre Oppermann 
1040983066f0SAlexander V. Chernikov 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
1041983066f0SAlexander V. Chernikov 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
1042df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1043df8bae1dSRodney W. Grimes 
1044983066f0SAlexander V. Chernikov 			if (nh_ia != NULL &&
1045983066f0SAlexander V. Chernikov 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
1046df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1047df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1048df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
104962e1a437SZhenlei Huang 				if (nh->nh_flags & NHF_GATEWAY) {
105062e1a437SZhenlei Huang 				    if (nh->gw_sa.sa_family == AF_INET)
105162e1a437SZhenlei Huang 					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
105262e1a437SZhenlei Huang 				    else /* Do not redirect in case gw is AF_INET6 */
105362e1a437SZhenlei Huang 					type = 0;
105462e1a437SZhenlei Huang 				} else
105562e1a437SZhenlei Huang 					dest.s_addr = ip->ip_dst.s_addr;
1056df8bae1dSRodney W. Grimes 			}
1057df8bae1dSRodney W. Grimes 		}
105802c1c707SAndre Oppermann 	}
1059df8bae1dSRodney W. Grimes 
1060b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1061b835b6feSBjoern A. Zeeb 
1062983066f0SAlexander V. Chernikov 	if (error == EMSGSIZE && ro.ro_nh)
1063983066f0SAlexander V. Chernikov 		mtu = ro.ro_nh->nh_mtu;
1064983066f0SAlexander V. Chernikov 	RO_NHFREE(&ro);
1065b835b6feSBjoern A. Zeeb 
1066df8bae1dSRodney W. Grimes 	if (error)
106786425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1068df8bae1dSRodney W. Grimes 	else {
106986425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1070df8bae1dSRodney W. Grimes 		if (type)
107186425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1072df8bae1dSRodney W. Grimes 		else {
10739188b4a1SAndre Oppermann 			if (mcopy)
1074df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1075b8a6e03fSGleb Smirnoff 			return;
1076df8bae1dSRodney W. Grimes 		}
1077df8bae1dSRodney W. Grimes 	}
10784f6c66ccSMatt Macy 	if (mcopy == NULL)
1079b8a6e03fSGleb Smirnoff 		return;
10804f6c66ccSMatt Macy 
1081df8bae1dSRodney W. Grimes 	switch (error) {
1082df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1083df8bae1dSRodney W. Grimes 		/* type, code set above */
1084df8bae1dSRodney W. Grimes 		break;
1085df8bae1dSRodney W. Grimes 
1086efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1087df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1088df8bae1dSRodney W. Grimes 	case ENETDOWN:
1089df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1090df8bae1dSRodney W. Grimes 	default:
1091df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1092df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1093df8bae1dSRodney W. Grimes 		break;
1094df8bae1dSRodney W. Grimes 
1095df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1096df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1097df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
10989b932e9eSAndre Oppermann 		/*
1099b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1100b835b6feSBjoern A. Zeeb 		 * interface MTU.
1101ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1102ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1103ab48768bSAndre Oppermann 		 * current packet size.
11049b932e9eSAndre Oppermann 		 */
1105b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1106b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1107b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1108b835b6feSBjoern A. Zeeb 		} else {
1109ab48768bSAndre Oppermann 			if (ia != NULL)
1110c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1111ab48768bSAndre Oppermann 			else
11128f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1113ab48768bSAndre Oppermann 		}
111486425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1115df8bae1dSRodney W. Grimes 		break;
1116df8bae1dSRodney W. Grimes 
1117df8bae1dSRodney W. Grimes 	case ENOBUFS:
11183a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11193a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1120b8a6e03fSGleb Smirnoff 		return;
1121df8bae1dSRodney W. Grimes 	}
1122c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1123df8bae1dSRodney W. Grimes }
1124df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when the socket `sp' has SO_TIMESTAMP set in so_options
 * and its so_ts_clock equals `ct', otherwise to 0.  Both arguments are
 * parenthesized so that arbitrary expressions can be passed; note that
 * `sp' is evaluated twice.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1127339efd75SMaxim Sobolev 
112882c23ebaSBill Fenner void
1129f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1130f2565d68SRobert Watson     struct mbuf *m)
113182c23ebaSBill Fenner {
113206193f0bSKonstantin Belousov 	bool stamped;
11338b615593SMarko Zec 
113406193f0bSKonstantin Belousov 	stamped = false;
1135339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1136339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
113706193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
113806193f0bSKonstantin Belousov 		struct timespec ts1;
1139be8a62e8SPoul-Henning Kamp 
114006193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
114106193f0bSKonstantin Belousov 		    M_TSTMP)) {
114206193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
114306193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
114406193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
114506193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
114606193f0bSKonstantin Belousov 		} else {
1147be8a62e8SPoul-Henning Kamp 			bintime(&bt);
114806193f0bSKonstantin Belousov 		}
1149b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&bt, sizeof(bt), SCM_BINTIME,
1150b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
115106193f0bSKonstantin Belousov 		if (*mp != NULL) {
1152be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
115306193f0bSKonstantin Belousov 			stamped = true;
115406193f0bSKonstantin Belousov 		}
1155be8a62e8SPoul-Henning Kamp 	}
1156339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
115706193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
1158c012cfe6SEd Maste 		struct timespec ts1;
115982c23ebaSBill Fenner 		struct timeval tv;
116082c23ebaSBill Fenner 
116106193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
116206193f0bSKonstantin Belousov 		    M_TSTMP)) {
116306193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
116406193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
116506193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
116606193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
116706193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
116806193f0bSKonstantin Belousov 		} else {
1169339efd75SMaxim Sobolev 			microtime(&tv);
117006193f0bSKonstantin Belousov 		}
1171b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP,
1172b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
117306193f0bSKonstantin Belousov 		if (*mp != NULL) {
117482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
117506193f0bSKonstantin Belousov 			stamped = true;
117606193f0bSKonstantin Belousov 		}
1177339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
117806193f0bSKonstantin Belousov 		struct bintime boottimebin;
117906193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1180339efd75SMaxim Sobolev 
118106193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
118206193f0bSKonstantin Belousov 		    M_TSTMP)) {
118306193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
118406193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
118506193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
11866040822cSAlan Somers 			timespecadd(&ts, &ts1, &ts);
118706193f0bSKonstantin Belousov 		} else {
1188339efd75SMaxim Sobolev 			nanotime(&ts);
118906193f0bSKonstantin Belousov 		}
1190b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ts, sizeof(ts), SCM_REALTIME,
1191b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
119206193f0bSKonstantin Belousov 		if (*mp != NULL) {
1193339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
119406193f0bSKonstantin Belousov 			stamped = true;
119506193f0bSKonstantin Belousov 		}
1196339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1197339efd75SMaxim Sobolev 		struct timespec ts;
1198339efd75SMaxim Sobolev 
119906193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
120006193f0bSKonstantin Belousov 		    M_TSTMP))
120106193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
120206193f0bSKonstantin Belousov 		else
1203339efd75SMaxim Sobolev 			nanouptime(&ts);
1204b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ts, sizeof(ts), SCM_MONOTONIC,
1205b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
120606193f0bSKonstantin Belousov 		if (*mp != NULL) {
120706193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
120806193f0bSKonstantin Belousov 			stamped = true;
120906193f0bSKonstantin Belousov 		}
121006193f0bSKonstantin Belousov 	}
121106193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
121206193f0bSKonstantin Belousov 	    M_TSTMP)) {
121306193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
121406193f0bSKonstantin Belousov 
121506193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
121606193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
121706193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
121806193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
1219b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&sti, sizeof(sti), SCM_TIME_INFO,
1220b46667c6SGleb Smirnoff 		    SOL_SOCKET, M_NOWAIT);
122106193f0bSKonstantin Belousov 		if (*mp != NULL)
1222339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1223be8a62e8SPoul-Henning Kamp 	}
122482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
1225b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr),
1226b46667c6SGleb Smirnoff 		    IP_RECVDSTADDR, IPPROTO_IP, M_NOWAIT);
122782c23ebaSBill Fenner 		if (*mp)
122882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
122982c23ebaSBill Fenner 	}
12304957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
1231b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_ttl, sizeof(u_char), IP_RECVTTL,
1232b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
12334957466bSMatthew N. Dodd 		if (*mp)
12344957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
12354957466bSMatthew N. Dodd 	}
123682c23ebaSBill Fenner #ifdef notyet
123782c23ebaSBill Fenner 	/* XXX
123882c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
123982c23ebaSBill Fenner 	 * than they already were.
124082c23ebaSBill Fenner 	 */
124182c23ebaSBill Fenner 	/* options were tossed already */
124282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
1243b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(opts_deleted_above,
1244b46667c6SGleb Smirnoff 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, M_NOWAIT);
124582c23ebaSBill Fenner 		if (*mp)
124682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
124782c23ebaSBill Fenner 	}
124882c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
124982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1250b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(ip_srcroute(m), sizeof(struct in_addr),
1251b46667c6SGleb Smirnoff 		    IP_RECVRETOPTS, IPPROTO_IP, M_NOWAIT);
125282c23ebaSBill Fenner 		if (*mp)
125382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
125482c23ebaSBill Fenner 	}
125582c23ebaSBill Fenner #endif
125682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1257d314ad7bSJulian Elischer 		struct ifnet *ifp;
1258d314ad7bSJulian Elischer 		struct sdlbuf {
125982c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1260d314ad7bSJulian Elischer 			u_char	pad[32];
1261d314ad7bSJulian Elischer 		} sdlbuf;
1262d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1263d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
126482c23ebaSBill Fenner 
1265db0ac6deSCy Schubert 		if ((ifp = m->m_pkthdr.rcvif)) {
12664a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1267d314ad7bSJulian Elischer 			/*
1268d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1269d314ad7bSJulian Elischer 			 */
127046f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
127146f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1272d314ad7bSJulian Elischer 				goto makedummy;
1273d314ad7bSJulian Elischer 			}
1274d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1275d314ad7bSJulian Elischer 		} else {
1276d314ad7bSJulian Elischer makedummy:
127746f2df9cSSergey Kandaurov 			sdl2->sdl_len =
127846f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1279d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1280d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1281d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1282d314ad7bSJulian Elischer 		}
1283b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(sdl2, sdl2->sdl_len, IP_RECVIF,
1284b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
128582c23ebaSBill Fenner 		if (*mp)
128682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
128782c23ebaSBill Fenner 	}
12883cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
1289b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&ip->ip_tos, sizeof(u_char), IP_RECVTOS,
1290b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
12913cca425bSMichael Tuexen 		if (*mp)
12923cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12933cca425bSMichael Tuexen 	}
12949d3ddf43SAdrian Chadd 
12959d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12969d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12979d3ddf43SAdrian Chadd 
12989d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12999d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13009d3ddf43SAdrian Chadd 
13019d3ddf43SAdrian Chadd 		/*
13029d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
13039d3ddf43SAdrian Chadd 		 * other - don't populate both?
13049d3ddf43SAdrian Chadd 		 */
1305b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&flowid, sizeof(uint32_t), IP_FLOWID,
1306b46667c6SGleb Smirnoff 		    IPPROTO_IP, M_NOWAIT);
13079d3ddf43SAdrian Chadd 		if (*mp)
13089d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
1309b46667c6SGleb Smirnoff 		*mp = sbcreatecontrol(&flow_type, sizeof(uint32_t),
1310b46667c6SGleb Smirnoff 		    IP_FLOWTYPE, IPPROTO_IP, M_NOWAIT);
13119d3ddf43SAdrian Chadd 		if (*mp)
13129d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13139d3ddf43SAdrian Chadd 	}
13149d3ddf43SAdrian Chadd 
13159d3ddf43SAdrian Chadd #ifdef	RSS
13169d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
13179d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13189d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
13199d3ddf43SAdrian Chadd 
13209d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13219d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13229d3ddf43SAdrian Chadd 
13239d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
1324b46667c6SGleb Smirnoff 			*mp = sbcreatecontrol(&rss_bucketid, sizeof(uint32_t),
1325b46667c6SGleb Smirnoff 			    IP_RSSBUCKETID, IPPROTO_IP, M_NOWAIT);
13269d3ddf43SAdrian Chadd 			if (*mp)
13279d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
13289d3ddf43SAdrian Chadd 		}
13299d3ddf43SAdrian Chadd 	}
13309d3ddf43SAdrian Chadd #endif
133182c23ebaSBill Fenner }
133282c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
13395f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on);
134082cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
134182cea7e6SBjoern A. Zeeb 
134282cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
134382cea7e6SBjoern A. Zeeb 
1344df8bae1dSRodney W. Grimes int
1345f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1346f0068c4aSGarrett Wollman {
13478b615593SMarko Zec 
1348f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1349f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1350f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1351f0068c4aSGarrett Wollman 
1352603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1353f0068c4aSGarrett Wollman 		return EADDRINUSE;
1354f0068c4aSGarrett Wollman 
1355603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13561c5de19aSGarrett Wollman 	/*
13571c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13581c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13591c5de19aSGarrett Wollman 	 */
1360603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1361603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1362603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13631c5de19aSGarrett Wollman 	}
1364f0068c4aSGarrett Wollman 
1365f0068c4aSGarrett Wollman 	return 0;
1366f0068c4aSGarrett Wollman }
1367f0068c4aSGarrett Wollman 
1368f0068c4aSGarrett Wollman int
1369f0068c4aSGarrett Wollman ip_rsvp_done(void)
1370f0068c4aSGarrett Wollman {
13718b615593SMarko Zec 
1372603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13731c5de19aSGarrett Wollman 	/*
13741c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13751c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13761c5de19aSGarrett Wollman 	 */
1377603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1378603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1379603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13801c5de19aSGarrett Wollman 	}
1381f0068c4aSGarrett Wollman 	return 0;
1382f0068c4aSGarrett Wollman }
1383bbb4330bSLuigi Rizzo 
13848f5a8818SKevin Lo int
13858f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1386bbb4330bSLuigi Rizzo {
13878f5a8818SKevin Lo 	struct mbuf *m;
13888f5a8818SKevin Lo 
13898f5a8818SKevin Lo 	m = *mp;
13908f5a8818SKevin Lo 	*mp = NULL;
13918b615593SMarko Zec 
1392bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13938f5a8818SKevin Lo 		*mp = m;
13948f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13958f5a8818SKevin Lo 		return (IPPROTO_DONE);
1396bbb4330bSLuigi Rizzo 	}
1397bbb4330bSLuigi Rizzo 
1398bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1399bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1400bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1401bbb4330bSLuigi Rizzo 	 */
1402bbb4330bSLuigi Rizzo 
1403603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1404bbb4330bSLuigi Rizzo 		m_freem(m);
14058f5a8818SKevin Lo 		return (IPPROTO_DONE);
1406bbb4330bSLuigi Rizzo 	}
1407bbb4330bSLuigi Rizzo 
1408603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
14098f5a8818SKevin Lo 		*mp = m;
14108f5a8818SKevin Lo 		rip_input(mp, offp, proto);
14118f5a8818SKevin Lo 		return (IPPROTO_DONE);
1412bbb4330bSLuigi Rizzo 	}
1413bbb4330bSLuigi Rizzo 	/* Drop the packet */
1414bbb4330bSLuigi Rizzo 	m_freem(m);
14158f5a8818SKevin Lo 	return (IPPROTO_DONE);
1416bbb4330bSLuigi Rizzo }
1417